From 04d0d602ef7245fd566debd91bcb148acd9ed067 Mon Sep 17 00:00:00 2001 From: xleroy Date: Mon, 28 Jul 2014 12:13:15 +0000 Subject: PowerPC port: refactored the expansion of built-in functions and pseudo-instructions so that it does not need to be re-done in cchecklink. cchecklink: updated accordingly. testsuite: compile with -sdump and run cchecklink if supported. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2553 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- Changelog | 4 + arm/Asmexpand.ml | 18 + checklink/Asm_printers.ml | 47 +- checklink/Check.ml | 1164 +++++++++++++-------------------------------- checklink/Frameworks.ml | 8 +- checklink/Fuzz.ml | 19 +- driver/Driver.ml | 5 +- extraction/extraction.v | 1 + ia32/Asmexpand.ml | 18 + powerpc/Asm.v | 99 +++- powerpc/Asmexpand.ml | 525 ++++++++++++++++++++ powerpc/Asmgen.v | 3 +- powerpc/Asmgenproof.v | 24 +- powerpc/PrintAsm.ml | 567 ++++------------------ test/Makefile | 3 + test/c/Makefile | 12 +- test/compression/Makefile | 16 +- test/raytracer/Makefile | 11 +- test/regression/Makefile | 13 +- test/spass/Makefile | 10 +- 20 files changed, 1200 insertions(+), 1367 deletions(-) create mode 100644 arm/Asmexpand.ml create mode 100644 ia32/Asmexpand.ml create mode 100644 powerpc/Asmexpand.ml diff --git a/Changelog b/Changelog index 604618c..8a2af33 100644 --- a/Changelog +++ b/Changelog @@ -22,6 +22,10 @@ registers that can contain data of unknown types (e.g. float32 or float64) but known sizes. +- PowerPC port: refactored the expansion of built-in functions and + pseudo-instructions so that it does not need to be re-done in + cchecklink. Updated the cchecklink validator accordingly. + Release 2.3pl2, 2014-05-15 ========================== diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml new file mode 100644 index 0000000..4baaac3 --- /dev/null +++ b/arm/Asmexpand.ml @@ -0,0 +1,18 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(* Expanding built-ins and some pseudo-instructions by rewriting + of the ARM assembly code. Currently not done, this expansion + is performed on the fly in PrintAsm. *) + +let expand_program p = p + diff --git a/checklink/Asm_printers.ml b/checklink/Asm_printers.ml index 9bb2009..1f737c2 100644 --- a/checklink/Asm_printers.ml +++ b/checklink/Asm_printers.ml @@ -88,6 +88,7 @@ let string_of_preg = function | CR0_1 -> "CR0_1" | CR0_2 -> "CR0_2" | CR0_3 -> "CR0_3" +| CR1_2 -> "CR1_2" let string_of_external_function e = match e with @@ -117,8 +118,19 @@ let string_of_crbit = function | CRbit_1 -> "CRbit_1" | CRbit_2 -> "CRbit_2" | CRbit_3 -> "CRbit_3" +| CRbit_6 -> "CRbit_6" -let string_of_memory_chunk mc = "MEMORY_CHUNK" +let string_of_memory_chunk = function + | Mint8signed -> "int8s" + | Mint8unsigned -> "int8u" + | Mint16signed -> "int16s" + | Mint16unsigned -> "int16u" + | Mint32 -> "int32" + | Mint64 -> "int64" + | Mfloat32 -> "float32" + | Mfloat64 -> "float64" + | Many32 -> "any32" + | Many64 -> "any64" let string_of_annot_param = function | APreg (p0)-> "APreg(" ^ string_of_preg p0 ^ ")" @@ -126,6 +138,7 @@ let string_of_annot_param = function let string_of_instruction = function | Padd (i0, i1, i2) -> "Padd(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Paddc (i0, i1, i2) -> "Paddc(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Padde (i0, i1, i2) -> "Padde(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Paddi (i0, i1, c2) -> "Paddi(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_constant c2 ^ ")" | Paddic (i0, i1, c2) -> "Paddic(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_constant c2 ^ ")" @@ -139,6 +152,7 @@ let string_of_instruction = function | Pb (l0) -> "Pb(" ^ string_of_label l0 ^ ")" | Pbctr sg -> "Pbctr" | Pbctrl sg -> "Pbctrl" +| Pbdnz (l1) -> "Pbdnz(" ^ string_of_label l1 ^ ")" | Pbf (c0, l1) -> "Pbf(" ^ string_of_crbit c0 ^ ", " ^ string_of_label l1 ^ ")" | Pbl (i0, sg) -> "Pbl(" ^ string_of_ident i0 ^ ")" | Pbs (i0, sg) -> "Pbs(" ^ string_of_ident i0 ^ ")" @@ -149,9 +163,13 @@ let string_of_instruction = function | Pcmplwi (i0, c1) -> "Pcmplwi(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ")" | Pcmpw (i0, i1) -> "Pcmpw(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")" | Pcmpwi (i0, c1) -> "Pcmpwi(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ")" +| Pcntlz (i0, i1) -> "Pcntlz(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")" +| Pcreqv (c0, c1, c2) -> "Pcreqv(" ^ string_of_crbit c0 ^ ", " ^ string_of_crbit c1 ^ ", " ^ string_of_crbit c2 ^ ")" | Pcror (c0, c1, c2) -> "Pcror(" ^ string_of_crbit c0 ^ ", " ^ string_of_crbit c1 ^ ", " ^ string_of_crbit c2 ^ ")" +| Pcrxor (c0, c1, c2) -> "Pcrxor(" ^ string_of_crbit c0 ^ ", " ^ string_of_crbit c1 ^ ", " ^ string_of_crbit c2 ^ ")" | Pdivw (i0, i1, i2) -> "Pdivw(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pdivwu (i0, i1, i2) -> "Pdivwu(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Peieio -> "Peieio" | Peqv (i0, i1, i2) -> "Peqv(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pextsb (i0, i1) -> "Pextsb(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")" | Pextsh (i0, i1) -> "Pextsh(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")" @@ -162,6 +180,8 @@ let string_of_instruction = function | Pfadds (f0, f1, f2) -> "Pfadds(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ")" | Pfcmpu (f0, f1) -> "Pfcmpu(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")" | Pfcti (i0, f1) -> "Pfcti(" ^ string_of_ireg i0 ^ ", " ^ string_of_freg f1 ^ ")" +| Pfctiw (f0, f1) -> "Pfctiw(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")" +| Pfctiwz (f0, f1) -> "Pfctiwz(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")" | Pfdiv (f0, f1, f2) -> "Pfdiv(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ")" | Pfdivs (f0, f1, f2) -> "Pfdivs(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ")" | Pfmake (f0, i1, i2) -> "Pfmake(" ^ string_of_freg f0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" @@ -174,6 +194,15 @@ let string_of_instruction = function | Pfxdp (f0, f1) -> "Pfxdp(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")" | Pfsub (f0, f1, f2) -> "Pfsub(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ")" | Pfsubs (f0, f1, f2) -> "Pfsubs(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ")" +| Pfmadd (f0, f1, f2, f3) -> "Pfmadd(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ", " ^ string_of_freg f3 ^ ")" +| Pfmsub (f0, f1, f2, f3) -> "Pfmsub(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ", " ^ string_of_freg f3 ^ ")" +| Pfnmadd (f0, f1, f2, f3) -> "Pfnmadd(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ", " ^ string_of_freg f3 ^ ")" +| Pfnmsub (f0, f1, f2, f3) -> "Pfnmsub(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ", " ^ string_of_freg f3 ^ ")" +| Pfsqrt (f0, f1) -> "Pfsqrt(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")" +| Pfrsqrte (f0, f1) -> "Pfrsqrte(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")" +| Pfres (f0, f1) -> "Pfres(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")" +| Pfsel (f0, f1, f2, f3) -> "Pfsel(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ", " ^ string_of_freg f3 ^ ")" +| Pisync -> "Pisync" | Plbz (i0, c1, i2) -> "Plbz(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plbzx (i0, i1, i2) -> "Plbzx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plfd (f0, c1, i2) -> "Plfd(" ^ string_of_freg f0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" @@ -184,14 +213,18 @@ let string_of_instruction = function | Plfsx (f0, i1, i2) -> "Plfsx(" ^ string_of_freg f0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plha (i0, c1, i2) -> "Plha(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plhax (i0, i1, i2) -> "Plhax(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Plhbrx (i0, i1, i2) -> "Plhbrx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plhz (i0, c1, i2) -> "Plhz(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plhzx (i0, i1, i2) -> "Plhzx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plfi (f0, f1) -> "Plfi(" ^ string_of_freg f0 ^ ", " ^ string_of_ffloat f1 ^ ")" | Plfis (f0, f1) -> "Plfis(" ^ string_of_freg f0 ^ ", " ^ string_of_ffloat32 f1 ^ ")" +| Plwbrx (i0, i1, i2) -> "Plwbrx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plwz (i0, c1, i2) -> "Plwz(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Plwzu (i0, c1, i2) -> "Plwzu(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plwz_a (i0, c1, i2) -> "Plwz_a(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plwzx (i0, i1, i2) -> "Plwzx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Plwzx_a (i0, i1, i2) -> "Plwzx_a(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Pmfcr (i0) -> "Pmfcr(" ^ string_of_ireg i0 ^ ")" | Pmfcrbit (i0, c1) -> "Pmfcrbit(" ^ string_of_ireg i0 ^ ", " ^ string_of_crbit c1 ^ ")" | Pmflr (i0) -> "Pmflr(" ^ string_of_ireg i0 ^ ")" | Pmr (i0, i1) -> "Pmr(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")" @@ -217,32 +250,42 @@ let string_of_instruction = function | Pstbx (i0, i1, i2) -> "Pstbx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pstfd (f0, c1, i2) -> "Pstfd(" ^ string_of_freg f0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pstfd_a (f0, c1, i2) -> "Pstfd_a(" ^ string_of_freg f0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Pstfdu (f0, c1, i2) -> "Pstfdu(" ^ string_of_freg f0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pstfdx (f0, i1, i2) -> "Pstfdx(" ^ string_of_freg f0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pstfdx_a (f0, i1, i2) -> "Pstfdx_a(" ^ string_of_freg f0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pstfs (f0, c1, i2) -> "Pstfs(" ^ string_of_freg f0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pstfsx (f0, i1, i2) -> "Pstfsx(" ^ string_of_freg f0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Psth (i0, c1, i2) -> "Psth(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Psthx (i0, i1, i2) -> "Psthx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Psthbrx (i0, i1, i2) -> "Psthbrx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pstw (i0, c1, i2) -> "Pstw(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pstw_a (i0, c1, i2) -> "Pstw_a(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Pstwu (i0, c1, i2) -> "Pstwu(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pstwx (i0, i1, i2) -> "Pstwx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pstwx_a (i0, i1, i2) -> "Pstwx_a(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Pstwxu (i0, i1, i2) -> "Pstwxu(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Pstwbrx (i0, i1, i2) -> "Pstwbrx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Psubfc (i0, i1, i2) -> "Psubfc(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Psubfe (i0, i1, i2) -> "Psubfe(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" +| Psubfze (i0, i1) -> "Psubfze(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")" | Psubfic (i0, i1, c2) -> "Psubfic(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_constant c2 ^ ")" +| Psync -> "Psync" +| Ptrap -> "Ptrap" | Pxor (i0, i1, i2) -> "Pxor(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")" | Pxori (i0, i1, c2) -> "Pxori(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_constant c2 ^ ")" | Pxoris (i0, i1, c2) -> "Pxoris(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_constant c2 ^ ")" | Plabel (l0) -> "Plabel(" ^ string_of_label l0 ^ ")" | Pbuiltin (e0, p1, p2) -> "Pbuiltin(" ^ string_of_external_function e0 ^ ", " ^ string_of_list string_of_preg ", " p1 ^ ", " ^ string_of_list string_of_preg ", " p2 ^ ")" | Pannot (e0, a1) -> "Pannot(" ^ string_of_external_function e0 ^ ", " ^ string_of_list string_of_annot_param ", " a1 ^ ")" +| Pcfi_adjust n -> "Pcfi_adjust(" ^ string_of_coq_Z n ^ ")" +| Pcfi_rel_offset n -> "Pcfi_rel_offset(" ^ string_of_coq_Z n ^ ")" let string_of_init_data = function | Init_int8(i) -> "Init_int8(" ^ string_of_int (z_int_lax i) ^ ")" | Init_int16(i) -> "Init_int16(" ^ string_of_int (z_int_lax i) ^ ")" | Init_int32(i) -> "Init_int32(" ^ string_of_int32i (z_int32 i) ^ ")" | Init_int64(i) -> "Init_int64(" ^ string_of_int64i (z_int64 i) ^ ")" -| Init_float32(f) -> "Init_float32(" ^ string_of_ffloat f ^ ")" +| Init_float32(f) -> "Init_float32(" ^ string_of_ffloat32 f ^ ")" | Init_float64(f) -> "Init_float64(" ^ string_of_ffloat f ^ ")" | Init_space(z) -> "Init_space(" ^ string_of_int (z_int z) ^ ")" | Init_addrof(ident, ofs) -> diff --git a/checklink/Check.ml b/checklink/Check.ml index 9f842b8..7eb3ea3 100644 --- a/checklink/Check.ml +++ b/checklink/Check.ml @@ -68,8 +68,8 @@ let check_st_bind atom (sym: elf32_sym): s_framework -> s_framework = (** Adapted from CompCert *) let name_of_section_Linux: section_name -> string = function | Section_text -> ".text" -| Section_data i -> if i then ".data" else ".bss" -| Section_small_data i -> if i then ".sdata" else ".sbss" +| Section_data i -> if i then ".data" else "COMM" +| Section_small_data i -> if i then ".sdata" else "COMM" | Section_const -> ".rodata" | Section_small_const -> ".sdata2" | Section_string -> ".rodata" @@ -277,10 +277,12 @@ let freg_arr: freg array = FPR23; FPR24; FPR25; FPR26; FPR27; FPR28; FPR29; FPR30; FPR31 |] -let crbit_arr: crbit array = - [| - CRbit_0; CRbit_1; CRbit_2; CRbit_3 - |] +let num_crbit = function + | CRbit_0 -> 0 + | CRbit_1 -> 1 + | CRbit_2 -> 2 + | CRbit_3 -> 3 + | CRbit_6 -> 6 type checker = f_framework -> f_framework or_err let check (cond: bool) (msg: string): checker = @@ -301,22 +303,20 @@ let match_int32s a b: checker = ) a b (** We compare floats by their bit representation, so that 0.0 and -0.0 are different. *) -let match_floats (a: Floats.float) (b: float): checker = - let a = Int64.bits_of_float (camlfloat_of_coqfloat a) in - let b = Int64.bits_of_float b in +let match_floats (a: Floats.float) (b: int64): checker = + let a = camlint64_of_coqint (Floats.Float.to_bits a) in check_eq ( Printf.sprintf "match_floats %s %s" (string_of_int64 a) (string_of_int64 b) ) a b -let match_floats32 (a: Floats.float32) (b: float): checker = - let a = Int64.bits_of_float (camlfloat_of_coqfloat32 a) in - let b = Int64.bits_of_float b in +let match_floats32 (a: Floats.float32) (b: int32): checker = + let a = camlint_of_coqint (Floats.Float32.to_bits a) in check_eq ( - Printf.sprintf "match_floats %s %s" (string_of_int64 a) (string_of_int64 b) + Printf.sprintf "match_floats %s %s" (string_of_int32 a) (string_of_int32 b) ) a b let match_crbits cb eb = - let eb = crbit_arr.(eb) in + let cb = num_crbit cb in check_eq ( - Printf.sprintf "match_crbits %s %s" (string_of_crbit cb) (string_of_crbit eb) + Printf.sprintf "match_crbits %d %d" cb eb ) cb eb let match_iregs cr er = let er = ireg_arr.(er) in @@ -575,195 +575,6 @@ let rec match_jmptbl lbllist vaddr size ffw = end end -(** Matches [ecode] against the expected CR6 magic before a function call. -*) -let match_set_cr6 sg ecode = - if sg.sig_cc.cc_vararg then - if List.mem Tfloat sg.sig_args then - match ecode with - | CREQV(6, 6, 6) :: ecode' -> Some ecode' - | _ -> None - else - match ecode with - | CRXOR(6, 6, 6) :: ecode' -> Some ecode' - | _ -> None - else Some ecode - -(** Matches [ecode] agains the expected code for a small memory copy - pseudo-instruction. Returns a triple containing the updated framework, - the remaining ELF code, and the updated program counter. -*) -let match_memcpy_small ecode pc sz al src dst (fw: f_framework) - : (f_framework * ecode * int32) or_err = - let error = ERR("match_memcpy_small") in - let rec match_memcpy_small_aux ofs sz ecode pc (fw: f_framework) = - let ofs32 = Safe32.of_int ofs in - if sz >= 8 && al >= 4 - then ( - match ecode with - | LFD (frD0, rA0, d0) :: - STFD(frS1, rA1, d1) :: es -> - OK(fw) - >>= match_fregs FPR13 frD0 - >>= match_iregs src rA0 - >>= match_int32s ofs32 (exts d0) - >>= match_fregs FPR13 frS1 - >>= match_iregs dst rA1 - >>= match_int32s ofs32 (exts d1) - >>= match_memcpy_small_aux (ofs + 8) (sz - 8) es (Int32.add 8l pc) - | _ -> error - ) - else if sz >= 4 - then ( - match ecode with - | LWZ(rD0, rA0, d0) :: - STW(rS1, rA1, d1) :: es -> - OK(fw) - >>= match_iregs GPR0 rD0 - >>= match_iregs src rA0 - >>= match_int32s ofs32 (exts d0) - >>= match_iregs GPR0 rS1 - >>= match_iregs dst rA1 - >>= match_int32s ofs32 (exts d0) - >>= match_memcpy_small_aux (ofs + 4) (sz - 4) es (Int32.add 8l pc) - | _ -> error - ) - else if sz >= 2 - then ( - match ecode with - | LHZ(rD0, rA0, d0) :: - STH(rS1, rA1, d1) :: es -> - OK(fw) - >>= match_iregs GPR0 rD0 - >>= match_iregs src rA0 - >>= match_int32s ofs32 (exts d0) - >>= match_iregs GPR0 rS1 - >>= match_iregs dst rA1 - >>= match_int32s ofs32 (exts d0) - >>= match_memcpy_small_aux (ofs + 2) (sz - 2) es (Int32.add 8l pc) - | _ -> error - ) - else if sz >= 1 - then ( - match ecode with - | LBZ(rD0, rA0, d0) :: - STB(rS1, rA1, d1) :: es -> - OK(fw) - >>= match_iregs GPR0 rD0 - >>= match_iregs src rA0 - >>= match_int32s ofs32 (exts d0) - >>= match_iregs GPR0 rS1 - >>= match_iregs dst rA1 - >>= match_int32s ofs32 (exts d0) - >>= match_memcpy_small_aux (ofs + 1) (sz - 1) es (Int32.add 8l pc) - | _ -> error - ) - else OK(fw, ecode, pc) - in match_memcpy_small_aux 0 sz ecode pc fw - -(** Matches [ecode] agains the expected code for a big memory copy - pseudo-instruction. Returns a triple containing the updated framework, - the remaining ELF code, and the updated program counter. -*) -let match_memcpy_big ecode pc sz al src dst fw - : (f_framework * ecode * int32) or_err = - let error = ERR("match_memcpy_big") in - match ecode with - | ADDI (rD0, rA0, simm0) :: (* pc *) - MTSPR(rS1, spr1) :: - ADDI (rD2, rA2, simm2) :: - ADDI (rD3, rA3, simm3) :: - LWZU (rD4, rA4, d4) :: (* pc + 16 <- *) - STWU (rS5, rA5, d5) :: (* | *) - BCx (bo6, bi6, bd6, aa6, lk6) :: (* pc + 24 -- *) - es -> - let sz' = Safe32.of_int (sz / 4) in - let (s, d) = if dst <> GPR11 then (GPR11, GPR12) else (GPR12, GPR11) in - let target_vaddr = Int32.(add 16l pc) in - let dest_vaddr = Int32.(add (add 24l pc) (mul 4l (exts bd6))) in - OK(fw) - >>= match_iregs GPR0 rD0 - >>= match_iregs GPR0 rA0 - >>= match_int32s sz' (exts simm0) - >>= match_iregs GPR0 rS1 - >>= match_ctr spr1 - >>= match_iregs s rD2 - >>= match_iregs src rA2 - >>= match_int32s (-4l) (exts simm2) - >>= match_iregs d rD3 - >>= match_iregs dst rA3 - >>= match_int32s (-4l) (exts simm3) - >>= match_iregs GPR0 rD4 - >>= match_iregs s rA4 - >>= match_int32s 4l (exts d4) - >>= match_iregs GPR0 rS5 - >>= match_iregs d rA5 - >>= match_int32s 4l (exts d5) - >>= (fun ffw -> - bitmatch bo6 with - | { 16:5:int } -> OK(ffw) - | { _ } -> ERR("bitmatch bo") - ) - >>= match_ints bi6 0 - >>= match_int32s dest_vaddr target_vaddr - >>= match_bools false aa6 - >>= match_bools false lk6 - >>= (fun fw -> - match sz land 3 with - | 1 -> - begin match es with - | LBZ(rD0, rA0, d0) :: - STB(rS1, rA1, d1) :: es -> - OK(fw) - >>= match_iregs GPR0 rD0 - >>= match_iregs s rA0 - >>= match_int32s 4l (exts d0) - >>= match_iregs GPR0 rS1 - >>= match_iregs d rA1 - >>= match_int32s 4l (exts d1) - >>= (fun fw -> OK(fw, es, Int32.add 36l pc)) - | _ -> error - end - | 2 -> - begin match es with - | LHZ(rD0, rA0, d0) :: - STH(rS1, rA1, d1) :: es -> - OK(fw) - >>= match_iregs GPR0 rD0 - >>= match_iregs s rA0 - >>= match_int32s 4l (exts d0) - >>= match_iregs GPR0 rS1 - >>= match_iregs d rA1 - >>= match_int32s 4l (exts d1) - >>= (fun fw -> OK(fw, es , Int32.add 36l pc)) - | _ -> error - end - | 3 -> - begin match es with - | LHZ(rD0, rA0, d0) :: - STH(rS1, rA1, d1) :: - LBZ(rD2, rA2, d2) :: - STB(rS3, rA3, d3) :: es -> - OK(fw) - >>= match_iregs GPR0 rD0 - >>= match_iregs s rA0 - >>= match_int32s 4l (exts d0) - >>= match_iregs GPR0 rS1 - >>= match_iregs d rA1 - >>= match_int32s 4l (exts d1) - >>= match_iregs GPR0 rD2 - >>= match_iregs s rA2 - >>= match_int32s 6l (exts d2) - >>= match_iregs GPR0 rS3 - >>= match_iregs d rA3 - >>= match_int32s 6l (exts d3) - >>= (fun fw -> OK(fw, es, Int32.add 44l pc)) - | _ -> error - end - | _ -> OK(fw, es, Int32.add 28l pc) - ) - | _ -> error - let match_bo_bt_bool bo = bitmatch bo with | { false:1; true:1; true:1; false:1; false:1 } -> true @@ -774,6 +585,11 @@ let match_bo_bf_bool bo = | { false:1; false:1; true:1; false:1; false:1 } -> true | { _ } -> false +let match_bo_bdnz_bool bo = + bitmatch bo with + | { true:1; false:1; false:1; false:1; false:1 } -> true + | { _ } -> false + let match_bo_bt bo: checker = fun ffw -> bitmatch bo with | { false:1; true:1; true:1; false:1; false:1 } -> OK(ffw) @@ -869,6 +685,18 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Paddc(rd, r1, r2) -> + begin match ecode with + | ADDCx(rD, rA, rB, oe, rc) :: es -> + OK(fw) + >>= match_iregs rd rD + >>= match_iregs r1 rA + >>= match_iregs r2 rB + >>= match_bools false oe + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end | Padde(rd, r1, r2) -> begin match ecode with | ADDEx(rD, rA, rB, oe, rc) :: es -> @@ -931,34 +759,7 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end - | Pallocframe(sz, ofs) -> - begin match ecode with - | STWU(rS, rA, d) :: es -> - OK(fw) - >>= match_iregs GPR1 rS - >>= match_iregs GPR1 rA - >>= match_z_int32 sz (Int32.neg (exts d)) - >>= match_z_int32 ofs 0l - >>= recur_simpl - | ADDIS (rD0, rA0, simm0) :: - ORI (rS1, rA1, uimm1) :: - STWUX (rS2, rA2, rB2) :: es -> - let sz32 = Int32.neg (z_int32 sz) in - let sz_hi = Int32.shift_right_logical sz32 16 in - let sz_lo = Int32.logand sz32 0xFFFFl in - OK(fw) - >>= match_iregs GPR12 rD0 - >>= match_iregs GPR0 rA0 - >>= match_int32s sz_hi (Safe32.of_int simm0) - >>= match_iregs GPR12 rS1 - >>= match_iregs GPR12 rA1 - >>= match_int32s sz_lo (Safe32.of_int uimm1) - >>= match_iregs GPR1 rS2 - >>= match_iregs GPR1 rA2 - >>= match_iregs GPR12 rB2 - >>= compare_code cs es (Int32.add 12l pc) - | _ -> error - end + | Pallocframe(sz, ofs) -> error | Pandc(rd, r1, r2) -> begin match ecode with | ANDCx(rS, rA, rB, rc) :: es -> @@ -1016,8 +817,8 @@ let rec compare_code ccode ecode pc: checker = fun fw -> | _ -> error end | Pbctr sg -> - begin match match_set_cr6 sg ecode with - | Some(BCCTRx(bo, bi, lk) :: es) -> + begin match ecode with + | BCCTRx(bo, bi, lk) :: es -> OK(fw) >>= match_bo_ctr bo >>= match_ints 0 bi @@ -1026,8 +827,8 @@ let rec compare_code ccode ecode pc: checker = fun fw -> | _ -> error end | Pbctrl sg -> - begin match match_set_cr6 sg ecode with - | Some(BCCTRx(bo, bi, lk) :: es) -> + begin match ecode with + | BCCTRx(bo, bi, lk) :: es -> OK(fw) >>= match_bo_ctr bo >>= match_ints 0 bi @@ -1035,6 +836,18 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Pbdnz(lbl) -> + begin match ecode with + | BCx (bo, bi, bd, aa, lk) :: es when match_bo_bdnz_bool bo -> + let lblvaddr = Int32.(add pc (mul 4l (exts bd))) in + OK(fw) + >>= match_ints 0 bi + >>= lblmap_unify lbl lblvaddr + >>= match_bools false aa + >>= match_bools false lk + >>= recur_simpl + | _ -> error + end | Pbf(bit, lbl) -> begin match ecode with | BCx(bo, bi, bd, aa, lk) :: es when match_bo_bf_bool bo -> @@ -1064,8 +877,8 @@ let rec compare_code ccode ecode pc: checker = fun fw -> | _ -> error end | Pbl(ident, sg) -> - begin match match_set_cr6 sg ecode with - | Some(Bx(li, aa, lk) :: es) -> + begin match ecode with + | Bx(li, aa, lk) :: es -> let dest = Int32.(add pc (mul 4l (exts li))) in OK(fw) >>= (ff_sf ^%=? idmap_unify ident dest) @@ -1085,8 +898,8 @@ let rec compare_code ccode ecode pc: checker = fun fw -> | _ -> error end | Pbs(ident, sg) -> - begin match match_set_cr6 sg ecode with - | Some(Bx(li, aa, lk) :: es) -> + begin match ecode with + | Bx(li, aa, lk) :: es -> let dest = Int32.(add pc (mul 4l (exts li))) in OK(fw) >>= match_bools false aa @@ -1156,386 +969,12 @@ let rec compare_code ccode ecode pc: checker = fun fw -> end | Pbuiltin(ef, args, res) -> begin match ef with - | EF_builtin(name, sg) -> - begin match Hashtbl.find - (fw |. ff_sf).ident_to_name name, args, res with - | "__builtin_mulhw", [IR a1; IR a2], [IR res] -> - begin match ecode with - | MULHWx(rD, rA, rB, rc) :: es -> - OK(fw) - >>= match_iregs res rD - >>= match_iregs a1 rA - >>= match_iregs a2 rB - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_mulhwu", [IR a1; IR a2], [IR res] -> - begin match ecode with - | MULHWUx(rD, rA, rB, rc) :: es -> - OK(fw) - >>= match_iregs res rD - >>= match_iregs a1 rA - >>= match_iregs a2 rB - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_cntlz", [IR a1], [IR res] -> - begin match ecode with - | CNTLZWx(rS, rA, rc) :: es -> - OK(fw) - >>= match_iregs a1 rS - >>= match_iregs res rA - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | ("__builtin_bswap"|"__builtin_bswap32"), [IR a1], [IR res] -> - begin match ecode with - | STWU (rS0, rA0, d0) :: - LWBRX(rD1, rA1, rB1) :: - ADDI (rD2, rA2, simm2) :: es -> - OK(fw) - >>= match_iregs a1 rS0 - >>= match_iregs GPR1 rA0 - >>= match_int32s (-8l) (exts d0) - >>= match_iregs res rD1 - >>= match_iregs GPR0 rA1 - >>= match_iregs GPR1 rB1 - >>= match_iregs GPR1 rD2 - >>= match_iregs GPR1 rA2 - >>= match_int32s 8l (exts simm2) - >>= compare_code cs es (Int32.add 12l pc) - | _ -> error - end - | "__builtin_bswap16", [IR a1], [IR res] -> - begin match ecode with - | RLWINMx(rS1, rA1, sh1, mb1, me1, rc1) :: - RLWINMx(rS2, rA2, sh2, mb2, me2, rc2) :: - ORx(rS3, rA3, rB3, rc3) :: es -> - OK(fw) - >>= match_iregs GPR0 rS1 - >>= match_iregs a1 rA1 - >>= check_eq "bswap16-1" sh1 8 - >>= check_eq "bswap16-2" mb1 16 - >>= check_eq "bswap16-3" me1 23 - >>= match_iregs res rS2 - >>= match_iregs a1 rA2 - >>= check_eq "bswap16-4" sh2 24 - >>= check_eq "bswap16-5" mb2 24 - >>= check_eq "bswap16-6" me2 31 - >>= match_iregs res rS3 - >>= match_iregs GPR0 rA3 - >>= match_iregs res rB3 - >>= compare_code cs es (Int32.add 12l pc) - | _ -> error - end - | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] -> - begin match ecode with - | FMADDx(frD, frA, frB, frC, rc) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_fregs a1 frA - >>= match_fregs a3 frB - >>= match_fregs a2 frC - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] -> - begin match ecode with - | FMSUBx(frD, frA, frB, frC, rc) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_fregs a1 frA - >>= match_fregs a3 frB - >>= match_fregs a2 frC - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] -> - begin match ecode with - | FNMADDx(frD, frA, frB, frC, rc) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_fregs a1 frA - >>= match_fregs a3 frB - >>= match_fregs a2 frC - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] -> - begin match ecode with - | FNMSUBx(frD, frA, frB, frC, rc) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_fregs a1 frA - >>= match_fregs a3 frB - >>= match_fregs a2 frC - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_fabs", [FR a1], [FR res] -> - begin match ecode with - | FABSx(frD, frB, rc) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_fregs a1 frB - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_fsqrt", [FR a1], [FR res] -> - begin match ecode with - | FSQRTx(frD, frB, rc) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_fregs a1 frB - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_frsqrte", [FR a1], [FR res] -> - begin match ecode with - | FRSQRTEx(frD, frB, rc) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_fregs a1 frB - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_fres", [FR a1], [FR res] -> - begin match ecode with - | FRESx(frD, frB, rc) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_fregs a1 frB - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] -> - begin match ecode with - | FSELx(frD, frA, frB, frC, rc) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_fregs a1 frA - >>= match_fregs a3 frB - >>= match_fregs a2 frC - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end - | "__builtin_fcti", [FR r1], [IR rd] -> - begin match ecode with - | FCTIWx(frD0, frB0, rc0) :: - STFDU (frS1, rA1, d1) :: - LWZ (rD2, rA2, d2) :: - ADDI (rD3, rA3, simm3) :: es -> - OK(fw) - >>= match_fregs FPR13 frD0 - >>= match_fregs r1 frB0 - >>= match_bools false rc0 - >>= match_fregs FPR13 frS1 - >>= match_iregs GPR1 rA1 - >>= match_int32s (-8l) (exts d1) - >>= match_iregs rd rD2 - >>= match_iregs GPR1 rA2 - >>= match_int32s 4l (exts d2) - >>= match_iregs GPR1 rD3 - >>= match_iregs GPR1 rA3 - >>= match_int32s 8l (exts simm3) - >>= compare_code cs es (Int32.add 16l pc) - | _ -> error - end - | "__builtin_read16_reversed", [IR a1], [IR res] -> - begin match ecode with - | LHBRX(rD, rA, rB):: es -> - OK(fw) - >>= match_iregs res rD - >>= match_iregs GPR0 rA - >>= match_iregs a1 rB - >>= recur_simpl - | _ -> error - end - | "__builtin_read32_reversed", [IR a1], [IR res] -> - begin match ecode with - | LWBRX(rD, rA, rB) :: es -> - OK(fw) - >>= match_iregs res rD - >>= match_iregs GPR0 rA - >>= match_iregs a1 rB - >>= recur_simpl - | _ -> error - end - | "__builtin_write16_reversed", [IR a1; IR a2], _ -> - begin match ecode with - | STHBRX(rS, rA, rB) :: es -> - OK(fw) - >>= match_iregs a2 rS - >>= match_iregs GPR0 rA - >>= match_iregs a1 rB - >>= recur_simpl - | _ -> error - end - | "__builtin_write32_reversed", [IR a1; IR a2], _ -> - begin match ecode with - | STWBRX(rS, rA, rB) :: es -> - OK(fw) - >>= match_iregs a2 rS - >>= match_iregs GPR0 rA - >>= match_iregs a1 rB - >>= recur_simpl - | _ -> error - end - | "__builtin_eieio", [], _ -> - begin match ecode with - | EIEIO :: es -> - OK(fw) - >>= recur_simpl - | _ -> error - end - | "__builtin_sync", [], _ -> - begin match ecode with - | SYNC :: es -> - OK(fw) - >>= recur_simpl - | _ -> error - end - | "__builtin_isync", [], _ -> - begin match ecode with - | ISYNC :: es -> - OK(fw) - >>= recur_simpl - | _ -> error - end - | "__builtin_trap", [], _ -> - begin match ecode with - | TW(tO, rA, rB) :: es -> - OK(fw) - >>= (fun ffw -> - bitmatch tO with - | { 31 : 5 : int } -> OK(ffw) - | { _ } -> ERR("bitmatch") - ) - >>= match_iregs GPR0 rA - >>= match_iregs GPR0 rB - >>= recur_simpl - | _ -> error - end - | _ -> error - end - | EF_vload(chunk) -> - begin match args with - | [IR addr] -> - OK(fw) - >>= check_builtin_vload_common - cs ecode pc chunk addr (Cint Integers.Int.zero) res - | _ -> fatal "Unexpected args in EF_vload, please report." - end - | EF_vstore(chunk) -> - begin match args with - | [IR addr; src] -> - OK(fw) - >>= check_builtin_vstore_common - cs ecode pc chunk addr (Cint Integers.Int.zero) src - | _ -> fatal "Unexpected args in EF_vstore, please report." - end - | EF_vload_global(chunk, ident, ofs) -> - begin match ecode with - | ADDIS(rD, rA, simm) :: es -> - let high = Csymbol_high(ident, ofs) in - OK(fw) - >>= match_iregs GPR11 rD - >>= match_iregs GPR0 rA - >>= match_csts high (Safe32.of_int simm) - >>= check_builtin_vload_common - cs es (Int32.add pc 4l) chunk GPR11 - (Csymbol_low(ident, ofs)) res - | _ -> error - end - | EF_vstore_global(chunk, ident, ofs) -> - begin match args with - | [src] -> - begin match ecode with - | ADDIS(rD, rA, simm) :: es -> - let tmp = - if src = IR GPR11 - then GPR12 - else GPR11 - in - let high = Csymbol_high(ident, ofs) in - OK(fw) - >>= match_iregs tmp rD - >>= match_iregs GPR0 rA - >>= match_csts high (Safe32.of_int simm) - >>= check_builtin_vstore_common - cs es (Int32.add pc 4l) chunk tmp - (Csymbol_low(ident, ofs)) src - | _ -> error - end - | _ -> fatal "Unexpected args in EF_vstore_global, please report." - end - | EF_memcpy(sz, al) -> - let sz = z_int sz in - let al = z_int al in - begin match args with - | [IR dst; IR src] -> - if sz <= 48 - then ( - match match_memcpy_small ecode pc sz al src dst fw with - | ERR(s) -> ERR(s) - | OK(fw, es, pc) -> compare_code cs es pc fw - ) - else ( - match match_memcpy_big ecode pc sz al src dst fw with - | ERR(s) -> ERR(s) - | OK(fw, es, pc) -> compare_code cs es pc fw - ) - | _ -> error - end - | EF_annot_val(text, targ) -> - begin match args, res with - | IR src :: _, [IR dst] -> - if dst <> src - then ( - match ecode with - | ORx(rS, rA, rB, rc) :: es -> - OK(fw) - >>= match_iregs src rS - >>= match_iregs dst rA - >>= match_iregs src rB - >>= match_bools false rc - >>= recur_simpl - | _ -> error - ) - else compare_code cs ecode pc fw - | FR src :: _, [FR dst] -> - if dst <> src - then ( - match ecode with - | FMRx(frD, frB, rc) :: es -> - OK(fw) - >>= match_fregs dst frD - >>= match_fregs src frB - >>= match_bools false rc - >>= recur_simpl - | _ -> error - ) - else compare_code cs ecode pc fw - | _ -> error - end - | EF_annot(_, _) -> fatal "Unexpected EF_annot, please report." - | EF_external(_, _) -> fatal "Unexpected EF_external, please report." - | EF_free -> fatal "Unexpected EF_free, please report." - | EF_malloc -> fatal "Unexpected EF_malloc, please report." | EF_inline_asm(_) -> fatal "Unsupported: inline asm statement." + | _ -> fatal "Unexpected Pbuiltin, please report." end + | Pcfi_adjust _ | Pcfi_rel_offset _ -> + OK(fw) + >>= compare_code cs ecode pc | Pcmplw(r1, r2) -> begin match ecode with | CMPL(crfD, l, rA, rB) :: es -> @@ -1580,6 +1019,26 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Pcntlz(r1, r2) -> + begin match ecode with + | CNTLZWx(rS, rA, rc) :: es -> + OK(fw) + >>= match_iregs r2 rS + >>= match_iregs r1 rA + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end + | Pcreqv(bd, b1, b2) -> + begin match ecode with + | CREQV(crbD, crbA, crbB) :: es -> + OK(fw) + >>= match_crbits bd crbD + >>= match_crbits b1 crbA + >>= match_crbits b2 crbB + >>= recur_simpl + | _ -> error + end | Pcror(bd, b1, b2) -> begin match ecode with | CROR(crbD, crbA, crbB) :: es -> @@ -1590,6 +1049,16 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Pcrxor(bd, b1, b2) -> + begin match ecode with + | CRXOR(crbD, crbA, crbB) :: es -> + OK(fw) + >>= match_crbits bd crbD + >>= match_crbits b1 crbA + >>= match_crbits b2 crbB + >>= recur_simpl + | _ -> error + end | Pdivw(rd, r1, r2) -> begin match ecode with | DIVWx(rD, rA, rB, oe, rc) :: es -> @@ -1614,6 +1083,13 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Peieio -> + begin match ecode with + | EIEIO :: es -> + OK(fw) + >>= recur_simpl + | _ -> error + end | Peqv(rd, r1, r2) -> begin match ecode with | EQVx(rS, rA, rB, rc) :: es -> @@ -1688,25 +1164,25 @@ let rec compare_code ccode ecode pc: checker = fun fw -> | _ -> error end | Pfcti(rd, r1) -> + error + | Pfctiw(rd, r1) -> begin match ecode with - | FCTIWZx(frD0, frB0, rc0) :: - STFDU (frS1, rA1, d1) :: - LWZ (rD2, rA2, d2) :: - ADDI (rD3, rA3, simm3) :: es -> + | FCTIWx(frD0, frB0, rc0) :: es -> OK(fw) - >>= match_fregs FPR13 frD0 + >>= match_fregs rd frD0 >>= match_fregs r1 frB0 >>= match_bools false rc0 - >>= match_fregs FPR13 frS1 - >>= match_iregs GPR1 rA1 - >>= match_int32s (-8l) (exts d1) - >>= match_iregs rd rD2 - >>= match_iregs GPR1 rA2 - >>= match_int32s 4l (exts d2) - >>= match_iregs GPR1 rD3 - >>= match_iregs GPR1 rA3 - >>= match_int32s 8l (exts simm3) - >>= compare_code cs es (Int32.add 16l pc) + >>= recur_simpl + | _ -> error + end + | Pfctiwz(rd, r1) -> + begin match ecode with + | FCTIWZx(frD0, frB0, rc0) :: es -> + OK(fw) + >>= match_fregs rd frD0 + >>= match_fregs r1 frB0 + >>= match_bools false rc0 + >>= recur_simpl | _ -> error end | Pfdiv(rd, r1, r2) -> @@ -1732,27 +1208,7 @@ let rec compare_code ccode ecode pc: checker = fun fw -> | _ -> error end | Pfmake(rd, r1, r2) -> - begin match ecode with - | STWU (rS0, rA0, d0) :: - STW (rS1, rA1, d1) :: - LFD (frD2, rA2, d2) :: - ADDI (rD3, rA3, simm3) :: es -> - OK(fw) - >>= match_iregs r1 rS0 - >>= match_iregs GPR1 rA0 - >>= match_int32s (-8l) (exts d0) - >>= match_iregs r2 rS1 - >>= match_iregs GPR1 rA1 - >>= match_int32s 4l (exts d1) - >>= match_fregs rd frD2 - >>= match_iregs GPR1 rA2 - >>= match_int32s 0l (exts d2) - >>= match_iregs GPR1 rD3 - >>= match_iregs GPR1 rA3 - >>= match_int32s 8l (exts simm3) - >>= compare_code cs es (Int32.add 16l pc) - | _ -> error - end + error | Pfmr(rd, r1) -> begin match ecode with | FMRx(frD, frB, rc) :: es -> @@ -1796,21 +1252,7 @@ let rec compare_code ccode ecode pc: checker = fun fw -> | _ -> error end | Pfreeframe(sz, ofs) -> - begin match ecode with - | ADDI(rD, rA, simm) :: es -> - OK(fw) - >>= match_iregs GPR1 rD - >>= match_iregs GPR1 rA - >>= match_z_int32 sz (exts simm) - >>= recur_simpl - | LWZ(rD, rA, d) :: es -> - OK(fw) - >>= match_iregs GPR1 rD - >>= match_iregs GPR1 rA - >>= match_z_int32 ofs (exts d) - >>= recur_simpl - | _ -> error - end + error | Pfrsp(rd, r1) -> begin match ecode with | FRSPx(frD, frB, rc) :: es -> @@ -1822,16 +1264,7 @@ let rec compare_code ccode ecode pc: checker = fun fw -> | _ -> error end | Pfxdp(rd, r1) -> - if rd = r1 then OK(fw) >>= compare_code cs ecode pc else - begin match ecode with - | FMRx(frD, frB, rc) :: es -> - OK(fw) - >>= match_fregs rd frD - >>= match_fregs r1 frB - >>= match_bools false rc - >>= recur_simpl - | _ -> error - end + error | Pfsub(rd, r1, r2) -> begin match ecode with | FSUBx(frD, frA, frB, rc) :: es -> @@ -1854,6 +1287,103 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Pfmadd(rd, r1, r2, r3) -> + begin match ecode with + | FMADDx(frD, frA, frB, frC, rc) :: es -> + OK(fw) + >>= match_fregs rd frD + >>= match_fregs r1 frA + >>= match_fregs r3 frB + >>= match_fregs r2 frC + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end + | Pfmsub(rd, r1, r2, r3) -> + begin match ecode with + | FMSUBx(frD, frA, frB, frC, rc) :: es -> + OK(fw) + >>= match_fregs rd frD + >>= match_fregs r1 frA + >>= match_fregs r3 frB + >>= match_fregs r2 frC + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end + | Pfnmadd(rd, r1, r2, r3) -> + begin match ecode with + | FNMADDx(frD, frA, frB, frC, rc) :: es -> + OK(fw) + >>= match_fregs rd frD + >>= match_fregs r1 frA + >>= match_fregs r3 frB + >>= match_fregs r2 frC + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end + | Pfnmsub(rd, r1, r2, r3) -> + begin match ecode with + | FNMSUBx(frD, frA, frB, frC, rc) :: es -> + OK(fw) + >>= match_fregs rd frD + >>= match_fregs r1 frA + >>= match_fregs r3 frB + >>= match_fregs r2 frC + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end + | Pfsqrt(rd, r1) -> + begin match ecode with + | FSQRTx(frD, frB, rc) :: es -> + OK(fw) + >>= match_fregs rd frD + >>= match_fregs r1 frB + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end + | Pfrsqrte(rd, r1) -> + begin match ecode with + | FRSQRTEx(frD, frB, rc) :: es -> + OK(fw) + >>= match_fregs rd frD + >>= match_fregs r1 frB + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end + | Pfres(rd, r1) -> + begin match ecode with + | FRESx(frD, frB, rc) :: es -> + OK(fw) + >>= match_fregs rd frD + >>= match_fregs r1 frB + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end + | Pfsel(rd, r1, r2, r3) -> + begin match ecode with + | FSELx(frD, frA, frB, frC, rc) :: es -> + OK(fw) + >>= match_fregs rd frD + >>= match_fregs r1 frA + >>= match_fregs r3 frB + >>= match_fregs r2 frC + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end + | Pisync -> + begin match ecode with + | ISYNC :: es -> + OK(fw) + >>= recur_simpl + | _ -> error + end | Plabel(lbl) -> OK(fw) >>= lblmap_unify lbl pc @@ -1942,7 +1472,7 @@ let rec compare_code ccode ecode pc: checker = fun fw -> | Some(bs, pofs, psize) -> let f = bitmatch bs with - | { f : 64 : int } -> Int64.float_of_bits f + | { f : 64 : int } -> f in OK(fw) >>= (fun ffw -> @@ -1994,7 +1524,7 @@ let rec compare_code ccode ecode pc: checker = fun fw -> | Some(bs, pofs, psize) -> let f = bitmatch bs with - | { f : 32 : int } -> Int32.float_of_bits f + | { f : 32 : int } -> f in OK(fw) >>= (fun ffw -> @@ -2014,7 +1544,7 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= match_iregs GPR12 rD0 >>= match_iregs GPR0 rA0 >>= match_fregs r1 frD1 - >>= match_floats32 c f + >>= match_floats32 c f >>^ (ff_ef ^%= add_range pofs psize 4 (Float32_literal(f))) >>= match_iregs GPR12 rA1 >>= continue @@ -2079,6 +1609,16 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Plhbrx(rd, r1, r2) -> + begin match ecode with + | LHBRX(rD, rA, rB):: es -> + OK(fw) + >>= match_iregs rd rD + >>= match_iregs r1 rA + >>= match_iregs r2 rB + >>= recur_simpl + | _ -> error + end | Plhz(rd, Csymbol_sda(ident, ofs), r1) -> begin match ecode with | LHZ(rD, rA, d) :: es -> @@ -2108,6 +1648,16 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Plwbrx(rd, r1, r2) -> + begin match ecode with + | LWBRX(rD, rA, rB):: es -> + OK(fw) + >>= match_iregs rd rD + >>= match_iregs r1 rA + >>= match_iregs r2 rB + >>= recur_simpl + | _ -> error + end | Plwz(rd, Csymbol_sda(ident, ofs), r1) -> begin match ecode with | LWZ(rD, rA, d) :: es -> @@ -2127,6 +1677,16 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Plwzu(rd, cst, r1) -> + begin match ecode with + | LWZU(rD, rA, d) :: es -> + OK(fw) + >>= match_iregs rd rD + >>= match_iregs r1 rA + >>= match_csts cst (exts d) + >>= recur_simpl + | _ -> error + end | Plwzx(rd, r1, r2) | Plwzx_a(rd, r1, r2) -> begin match ecode with | LWZX(rD, rA, rB) :: es -> @@ -2137,21 +1697,16 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end - | Pmfcrbit(rd, bit) -> + | Pmfcr rd -> begin match ecode with - | MFCR (rD0) :: - RLWINMx(rS1, rA1, sh1, mb1, me1, rc1) :: es -> + | MFCR (rD0) :: es -> OK(fw) >>= match_iregs rd rD0 - >>= match_iregs rd rS1 - >>= match_iregs rd rA1 - >>= match_crbits bit (sh1 - 1) - >>= match_ints 31 mb1 - >>= match_ints 31 me1 - >>= match_bools false rc1 - >>= compare_code cs es (Int32.add 8l pc) + >>= recur_simpl | _ -> error - end + end + | Pmfcrbit(rd, bit) -> + error | Pmflr(r) -> begin match ecode with | MFSPR(rD, spr) :: es -> @@ -2395,6 +1950,16 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Pstfdu(rd, cst, r1) -> + begin match ecode with + | STFDU(frS, rA, d) :: es -> + OK(fw) + >>= match_fregs rd frS + >>= match_iregs r1 rA + >>= match_csts cst (exts d) + >>= recur_simpl + | _ -> error + end | Pstfdx(rd, r1, r2) | Pstfdx_a(rd, r1, r2) -> begin match ecode with | STFDX(frS, rA, rB) :: es -> @@ -2445,6 +2010,16 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Psthbrx(rd, r1, r2) -> + begin match ecode with + | STHBRX(rS, rA, rB) :: es -> + OK(fw) + >>= match_iregs rd rS + >>= match_iregs r1 rA + >>= match_iregs r2 rB + >>= recur_simpl + | _ -> error + end | Pstw(rd, cst, r1) | Pstw_a(rd, cst, r1) -> begin match ecode with | STW(rS, rA, d) :: es -> @@ -2455,6 +2030,16 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Pstwu(rd, cst, r1) -> + begin match ecode with + | STWU(rS, rA, d) :: es -> + OK(fw) + >>= match_iregs rd rS + >>= match_iregs r1 rA + >>= match_csts cst (exts d) + >>= recur_simpl + | _ -> error + end | Pstwx(rd, r1, r2) | Pstwx_a(rd, r1, r2) -> begin match ecode with | STWX(rS, rA, rB) :: es -> @@ -2465,6 +2050,26 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Pstwbrx(rd, r1, r2) -> + begin match ecode with + | STWBRX(rS, rA, rB) :: es -> + OK(fw) + >>= match_iregs rd rS + >>= match_iregs r1 rA + >>= match_iregs r2 rB + >>= recur_simpl + | _ -> error + end + | Pstwxu(rd, r1, r2) -> + begin match ecode with + | STWUX(rS, rA, rB) :: es -> + OK(fw) + >>= match_iregs rd rS + >>= match_iregs r1 rA + >>= match_iregs r2 rB + >>= recur_simpl + | _ -> error + end | Psubfc(rd, r1, r2) -> begin match ecode with | SUBFCx(rD, rA, rB, oe, rc) :: es -> @@ -2489,6 +2094,17 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Psubfze(rd, r1) -> + begin match ecode with + | SUBFZEx(rD, rA, oe, rc) :: es -> + OK(fw) + >>= match_iregs rd rD + >>= match_iregs r1 rA + >>= match_bools false oe + >>= match_bools false rc + >>= recur_simpl + | _ -> error + end | Psubfic(rd, r1, cst) -> begin match ecode with | SUBFIC(rD, rA, simm) :: es -> @@ -2499,6 +2115,27 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end + | Psync -> + begin match ecode with + | SYNC :: es -> + OK(fw) + >>= recur_simpl + | _ -> error + end + | Ptrap -> + begin match ecode with + | TW(tO, rA, rB) :: es -> + OK(fw) + >>= (fun ffw -> + bitmatch tO with + | { 31 : 5 : int } -> OK(ffw) + | { _ } -> ERR("bitmatch") + ) + >>= match_iregs GPR0 rA + >>= match_iregs GPR0 rB + >>= recur_simpl + | _ -> error + end | Pxor(rd, r1, r2) -> begin match ecode with | XORx(rS, rA, rB, rc) :: es -> @@ -2530,142 +2167,6 @@ let rec compare_code ccode ecode pc: checker = fun fw -> >>= recur_simpl | _ -> error end -and check_builtin_vload_common ccode ecode pc chunk addr offset res fw = - let error = ERR("Non-matching instructions") in - let recur_simpl = compare_code ccode (List.tl ecode) (Int32.add pc 4l) in - begin match chunk, res with - | Mint8unsigned, [IR res] -> - begin match ecode with - | LBZ(rD, rA, d) :: es -> - OK(fw) - >>= match_iregs res rD - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | Mint8signed, [IR res] -> - begin match ecode with - | LBZ (rD0, rA0, d0) :: - EXTSBx(rS1, rA1, rc1) :: es -> - OK(fw) - >>= match_iregs res rD0 - >>= match_iregs addr rA0 - >>= match_csts offset (exts d0) - >>= match_iregs res rS1 - >>= match_iregs res rA1 - >>= match_bools false rc1 - >>= compare_code ccode es (Int32.add 8l pc) - | _ -> error - end - | Mint16unsigned, [IR res] -> - begin match ecode with - | LHZ(rD, rA, d) :: es -> - OK(fw) - >>= match_iregs res rD - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | Mint16signed, [IR res] -> - begin match ecode with - | LHA(rD, rA, d) :: es -> - OK(fw) - >>= match_iregs res rD - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | Mint32, [IR res] -> - begin match ecode with - | LWZ(rD, rA, d) :: es -> - OK(fw) - >>= match_iregs res rD - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | Mfloat32, [FR res] -> - begin match ecode with - | LFS(frD, rA, d) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | Mfloat64, [FR res] -> - begin match ecode with - | LFD(frD, rA, d) :: es -> - OK(fw) - >>= match_fregs res frD - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | _ -> error - end -and check_builtin_vstore_common ccode ecode pc chunk addr offset src fw = - let recur_simpl = compare_code ccode (List.tl ecode) (Int32.add pc 4l) in - let error = ERR("Non-matching instructions") in - begin match chunk, src with - | (Mint8signed | Mint8unsigned), IR src -> - begin match ecode with - | STB(rS, rA, d) :: es -> - OK(fw) - >>= match_iregs src rS - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | (Mint16signed | Mint16unsigned), IR src -> - begin match ecode with - | STH(rS, rA, d) :: es -> - OK(fw) - >>= match_iregs src rS - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | Mint32, IR src -> - begin match ecode with - | STW(rS, rA, d) :: es -> - OK(fw) - >>= match_iregs src rS - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | Mfloat32, FR src -> - begin match ecode with - | STFS(frS, rA, d) :: es -> - OK(fw) - >>= match_fregs src frS - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | Mfloat64, FR src -> - begin match ecode with - | STFD(frS, rA, d) :: es -> - OK(fw) - >>= match_fregs src frS - >>= match_iregs addr rA - >>= match_csts offset (exts d) - >>= recur_simpl - | _ -> error - end - | _ -> error - end (** A work element is a triple giving a CompCert ident for the function to analyze, its name as a string, and the actual code. It is not obvious how @@ -2822,7 +2323,7 @@ let compare_data (l: init_data list) (bs: bitstring) (sfw: s_framework) | Init_float32(f) -> ( bitmatch bs with | { j : 32 : int; bs : -1 : bitstring } -> - if camlfloat_of_coqfloat f = Int32.float_of_bits j + if camlint_of_coqint (Floats.Float32.to_bits f) = j then compare_data_aux l bs (s + 4) sfw else ERR("Wrong float32") | { _ } -> error @@ -2830,7 +2331,7 @@ let compare_data (l: init_data list) (bs: bitstring) (sfw: s_framework) | Init_float64(f) -> ( bitmatch bs with | { j : 64 : int; bs : -1 : bitstring } -> - if camlfloat_of_coqfloat f = Int64.float_of_bits j + if camlint64_of_coqint (Floats.Float.to_bits f) = j then compare_data_aux l bs (s + 8) sfw else ERR("Wrong float64") | { _ } -> error @@ -3276,6 +2777,7 @@ let warn_sections_remapping efw = print_debug "Checking remapped sections"; StringMap.fold (fun c_name (e_name, conflicts) efw -> + if c_name = "COMM" then efw else if StringSet.is_empty conflicts then match e_name with diff --git a/checklink/Frameworks.ml b/checklink/Frameworks.ml index 0f7ec44..30c0b38 100644 --- a/checklink/Frameworks.ml +++ b/checklink/Frameworks.ml @@ -24,8 +24,8 @@ type byte_chunk_desc = | Zero_symbol | Stub of string | Jumptable - | Float_literal of float - | Float32_literal of float + | Float_literal of int64 + | Float32_literal of int32 | Padding | Unknown of string @@ -208,7 +208,7 @@ let string_of_byte_chunk_desc = function | Zero_symbol -> "Symbol 0" | Stub(s) -> "Stub for: " ^ s | Jumptable -> "Jump table" -| Float_literal(f) -> "Float literal: " ^ string_of_float f -| Float32_literal(f) -> "Float32 literal: " ^ string_of_float f +| Float_literal(f) -> "Float literal: " ^ string_of_int64 f +| Float32_literal(f) -> "Float32 literal: " ^ string_of_int32 f | Padding -> "Padding" | Unknown(s) -> "???" ^ (if !verbose_elfmap then s else "") diff --git a/checklink/Fuzz.ml b/checklink/Fuzz.ml index d7947ee..dc98493 100644 --- a/checklink/Fuzz.ml +++ b/checklink/Fuzz.ml @@ -75,7 +75,6 @@ let fuzz_check elfmap bs byte old sdumps = let ok_fuzz elfmap str byte fuzz = let (a, b, _, r) = full_range_of_byte elfmap byte in let a = Safe32.to_int a in - let b = Safe32.to_int b in let old = Char.code str.[byte] in let fuz = Char.code fuzz in match r with @@ -98,13 +97,7 @@ let ok_fuzz elfmap str byte fuzz = && ((old land 0xf = 0) || (fuz land 0xf = 0)) ) | Symtab_function(_) -> true - | Data_symbol(_) -> - (* False positive: 0. becomes -0. *) - not ( - (byte + 7 <= b) - && (fuz = 0x80) (* sign bit *) - && String.sub str byte 8 = "\000\000\000\000\000\000\000\000" - ) + | Data_symbol(_) -> true | Function_symbol(_) -> let opcode = Char.code str.[byte - 3] in (* False positive: rlwinm with bitmask 0 31 = bitmask n (n - 1) *) @@ -113,14 +106,8 @@ let ok_fuzz elfmap str byte fuzz = | Zero_symbol -> false | Stub(_) -> true | Jumptable -> true - | Float_literal(_) -> (* FIXME: this shouldn't be a false positive! *) - (* False positive: 0. becomes -0. *) - not ( - (byte = a) - && (fuz = 0x80) (* sign bit *) - && String.sub str byte 8 = "\000\000\000\000\000\000\000\000" - ) - | Float32_literal(_) -> true + | Float_literal(_) -> true + | Float32_literal(_) -> true (* padding is allowed to be non-null, but won't be recognized as padding, but as unknown, which is not an ERROR *) | Padding -> false diff --git a/driver/Driver.ml b/driver/Driver.ml index 556a476..09451eb 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -152,7 +152,8 @@ let compile_c_ast sourcename csyntax ofile = (* Convert to Asm *) let asm = match Compiler.transf_c_program csyntax with - | Errors.OK x -> x + | Errors.OK asm -> + Asmexpand.expand_program (Unusedglob.transf_program asm) | Errors.Error msg -> print_error stderr msg; exit 2 in @@ -161,7 +162,7 @@ let compile_c_ast sourcename csyntax ofile = dump_asm asm (output_filename sourcename ".c" ".sdump"); (* Print Asm in text form *) let oc = open_out ofile in - PrintAsm.print_program oc (Unusedglob.transf_program asm); + PrintAsm.print_program oc asm; close_out oc (* From C source to asm *) diff --git a/extraction/extraction.v b/extraction/extraction.v index f0033af..d987629 100644 --- a/extraction/extraction.v +++ b/extraction/extraction.v @@ -157,4 +157,5 @@ Separate Extraction RTL.instr_defs RTL.instr_uses Machregs.mregs_for_operation Machregs.mregs_for_builtin Machregs.two_address_op Machregs.is_stack_reg + AST.signature_main Parser.translation_unit_file. diff --git a/ia32/Asmexpand.ml b/ia32/Asmexpand.ml new file mode 100644 index 0000000..9a458c3 --- /dev/null +++ b/ia32/Asmexpand.ml @@ -0,0 +1,18 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(* Expanding built-ins and some pseudo-instructions by rewriting + of the IA32 assembly code. Currently not done, this expansion + is performed on the fly in PrintAsm. *) + +let expand_program p = p + diff --git a/powerpc/Asm.v b/powerpc/Asm.v index a7e5eaf..ab52ca5 100644 --- a/powerpc/Asm.v +++ b/powerpc/Asm.v @@ -69,7 +69,8 @@ Inductive preg: Type := | CR0_0: preg (**r bit 0 of the condition register *) | CR0_1: preg (**r bit 1 of the condition register *) | CR0_2: preg (**r bit 2 of the condition register *) - | CR0_3: preg. (**r bit 3 of the condition register *) + | CR0_3: preg (**r bit 3 of the condition register *) + | CR1_2: preg. (**r bit 6 of the condition register *) Coercion IR: ireg >-> preg. Coercion FR: freg >-> preg. @@ -114,14 +115,15 @@ Inductive constant: Type := range. Of course, our PPC generator (file [Asmgen]) is careful to respect this range. *) -(** Bits in the condition register. We are only interested in the - first 4 bits. *) +(** Bits in the condition register. We are only interested in bits + 0, 1, 2, 3 and 6. *) Inductive crbit: Type := | CRbit_0: crbit | CRbit_1: crbit | CRbit_2: crbit - | CRbit_3: crbit. + | CRbit_3: crbit + | CRbit_6: crbit. (** The instruction set. Most instructions correspond exactly to actual instructions of the PowerPC processor. See the PowerPC @@ -134,12 +136,13 @@ Definition label := positive. Inductive instruction : Type := | Padd: ireg -> ireg -> ireg -> instruction (**r integer addition *) - | Padde: ireg -> ireg -> ireg -> instruction (**r integer addition with carry *) + | Paddc: ireg -> ireg -> ireg -> instruction (**r integer addition and set carry *) + | Padde: ireg -> ireg -> ireg -> instruction (**r integer addition with carry *) | Paddi: ireg -> ireg -> constant -> instruction (**r add immediate *) - | Paddic: ireg -> ireg -> constant -> instruction (**r add immediate and set carry *) + | Paddic: ireg -> ireg -> constant -> instruction (**r add immediate and set carry *) | Paddis: ireg -> ireg -> constant -> instruction (**r add immediate high *) | Paddze: ireg -> ireg -> instruction (**r add carry *) - | Pallocframe: Z -> int -> instruction (**r allocate new stack frame *) + | Pallocframe: Z -> int -> instruction (**r allocate new stack frame (pseudo) *) | Pand_: ireg -> ireg -> ireg -> instruction (**r bitwise and *) | Pandc: ireg -> ireg -> ireg -> instruction (**r bitwise and-complement *) | Pandi_: ireg -> ireg -> constant -> instruction (**r and immediate and set conditions *) @@ -147,32 +150,39 @@ Inductive instruction : Type := | Pb: label -> instruction (**r unconditional branch *) | Pbctr: signature -> instruction (**r branch to contents of register CTR *) | Pbctrl: signature -> instruction (**r branch to contents of CTR and link *) + | Pbdnz: label -> instruction (**r decrement CTR and branch if not zero *) | Pbf: crbit -> label -> instruction (**r branch if false *) | Pbl: ident -> signature -> instruction (**r branch and link *) | Pbs: ident -> signature -> instruction (**r branch to symbol *) | Pblr: instruction (**r branch to contents of register LR *) | Pbt: crbit -> label -> instruction (**r branch if true *) - | Pbtbl: ireg -> list label -> instruction (**r N-way branch through a jump table *) + | Pbtbl: ireg -> list label -> instruction (**r N-way branch through a jump table (pseudo) *) | Pcmplw: ireg -> ireg -> instruction (**r unsigned integer comparison *) | Pcmplwi: ireg -> constant -> instruction (**r same, with immediate argument *) | Pcmpw: ireg -> ireg -> instruction (**r signed integer comparison *) | Pcmpwi: ireg -> constant -> instruction (**r same, with immediate argument *) + | Pcntlz: ireg -> ireg -> instruction (**r count leading zeros *) + | Pcreqv: crbit -> crbit -> crbit -> instruction (**r not-xor between condition bits *) | Pcror: crbit -> crbit -> crbit -> instruction (**r or between condition bits *) + | Pcrxor: crbit -> crbit -> crbit -> instruction (**r xor between condition bits *) | Pdivw: ireg -> ireg -> ireg -> instruction (**r signed division *) | Pdivwu: ireg -> ireg -> ireg -> instruction (**r unsigned division *) + | Peieio: instruction (**r EIEIO barrier *) | Peqv: ireg -> ireg -> ireg -> instruction (**r bitwise not-xor *) | Pextsb: ireg -> ireg -> instruction (**r 8-bit sign extension *) | Pextsh: ireg -> ireg -> instruction (**r 16-bit sign extension *) - | Pfreeframe: Z -> int -> instruction (**r deallocate stack frame and restore previous frame *) + | Pfreeframe: Z -> int -> instruction (**r deallocate stack frame and restore previous frame (pseudo) *) | Pfabs: freg -> freg -> instruction (**r float absolute value *) | Pfabss: freg -> freg -> instruction (**r float absolute value *) | Pfadd: freg -> freg -> freg -> instruction (**r float addition *) | Pfadds: freg -> freg -> freg -> instruction (**r float addition *) | Pfcmpu: freg -> freg -> instruction (**r float comparison *) - | Pfcti: ireg -> freg -> instruction (**r float-to-signed-int conversion *) + | Pfcti: ireg -> freg -> instruction (**r float-to-signed-int conversion, round towards 0 (pseudo) *) + | Pfctiw: freg -> freg -> instruction (**r float-to-signed-int conversion, round by default *) + | Pfctiwz: freg -> freg -> instruction (**r float-to-signed-int conversion, round towards 0 *) | Pfdiv: freg -> freg -> freg -> instruction (**r float division *) | Pfdivs: freg -> freg -> freg -> instruction (**r float division *) - | Pfmake: freg -> ireg -> ireg -> instruction (**r build a float from 2 ints *) + | Pfmake: freg -> ireg -> ireg -> instruction (**r build a float from 2 ints (pseudo) *) | Pfmr: freg -> freg -> instruction (**r float move *) | Pfmul: freg -> freg -> freg -> instruction (**r float multiply *) | Pfmuls: freg -> freg -> freg -> instruction (**r float multiply *) @@ -182,6 +192,15 @@ Inductive instruction : Type := | Pfxdp: freg -> freg -> instruction (**r float extend to double precision (pseudo) *) | Pfsub: freg -> freg -> freg -> instruction (**r float subtraction *) | Pfsubs: freg -> freg -> freg -> instruction (**r float subtraction *) + | Pfmadd: freg -> freg -> freg -> freg -> instruction (**r fused multiply-add *) + | Pfmsub: freg -> freg -> freg -> freg -> instruction (**r fused multiply-sub *) + | Pfnmadd: freg -> freg -> freg -> freg -> instruction (**r fused neg-multiply-add *) + | Pfnmsub: freg -> freg -> freg -> freg -> instruction (**r fused neg-multiply-sub *) + | Pfsqrt: freg -> freg -> instruction (**r square root *) + | Pfrsqrte: freg -> freg -> instruction (**r approximate reciprocal of square root *) + | Pfres: freg -> freg -> instruction (**r approximate inverse *) + | Pfsel: freg -> freg -> freg -> freg -> instruction (**r FP conditional move *) + | Pisync: instruction (**r ISYNC barrier *) | Plbz: ireg -> constant -> ireg -> instruction (**r load 8-bit unsigned int *) | Plbzx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) | Plfd: freg -> constant -> ireg -> instruction (**r load 64-bit float *) @@ -192,15 +211,19 @@ Inductive instruction : Type := | Plfsx: freg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) | Plha: ireg -> constant -> ireg -> instruction (**r load 16-bit signed int *) | Plhax: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Plhbrx: ireg -> ireg -> ireg -> instruction (**r load 16-bit int and reverse endianness *) | Plhz: ireg -> constant -> ireg -> instruction (**r load 16-bit unsigned int *) | Plhzx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) | Plfi: freg -> float -> instruction (**r load float constant *) | Plfis: freg -> float32 -> instruction (**r load float constant *) | Plwz: ireg -> constant -> ireg -> instruction (**r load 32-bit int *) + | Plwzu: ireg -> constant -> ireg -> instruction (**r load 32-bit int with update *) | Plwzx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) | Plwz_a: ireg -> constant -> ireg -> instruction (**r load 32-bit quantity to int reg *) | Plwzx_a: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) - | Pmfcrbit: ireg -> crbit -> instruction (**r move condition bit to reg *) + | Plwbrx: ireg -> ireg -> ireg -> instruction (**r load 32-bit int and reverse endianness *) + | Pmfcr: ireg -> instruction (**r move condition register to reg *) + | Pmfcrbit: ireg -> crbit -> instruction (**r move condition bit to reg (pseudo) *) | Pmflr: ireg -> instruction (**r move LR to reg *) | Pmr: ireg -> ireg -> instruction (**r integer move *) | Pmtctr: ireg -> instruction (**r move ireg to CTR *) @@ -224,6 +247,7 @@ Inductive instruction : Type := | Pstb: ireg -> constant -> ireg -> instruction (**r store 8-bit int *) | Pstbx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) | Pstfd: freg -> constant -> ireg -> instruction (**r store 64-bit float *) + | Pstfdu: freg -> constant -> ireg -> instruction (**r store 64-bit float with update *) | Pstfdx: freg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) | Pstfd_a: freg -> constant -> ireg -> instruction (**r store 64-bit quantity from float reg *) | Pstfdx_a: freg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) @@ -231,19 +255,28 @@ Inductive instruction : Type := | Pstfsx: freg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) | Psth: ireg -> constant -> ireg -> instruction (**r store 16-bit int *) | Psthx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Psthbrx: ireg -> ireg -> ireg -> instruction (**r store 16-bit int with reverse endianness *) | Pstw: ireg -> constant -> ireg -> instruction (**r store 32-bit int *) + | Pstwu: ireg -> constant -> ireg -> instruction (**r store 32-bit int with update *) | Pstwx: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Pstwxu: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs and update *) | Pstw_a: ireg -> constant -> ireg -> instruction (**r store 32-bit quantity from int reg *) | Pstwx_a: ireg -> ireg -> ireg -> instruction (**r same, with 2 index regs *) + | Pstwbrx: ireg -> ireg -> ireg -> instruction (**r store 32-bit int with reverse endianness *) | Psubfc: ireg -> ireg -> ireg -> instruction (**r reversed integer subtraction *) | Psubfe: ireg -> ireg -> ireg -> instruction (**r reversed integer subtraction with carry *) + | Psubfze: ireg -> ireg -> instruction (**r integer opposite with carry *) | Psubfic: ireg -> ireg -> constant -> instruction (**r integer subtraction from immediate *) + | Psync: instruction (**r SYNC barrier *) + | Ptrap: instruction (**r unconditional trap *) | Pxor: ireg -> ireg -> ireg -> instruction (**r bitwise xor *) | Pxori: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate *) | Pxoris: ireg -> ireg -> constant -> instruction (**r bitwise xor with immediate high *) | Plabel: label -> instruction (**r define a code label *) - | Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function *) - | Pannot: external_function -> list annot_param -> instruction (**r annotation statement *) + | Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function (pseudo) *) + | Pannot: external_function -> list annot_param -> instruction (**r annotation statement (pseudo) *) + | Pcfi_adjust: int -> instruction (**r .cfi_adjust debug directive *) + | Pcfi_rel_offset: int -> instruction (**r .cfi_rel_offset lr debug directive *) with annot_param : Type := | APreg: preg -> annot_param @@ -528,6 +561,7 @@ Definition reg_of_crbit (bit: crbit) := | CRbit_1 => CR0_1 | CRbit_2 => CR0_2 | CRbit_3 => CR0_3 + | CRbit_6 => CR1_2 end. Definition compare_sint (rs: regset) (v1 v2: val) := @@ -564,6 +598,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out match i with | Padd rd r1 r2 => Next (nextinstr (rs#rd <- (Val.add rs#r1 rs#r2))) m + | Paddc rd r1 r2 => + Next (nextinstr (rs#rd <- (Val.add rs#r1 rs#r2) + #CARRY <- (Val.add_carry rs#r1 rs#r2 Vzero))) m | Padde rd r1 r2 => Next (nextinstr (rs #rd <- (Val.add (Val.add rs#r1 rs#r2) rs#CARRY) #CARRY <- (Val.add_carry rs#r1 rs#r2 rs#CARRY))) m @@ -819,10 +856,44 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out Next (nextinstr (rs#rd <- (Val.xor rs#r1 (const_high cst)))) m | Plabel lbl => Next (nextinstr rs) m + | Pcfi_rel_offset ofs => + Next (nextinstr rs) m | Pbuiltin ef args res => Stuck (**r treated specially below *) | Pannot ef args => Stuck (**r treated specially below *) + (** The following instructions and directives are not generated directly by Asmgen, + so we do not model them. *) + | Pbdnz _ + | Pcntlz _ _ + | Pcreqv _ _ _ + | Pcrxor _ _ _ + | Peieio + | Pfctiw _ _ + | Pfctiwz _ _ + | Pfmadd _ _ _ _ + | Pfmsub _ _ _ _ + | Pfnmadd _ _ _ _ + | Pfnmsub _ _ _ _ + | Pfsqrt _ _ + | Pfrsqrte _ _ + | Pfres _ _ + | Pfsel _ _ _ _ + | Plwbrx _ _ _ + | Pisync + | Plhbrx _ _ _ + | Plwzu _ _ _ + | Pmfcr _ + | Pstwbrx _ _ _ + | Pstfdu _ _ _ + | Psthbrx _ _ _ + | Pstwu _ _ _ + | Pstwxu _ _ _ + | Psubfze _ _ + | Psync + | Ptrap + | Pcfi_adjust _ => + Stuck end. (** Translation of the LTL/Linear/Mach view of machine registers diff --git a/powerpc/Asmexpand.ml b/powerpc/Asmexpand.ml new file mode 100644 index 0000000..243a4d9 --- /dev/null +++ b/powerpc/Asmexpand.ml @@ -0,0 +1,525 @@ +(* *********************************************************************) +(* *) +(* The Compcert verified compiler *) +(* *) +(* Xavier Leroy, INRIA Paris-Rocquencourt *) +(* *) +(* Copyright Institut National de Recherche en Informatique et en *) +(* Automatique. All rights reserved. This file is distributed *) +(* under the terms of the INRIA Non-Commercial License Agreement. *) +(* *) +(* *********************************************************************) + +(* Expanding built-ins and some pseudo-instructions by rewriting + of the PPC assembly code. *) + +open Datatypes +open Camlcoq +open Integers +open AST +open Memdata +open Asm + +(* Buffering the expanded code *) + +let current_code = ref ([]: instruction list) + +let emit i = current_code := i :: !current_code + +let emit_loadimm r n = + List.iter emit (Asmgen.loadimm r n []) + +let emit_addimm rd rs n = + List.iter emit (Asmgen.addimm rd rs n []) + +let get_code () = + let c = List.rev !current_code in current_code := []; c + +(* Generation of fresh labels *) + +let dummy_function = { fn_code = []; fn_sig = signature_main } +let current_function = ref dummy_function +let next_label = ref (None : label option) + +let new_label () = + let lbl = + match !next_label with + | Some l -> l + | None -> + (* on-demand computation of the next available label *) + List.fold_left + (fun next instr -> + match instr with + | Plabel l -> if P.lt l next then next else P.succ l + | _ -> next) + P.one (!current_function).fn_code + in + next_label := Some (P.succ lbl); + lbl + +let set_current_function f = + current_function := f; next_label := None + +(* Useful constants *) + +let _0 = Integers.Int.zero +let _1 = Integers.Int.one +let _2 = coqint_of_camlint 2l +let _4 = coqint_of_camlint 4l +let _6 = coqint_of_camlint 6l +let _8 = coqint_of_camlint 8l +let _m4 = coqint_of_camlint (-4l) +let _m8 = coqint_of_camlint (-8l) + +(* Handling of annotations *) + +let expand_annot_val txt targ args res = + emit (Pannot(EF_annot(txt, [AA_arg targ]), List.map (fun r -> APreg r) args)); + begin match args, res with + | [IR src], [IR dst] -> + if dst <> src then emit (Pmr(dst, src)) + | [FR src], [FR dst] -> + if dst <> src then emit (Pfmr(dst, src)) + | _, _ -> + assert false + end + +(* Handling of memcpy *) + +(* On the PowerPC, unaligned accesses to 16- and 32-bit integers are + fast, but unaligned accesses to 64-bit floats can be slow + (not so much on G5, but clearly so on Power7). + So, use 64-bit accesses only if alignment >= 4. + Note that lfd and stfd cannot trap on ill-formed floats. *) + +let expand_builtin_memcpy_small sz al src dst = + let rec copy ofs sz = + if sz >= 8 && al >= 4 && !Clflags.option_ffpu then begin + emit (Plfd(FPR13, Cint ofs, src)); + emit (Pstfd(FPR13, Cint ofs, dst)); + copy (Int.add ofs _8) (sz - 8) + end else if sz >= 4 then begin + emit (Plwz(GPR0, Cint ofs, src)); + emit (Pstw(GPR0, Cint ofs, dst)); + copy (Int.add ofs _4) (sz - 4) + end else if sz >= 2 then begin + emit (Plhz(GPR0, Cint ofs, src)); + emit (Psth(GPR0, Cint ofs, dst)); + copy (Int.add ofs _2) (sz - 2) + end else if sz >= 1 then begin + emit (Plbz(GPR0, Cint ofs, src)); + emit (Pstb(GPR0, Cint ofs, dst)); + copy (Int.add ofs _1) (sz - 1) + end in + copy _0 sz + +let expand_builtin_memcpy_big sz al src dst = + assert (sz >= 4); + emit_loadimm GPR0 (Z.of_uint (sz / 4)); + emit (Pmtctr GPR0); + let (s,d) = if dst <> GPR11 then (GPR11, GPR12) else (GPR12, GPR11) in + emit (Paddi(s, src, Cint _m4)); + emit (Paddi(d, dst, Cint _m4)); + let lbl = new_label() in + emit (Plabel lbl); + emit (Plwzu(GPR0, Cint _4, s)); + emit (Pstwu(GPR0, Cint _4, d)); + emit (Pbdnz lbl); + (* s and d lag behind by 4 bytes *) + match sz land 3 with + | 1 -> emit (Plbz(GPR0, Cint _4, s)); + emit (Pstb(GPR0, Cint _4, d)) + | 2 -> emit (Plhz(GPR0, Cint _4, s)); + emit (Psth(GPR0, Cint _4, d)) + | 3 -> emit (Plhz(GPR0, Cint _4, s)); + emit (Psth(GPR0, Cint _4, d)); + emit (Plbz(GPR0, Cint _6, s)); + emit (Pstb(GPR0, Cint _6, d)) + | _ -> () + +let expand_builtin_memcpy sz al args = + let (dst, src) = + match args with [IR d; IR s] -> (d, s) | _ -> assert false in + if sz <= (if !Clflags.option_ffpu && al >= 4 + then if !Clflags.option_Osize then 35 else 51 + else if !Clflags.option_Osize then 19 else 27) + then expand_builtin_memcpy_small sz al src dst + else expand_builtin_memcpy_big sz al src dst + +(* Handling of volatile reads and writes *) + +let expand_builtin_vload_common chunk base offset res = + match chunk, res with + | Mint8unsigned, IR res -> + emit (Plbz(res, offset, base)) + | Mint8signed, IR res -> + emit (Plbz(res, offset, base)); + emit (Pextsb(res, res)) + | Mint16unsigned, IR res -> + emit (Plhz(res, offset, base)) + | Mint16signed, IR res -> + emit (Plha(res, offset, base)) + | (Mint32 | Many32), IR res -> + emit (Plwz(res, offset, base)) + | Mfloat32, FR res -> + emit (Plfs(res, offset, base)) + | (Mfloat64 | Many64), FR res -> + emit (Plfd(res, offset, base)) + (* Mint64 is special-cased below *) + | _ -> + assert false + +let expand_builtin_vload chunk args res = + begin match args, res with + | [IR addr], [res] when chunk <> Mint64 -> + expand_builtin_vload_common chunk addr (Cint _0) res + | [IR addr], [IR res1; IR res2] when chunk = Mint64 -> + if addr <> res1 then begin + emit (Plwz(res1, Cint _0, addr)); + emit (Plwz(res2, Cint _4, addr)) + end else begin + emit (Plwz(res2, Cint _4, addr)); + emit (Plwz(res1, Cint _0, addr)) + end + | _ -> + assert false + end + +let expand_builtin_vload_global chunk id ofs args res = + begin match res with + | [res] when chunk <> Mint64 -> + emit (Paddis(GPR11, GPR0, Csymbol_high(id, ofs))); + expand_builtin_vload_common chunk GPR11 (Csymbol_low(id, ofs)) res + | [IR res1; IR res2] when chunk = Mint64 -> + emit (Paddis(res1, GPR0, Csymbol_high(id, ofs))); + emit (Plwz(res1, Csymbol_low(id, ofs), res1)); + let ofs = Int.add ofs _4 in + emit (Paddis(res2, GPR0, Csymbol_high(id, ofs))); + emit (Plwz(res2, Csymbol_low(id, ofs), res2)) + | _ -> + assert false + end + +let expand_builtin_vload_sda chunk id ofs args res = + begin match res with + | [res] when chunk <> Mint64 -> + expand_builtin_vload_common chunk GPR0 (Csymbol_sda(id, ofs)) res + | [IR res1; IR res2] when chunk = Mint64 -> + emit (Plwz(res1, Csymbol_sda(id, ofs), GPR0)); + let ofs = Int.add ofs _4 in + emit (Plwz(res2, Csymbol_sda(id, ofs), GPR0)) + | _ -> + assert false + end + +let expand_builtin_vstore_common chunk base offset src = + match chunk, src with + | (Mint8signed | Mint8unsigned), IR src -> + emit (Pstb(src, offset, base)) + | (Mint16signed | Mint16unsigned), IR src -> + emit (Psth(src, offset, base)) + | (Mint32 | Many32), IR src -> + emit (Pstw(src, offset, base)) + | Mfloat32, FR src -> + emit (Pstfs(src, offset, base)) + | (Mfloat64 | Many64), FR src -> + emit (Pstfd(src, offset, base)) + (* Mint64 is special-cased below *) + | _ -> + assert false + +let expand_builtin_vstore chunk args = + begin match args with + | [IR addr; src] when chunk <> Mint64 -> + expand_builtin_vstore_common chunk addr (Cint _0) src + | [IR addr; IR src1; IR src2] when chunk = Mint64 -> + emit (Pstw(src1, Cint _0, addr)); + emit (Pstw(src2, Cint _4, addr)) + | _ -> + assert false + end + +let expand_builtin_vstore_global chunk id ofs args = + begin match args with + | [src] when chunk <> Mint64 -> + let tmp = if src = IR GPR11 then GPR12 else GPR11 in + emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs))); + expand_builtin_vstore_common chunk tmp (Csymbol_low(id, ofs)) src + | [IR src1; IR src2] when chunk = Mint64 -> + let tmp = + if not (List.mem GPR12 [src1; src2]) then GPR12 else + if not (List.mem GPR11 [src1; src2]) then GPR11 else GPR10 in + emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs))); + emit (Pstw(src1, Csymbol_low(id, ofs), tmp)); + let ofs = Int.add ofs _4 in + emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs))); + emit (Pstw(src2, Csymbol_low(id, ofs), tmp)) + | _ -> + assert false + end + +let expand_builtin_vstore_sda chunk id ofs args = + begin match args with + | [src] when chunk <> Mint64 -> + expand_builtin_vstore_common chunk GPR0 (Csymbol_sda(id, ofs)) src + | [IR src1; IR src2] when chunk = Mint64 -> + emit (Pstw(src1, Csymbol_sda(id, ofs), GPR0)); + emit (Pstw(src2, Csymbol_sda(id, ofs), GPR0)) + | _ -> + assert false + end + +(* Handling of varargs *) + +let current_function_stacksize = ref 0l + +let align n a = (n + a - 1) land (-a) + +let rec next_arg_locations ir fr ofs = function + | [] -> + (ir, fr, ofs) + | (Tint | Tany32) :: l -> + if ir < 8 + then next_arg_locations (ir + 1) fr ofs l + else next_arg_locations ir fr (ofs + 4) l + | (Tfloat | Tsingle | Tany64) :: l -> + if fr < 8 + then next_arg_locations ir (fr + 1) ofs l + else next_arg_locations ir fr (align ofs 8 + 8) l + | Tlong :: l -> + if ir < 7 + then next_arg_locations (align ir 2 + 2) fr ofs l + else next_arg_locations ir fr (align ofs 8 + 8) l + +let expand_builtin_va_start r = + if not (!current_function).fn_sig.sig_cc.cc_vararg then + invalid_arg "Fatal error: va_start used in non-vararg function"; + let (ir, fr, ofs) = + next_arg_locations 0 0 0 (!current_function).fn_sig.sig_args in + emit_loadimm GPR0 (Z.of_uint ir); + emit (Pstb(GPR0, Cint _0, r)); + emit_loadimm GPR0 (Z.of_uint fr); + emit (Pstb(GPR0, Cint _1, r)); + emit_addimm GPR0 GPR1 (coqint_of_camlint + Int32.(add (add !current_function_stacksize 8l) + (of_int ofs))); + emit (Pstw(GPR0, Cint _4, r)); + emit_addimm GPR0 GPR1 (coqint_of_camlint + Int32.(sub !current_function_stacksize 96l)); + emit (Pstw(GPR0, Cint _8, r)) + +(* Auxiliary for 64-bit integer arithmetic built-ins. They expand to + two instructions, one computing the low 32 bits of the result, + followed by another computing the high 32 bits. In cases where + the first instruction would overwrite arguments to the second + instruction, we must go through GPR0 to hold the low 32 bits of the result. +*) + +let expand_int64_arith conflict rl fn = + if conflict then (fn GPR0; emit (Pmr(rl, GPR0))) else fn rl + +(* Handling of compiler-inlined builtins *) + +let expand_builtin_inline name args res = + (* Can use as temporaries: GPR0, FPR13 *) + match name, args, res with + (* Integer arithmetic *) + | "__builtin_mulhw", [IR a1; IR a2], [IR res] -> + emit (Pmulhw(res, a1, a2)) + | "__builtin_mulhwu", [IR a1; IR a2], [IR res] -> + emit (Pmulhwu(res, a1, a2)) + | "__builtin_cntlz", [IR a1], [IR res] -> + emit (Pcntlz(res, a1)) + | ("__builtin_bswap" | "__builtin_bswap32"), [IR a1], [IR res] -> + emit (Pstwu(a1, Cint _m8, GPR1)); + emit (Pcfi_adjust _8); + emit (Plwbrx(res, GPR0, GPR1)); + emit (Paddi(GPR1, GPR1, Cint _8)); + emit (Pcfi_adjust _m8) + | "__builtin_bswap16", [IR a1], [IR res] -> + emit (Prlwinm(GPR0, a1, _8, coqint_of_camlint 0x0000FF00l)); + emit (Prlwinm(res, a1, coqint_of_camlint 24l, + coqint_of_camlint 0x000000FFl)); + emit (Por(res, GPR0, res)) + (* Float arithmetic *) + | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] -> + emit (Pfmadd(res, a1, a2, a3)) + | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] -> + emit (Pfmsub(res, a1, a2, a3)) + | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] -> + emit (Pfnmadd(res, a1, a2, a3)) + | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] -> + emit (Pfnmsub(res, a1, a2, a3)) + | "__builtin_fabs", [FR a1], [FR res] -> + emit (Pfabs(res, a1)) + | "__builtin_fsqrt", [FR a1], [FR res] -> + emit (Pfsqrt(res, a1)) + | "__builtin_frsqrte", [FR a1], [FR res] -> + emit (Pfrsqrte(res, a1)) + | "__builtin_fres", [FR a1], [FR res] -> + emit (Pfres(res, a1)) + | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] -> + emit (Pfsel(res, a1, a2, a3)) + | "__builtin_fcti", [FR a1], [IR res] -> + emit (Pfctiw(FPR13, a1)); + emit (Pstfdu(FPR13, Cint _m8, GPR1)); + emit (Pcfi_adjust _8); + emit (Plwz(res, Cint _4, GPR1)); + emit (Paddi(GPR1, GPR1, Cint _8)); + emit (Pcfi_adjust _m8) + (* 64-bit integer arithmetic *) + | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] -> + expand_int64_arith (rl = ah) rl (fun rl' -> + emit (Psubfic(rl', al, Cint _0)); + emit (Psubfze(rh, ah))) + | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> + expand_int64_arith (rl = ah || rl = bh) rl (fun rl' -> + emit (Paddc(rl', al, bl)); + emit (Padde(rh, ah, bh))) + | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> + expand_int64_arith (rl = ah || rl = bh) rl (fun rl' -> + emit (Psubfc(rl', bl, al)); + emit (Psubfe(rh, bh, ah))) + | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] -> + expand_int64_arith (rl = a || rl = b) rl (fun rl' -> + emit (Pmullw(rl, a, b)); + emit (Pmulhwu(rh, a, b))) + (* Memory accesses *) + | "__builtin_read16_reversed", [IR a1], [IR res] -> + emit (Plhbrx(res, GPR0, a1)) + | "__builtin_read32_reversed", [IR a1], [IR res] -> + emit (Plwbrx(res, GPR0, a1)) + | "__builtin_write16_reversed", [IR a1; IR a2], _ -> + emit (Psthbrx(a2, GPR0, a1)) + | "__builtin_write32_reversed", [IR a1; IR a2], _ -> + emit (Pstwbrx(a2, GPR0, a1)) + (* Synchronization *) + | "__builtin_eieio", [], _ -> + emit (Peieio) + | "__builtin_sync", [], _ -> + emit (Psync) + | "__builtin_isync", [], _ -> + emit (Pisync) + | "__builtin_trap", [], _ -> + emit (Ptrap) + (* Vararg stuff *) + | "__builtin_va_start", [IR a], _ -> + expand_builtin_va_start a + (* Catch-all *) + | _ -> + invalid_arg ("unrecognized builtin " ^ name) + +(* Calls to variadic functions: condition bit 6 must be set + if at least one argument is a float; clear otherwise. + Note that variadic functions cannot have arguments of type Tsingle. *) + +let set_cr6 sg = + if sg.sig_cc.cc_vararg then begin + if List.mem Tfloat sg.sig_args + then emit (Pcreqv(CRbit_6, CRbit_6, CRbit_6)) + else emit (Pcrxor(CRbit_6, CRbit_6, CRbit_6)) + end + +(* Expand instructions *) + +let num_crbit = function + | CRbit_0 -> 0 + | CRbit_1 -> 1 + | CRbit_2 -> 2 + | CRbit_3 -> 3 + | CRbit_6 -> 6 + +let expand_instruction instr = + match instr with + | Pallocframe(sz, ofs) -> + let variadic = (!current_function).fn_sig.sig_cc.cc_vararg in + let sz = camlint_of_coqint sz in + assert (ofs = Int.zero); + let sz = if variadic then Int32.add sz 96l else sz in + let adj = Int32.neg sz in + if adj >= -0x8000l then + emit (Pstwu(GPR1, Cint(coqint_of_camlint adj), GPR1)) + else begin + emit_loadimm GPR0 (coqint_of_camlint adj); + emit (Pstwxu(GPR1, GPR1, GPR0)) + end; + emit (Pcfi_adjust (coqint_of_camlint sz)); + if variadic then begin + emit (Pmflr GPR0); + emit (Pbl(intern_string "__compcert_va_saveregs", + {sig_args = []; sig_res = None; sig_cc = cc_default})); + emit (Pmtlr GPR0) + end; + current_function_stacksize := sz + | Pbctr sg | Pbctrl sg | Pbl(_, sg) | Pbs(_, sg) -> + set_cr6 sg; + emit instr + | Pfreeframe(sz, ofs) -> + let variadic = (!current_function).fn_sig.sig_cc.cc_vararg in + let sz = camlint_of_coqint sz in + let sz = if variadic then Int32.add sz 96l else sz in + if sz < 0x8000l then + emit (Paddi(GPR1, GPR1, Cint(coqint_of_camlint sz))) + else + emit (Plwz(GPR1, Cint ofs, GPR1)) + | Pfcti(r1, r2) -> + emit (Pfctiwz(FPR13, r2)); + emit (Pstfdu(FPR13, Cint _m8, GPR1)); + emit (Pcfi_adjust _8); + emit (Plwz(r1, Cint _4, GPR1)); + emit (Paddi(GPR1, GPR1, Cint _8)); + emit (Pcfi_adjust _m8) + | Pfmake(rd, r1, r2) -> + emit (Pstwu(r1, Cint _m8, GPR1)); + emit (Pcfi_adjust _8); + emit (Pstw(r2, Cint _4, GPR1)); + emit (Plfd(rd, Cint _0, GPR1)); + emit (Paddi(GPR1, GPR1, Cint _8)); + emit (Pcfi_adjust _m8); + | Pfxdp(r1, r2) -> + if r1 <> r2 then emit(Pfmr(r1, r2)) + | Pmfcrbit(r1, bit) -> + emit (Pmfcr r1); + emit (Prlwinm(r1, r1, Z.of_uint (1 + num_crbit bit), _1)) + | Pbuiltin(ef, args, res) -> + begin match ef with + | EF_builtin(name, sg) -> + expand_builtin_inline (extern_atom name) args res + | EF_vload chunk -> + expand_builtin_vload chunk args res + | EF_vstore chunk -> + expand_builtin_vstore chunk args + | EF_vload_global(chunk, id, ofs) -> + if symbol_is_small_data id ofs + then expand_builtin_vload_sda chunk id ofs args res + else expand_builtin_vload_global chunk id ofs args res + | EF_vstore_global(chunk, id, ofs) -> + if symbol_is_small_data id ofs + then expand_builtin_vstore_sda chunk id ofs args + else expand_builtin_vstore_global chunk id ofs args + | EF_memcpy(sz, al) -> + expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args + | EF_annot_val(txt, targ) -> + expand_annot_val txt targ args res + | EF_inline_asm txt -> + emit instr + | _ -> + assert false + end + | _ -> + emit instr + +let expand_function fn = + set_current_function fn; + current_code := []; + List.iter expand_instruction fn.fn_code; + let c = get_code() in + set_current_function dummy_function; + { fn with fn_code = c } + +let expand_fundef = function + | Internal f -> Internal (expand_function f) + | External ef -> External ef + +let expand_program (p: Asm.program) : Asm.program = + AST.transform_program expand_fundef p diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v index 5ca770d..5c4ffde 100644 --- a/powerpc/Asmgen.v +++ b/powerpc/Asmgen.v @@ -721,7 +721,8 @@ Definition transl_function (f: Mach.function) := OK (mkfunction f.(Mach.fn_sig) (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) :: Pmflr GPR0 :: - Pstw GPR0 (Cint f.(fn_retaddr_ofs)) GPR1 :: c)). + Pstw GPR0 (Cint f.(fn_retaddr_ofs)) GPR1 :: + Pcfi_rel_offset f.(fn_retaddr_ofs) :: c)). Definition transf_function (f: Mach.function) : res Asm.function := do tf <- transl_function f; diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v index 913fb50..2b52fe0 100644 --- a/powerpc/Asmgenproof.v +++ b/powerpc/Asmgenproof.v @@ -918,30 +918,36 @@ Local Transparent destroyed_by_jumptable. set (rs2 := nextinstr (rs0#GPR1 <- sp #GPR0 <- Vundef)). set (rs3 := nextinstr (rs2#GPR0 <- (rs0#LR))). set (rs4 := nextinstr rs3). + set (rs5 := nextinstr rs4). assert (EXEC_PROLOGUE: exec_straight tge x x.(fn_code) rs0 m' - x1 rs4 m3'). + x1 rs5 m3'). rewrite <- H5 at 2. simpl. - apply exec_straight_three with rs2 m2' rs3 m2'. + apply exec_straight_step with rs2 m2'. unfold exec_instr. rewrite C. fold sp. - rewrite <- (sp_val _ _ _ AG). rewrite F. auto. - simpl. auto. + rewrite <- (sp_val _ _ _ AG). rewrite F. auto. auto. + apply exec_straight_step with rs3 m2'. + simpl. auto. auto. + apply exec_straight_two with rs4 m3'. simpl. unfold store1. rewrite gpr_or_zero_not_zero. change (rs3 GPR1) with sp. change (rs3 GPR0) with (rs0 LR). simpl. rewrite Int.add_zero_l. simpl in P. rewrite Int.add_zero_l in P. rewrite ATLR. rewrite P. auto. congruence. auto. auto. auto. - left; exists (State rs4 m3'); split. + left; exists (State rs5 m3'); split. eapply exec_straight_steps_1; eauto. omega. constructor. econstructor; eauto. - change (rs4 PC) with (Val.add (Val.add (Val.add (rs0 PC) Vone) Vone) Vone). + change (rs5 PC) with (Val.add (Val.add (Val.add (Val.add (rs0 PC) Vone) Vone) Vone) Vone). rewrite ATPC. simpl. constructor; eauto. - subst x; simpl in g. unfold fn_code. eapply code_tail_next_int. omega. + subst x; simpl in g. unfold fn_code. eapply code_tail_next_int. omega. eapply code_tail_next_int. omega. + eapply code_tail_next_int. omega. + eapply code_tail_next_int. omega. constructor. - unfold rs4, rs3, rs2. - apply agree_nextinstr. apply agree_set_other; auto. apply agree_set_other; auto. + unfold rs5, rs4, rs3, rs2. + apply agree_nextinstr. apply agree_nextinstr. + apply agree_set_other; auto. apply agree_set_other; auto. apply agree_nextinstr. apply agree_set_other; auto. eapply agree_change_sp; eauto. unfold sp; congruence. congruence. diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml index e3f0724..691ecfb 100644 --- a/powerpc/PrintAsm.ml +++ b/powerpc/PrintAsm.ml @@ -102,6 +102,7 @@ let num_crbit = function | CRbit_1 -> 1 | CRbit_2 -> 2 | CRbit_3 -> 3 + | CRbit_6 -> 6 let crbit oc bit = fprintf oc "%d" (num_crbit bit) @@ -287,16 +288,6 @@ let rolm_mask n = assert (!count = 2 || (!count = 0 && !last)); (!mb, !me-1) -(* Built-ins. They come in three flavors: - - annotation statements: take their arguments in registers or stack - locations; generate no code; - - inlined by the compiler: take their arguments in arbitrary - registers; preserve all registers except the reserved temporaries - (GPR0, GPR11, GPR12, FPR13); - - inlined while printing asm code; take their arguments in - locations dictated by the calling conventions; preserve - callee-save regs only. *) - (* Handling of annotations *) let re_file_line = Str.regexp "#line:\\(.*\\):\\([1-9][0-9]*\\)$" @@ -309,361 +300,6 @@ let print_annot_stmt oc txt targs args = PrintAnnot.print_annot_stmt preg "R1" oc txt targs args end -let print_annot_val oc txt args res = - fprintf oc "%s annotation: " comment; - PrintAnnot.print_annot_val preg oc txt args; - match args, res with - | [IR src], [IR dst] -> - if dst <> src then fprintf oc " mr %a, %a\n" ireg dst ireg src - | [FR src], [FR dst] -> - if dst <> src then fprintf oc " fmr %a, %a\n" freg dst freg src - | _, _ -> - assert false - -(* Handling of memcpy *) - -(* On the PowerPC, unaligned accesses to 16- and 32-bit integers are - fast, but unaligned accesses to 64-bit floats can be slow - (not so much on G5, but clearly so on Power7). - So, use 64-bit accesses only if alignment >= 4. - Note that lfd and stfd cannot trap on ill-formed floats. *) - -let print_builtin_memcpy_small oc sz al src dst = - let rec copy ofs sz = - if sz >= 8 && al >= 4 && !Clflags.option_ffpu then begin - fprintf oc " lfd %a, %d(%a)\n" freg FPR13 ofs ireg src; - fprintf oc " stfd %a, %d(%a)\n" freg FPR13 ofs ireg dst; - copy (ofs + 8) (sz - 8) - end else if sz >= 4 then begin - fprintf oc " lwz %a, %d(%a)\n" ireg GPR0 ofs ireg src; - fprintf oc " stw %a, %d(%a)\n" ireg GPR0 ofs ireg dst; - copy (ofs + 4) (sz - 4) - end else if sz >= 2 then begin - fprintf oc " lhz %a, %d(%a)\n" ireg GPR0 ofs ireg src; - fprintf oc " sth %a, %d(%a)\n" ireg GPR0 ofs ireg dst; - copy (ofs + 2) (sz - 2) - end else if sz >= 1 then begin - fprintf oc " lbz %a, %d(%a)\n" ireg GPR0 ofs ireg src; - fprintf oc " stb %a, %d(%a)\n" ireg GPR0 ofs ireg dst; - copy (ofs + 1) (sz - 1) - end in - copy 0 sz - -let print_builtin_memcpy_big oc sz al src dst = - assert (sz >= 4); - fprintf oc " li %a, %d\n" ireg GPR0 (sz / 4); - fprintf oc " mtctr %a\n" ireg GPR0; - let (s,d) = if dst <> GPR11 then (GPR11, GPR12) else (GPR12, GPR11) in - fprintf oc " addi %a, %a, -4\n" ireg s ireg src; - fprintf oc " addi %a, %a, -4\n" ireg d ireg dst; - let lbl = new_label() in - fprintf oc "%a: lwzu %a, 4(%a)\n" label lbl ireg GPR0 ireg s; - fprintf oc " stwu %a, 4(%a)\n" ireg GPR0 ireg d; - fprintf oc " bdnz %a\n" label lbl; - (* s and d lag behind by 4 bytes *) - match sz land 3 with - | 1 -> fprintf oc " lbz %a, 4(%a)\n" ireg GPR0 ireg s; - fprintf oc " stb %a, 4(%a)\n" ireg GPR0 ireg d - | 2 -> fprintf oc " lhz %a, 4(%a)\n" ireg GPR0 ireg s; - fprintf oc " sth %a, 4(%a)\n" ireg GPR0 ireg d - | 3 -> fprintf oc " lhz %a, 4(%a)\n" ireg GPR0 ireg s; - fprintf oc " sth %a, 4(%a)\n" ireg GPR0 ireg d; - fprintf oc " lbz %a, 6(%a)\n" ireg GPR0 ireg s; - fprintf oc " stb %a, 6(%a)\n" ireg GPR0 ireg d - | _ -> () - -let print_builtin_memcpy oc sz al args = - let (dst, src) = - match args with [IR d; IR s] -> (d, s) | _ -> assert false in - fprintf oc "%s begin builtin __builtin_memcpy_aligned, size = %d, alignment = %d\n" - comment sz al; - if sz <= (if !Clflags.option_ffpu then 48 else 24) - then print_builtin_memcpy_small oc sz al src dst - else print_builtin_memcpy_big oc sz al src dst; - fprintf oc "%s end builtin __builtin_memcpy_aligned\n" comment - -(* Handling of volatile reads and writes *) - -let print_builtin_vload_common oc chunk base offset res = - match chunk, res with - | Mint8unsigned, IR res -> - fprintf oc " lbz %a, %a(%a)\n" ireg res constant offset ireg base - | Mint8signed, IR res -> - fprintf oc " lbz %a, %a(%a)\n" ireg res constant offset ireg base; - fprintf oc " extsb %a, %a\n" ireg res ireg res - | Mint16unsigned, IR res -> - fprintf oc " lhz %a, %a(%a)\n" ireg res constant offset ireg base - | Mint16signed, IR res -> - fprintf oc " lha %a, %a(%a)\n" ireg res constant offset ireg base - | (Mint32 | Many32), IR res -> - fprintf oc " lwz %a, %a(%a)\n" ireg res constant offset ireg base - | Mfloat32, FR res -> - fprintf oc " lfs %a, %a(%a)\n" freg res constant offset ireg base - | (Mfloat64 | Many64), FR res -> - fprintf oc " lfd %a, %a(%a)\n" freg res constant offset ireg base - (* Mint64 is special-cased below *) - | _ -> - assert false - -let print_builtin_vload oc chunk args res = - fprintf oc "%s begin builtin __builtin_volatile_read\n" comment; - begin match args, res with - | [IR addr], [res] when chunk <> Mint64 -> - print_builtin_vload_common oc chunk addr (Cint Integers.Int.zero) res - | [IR addr], [IR res1; IR res2] when chunk = Mint64 -> - if addr <> res1 then begin - fprintf oc " lwz %a, 0(%a)\n" ireg res1 ireg addr; - fprintf oc " lwz %a, 4(%a)\n" ireg res2 ireg addr - end else begin - fprintf oc " lwz %a, 4(%a)\n" ireg res2 ireg addr; - fprintf oc " lwz %a, 0(%a)\n" ireg res1 ireg addr - end - | _ -> - assert false - end; - fprintf oc "%s end builtin __builtin_volatile_read\n" comment - -let print_builtin_vload_global oc chunk id ofs args res = - fprintf oc "%s begin builtin __builtin_volatile_read\n" comment; - begin match res with - | [res] when chunk <> Mint64 -> - fprintf oc " addis %a, %a, %a\n" - ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); - print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res - | [IR res1; IR res2] when chunk = Mint64 -> - fprintf oc " addis %a, %a, %a\n" - ireg res1 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); - fprintf oc " lwz %a, %a(%a)\n" - ireg res1 constant (Csymbol_low(id, ofs)) ireg res1; - let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in - fprintf oc " addis %a, %a, %a\n" - ireg res2 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); - fprintf oc " lwz %a, %a(%a)\n" - ireg res2 constant (Csymbol_low(id, ofs)) ireg res2 - | _ -> - assert false - end; - fprintf oc "%s end builtin __builtin_volatile_read\n" comment - -let print_builtin_vstore_common oc chunk base offset src = - match chunk, src with - | (Mint8signed | Mint8unsigned), IR src -> - fprintf oc " stb %a, %a(%a)\n" ireg src constant offset ireg base - | (Mint16signed | Mint16unsigned), IR src -> - fprintf oc " sth %a, %a(%a)\n" ireg src constant offset ireg base - | (Mint32 | Many32), IR src -> - fprintf oc " stw %a, %a(%a)\n" ireg src constant offset ireg base - | Mfloat32, FR src -> - fprintf oc " stfs %a, %a(%a)\n" freg src constant offset ireg base - | (Mfloat64 | Many64), FR src -> - fprintf oc " stfd %a, %a(%a)\n" freg src constant offset ireg base - (* Mint64 is special-cased below *) - | _ -> - assert false - -let print_builtin_vstore oc chunk args = - fprintf oc "%s begin builtin __builtin_volatile_write\n" comment; - begin match args with - | [IR addr; src] when chunk <> Mint64 -> - print_builtin_vstore_common oc chunk addr (Cint Integers.Int.zero) src - | [IR addr; IR src1; IR src2] when chunk = Mint64 -> - fprintf oc " stw %a, 0(%a)\n" ireg src1 ireg addr; - fprintf oc " stw %a, 4(%a)\n" ireg src2 ireg addr - | _ -> - assert false - end; - fprintf oc "%s end builtin __builtin_volatile_write\n" comment - -let print_builtin_vstore_global oc chunk id ofs args = - fprintf oc "%s begin builtin __builtin_volatile_write\n" comment; - begin match args with - | [src] when chunk <> Mint64 -> - let tmp = if src = IR GPR11 then GPR12 else GPR11 in - fprintf oc " addis %a, %a, %a\n" - ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); - print_builtin_vstore_common oc chunk tmp (Csymbol_low(id, ofs)) src - | [IR src1; IR src2] when chunk = Mint64 -> - let tmp = - if not (List.mem GPR12 [src1; src2]) then GPR12 else - if not (List.mem GPR11 [src1; src2]) then GPR11 else GPR10 in - fprintf oc " addis %a, %a, %a\n" - ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); - fprintf oc " stw %a, %a(%a)\n" - ireg src1 constant (Csymbol_low(id, ofs)) ireg tmp; - let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in - fprintf oc " addis %a, %a, %a\n" - ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs)); - fprintf oc " stw %a, %a(%a)\n" - ireg src2 constant (Csymbol_low(id, ofs)) ireg tmp - | _ -> - assert false - end; - fprintf oc "%s end builtin __builtin_volatile_write\n" comment - -(* Handling of varargs *) - -let current_function_stacksize = ref 0l -let current_function_sig = - ref { sig_args = []; sig_res = None; sig_cc = cc_default } - -let align n a = (n + a - 1) land (-a) - -let rec next_arg_locations ir fr ofs = function - | [] -> - (ir, fr, ofs) - | (Tint | Tany32) :: l -> - if ir < 8 - then next_arg_locations (ir + 1) fr ofs l - else next_arg_locations ir fr (ofs + 4) l - | (Tfloat | Tsingle | Tany64) :: l -> - if fr < 8 - then next_arg_locations ir (fr + 1) ofs l - else next_arg_locations ir fr (align ofs 8 + 8) l - | Tlong :: l -> - if ir < 7 - then next_arg_locations (align ir 2 + 2) fr ofs l - else next_arg_locations ir fr (align ofs 8 + 8) l - -let print_builtin_va_start oc r = - if not (!current_function_sig).sig_cc.cc_vararg then - invalid_arg "Fatal error: va_start used in non-vararg function"; - let (ir, fr, ofs) = - next_arg_locations 0 0 0 (!current_function_sig).sig_args in - fprintf oc " li %a, %d\n" ireg GPR0 ir; - fprintf oc " stb %a, 0(%a)\n" ireg GPR0 ireg r; - fprintf oc " li %a, %d\n" ireg GPR0 fr; - fprintf oc " stb %a, 1(%a)\n" ireg GPR0 ireg r; - fprintf oc " addi %a, %a, %ld\n" ireg GPR0 ireg GPR1 - Int32.(add (add !current_function_stacksize 8l) - (of_int ofs)); - fprintf oc " stw %a, 4(%a)\n" ireg GPR0 ireg r; - fprintf oc " addi %a, %a, %ld\n" ireg GPR0 ireg GPR1 - Int32.(sub !current_function_stacksize 96l); - fprintf oc " stw %a, 8(%a)\n" ireg GPR0 ireg r - -(* Handling of compiler-inlined builtins *) - -let print_builtin_inline oc name args res = - fprintf oc "%s begin builtin %s\n" comment name; - (* Can use as temporaries: GPR0, FPR13 *) - begin match name, args, res with - (* Integer arithmetic *) - | "__builtin_mulhw", [IR a1; IR a2], [IR res] -> - fprintf oc " mulhw %a, %a, %a\n" ireg res ireg a1 ireg a2 - | "__builtin_mulhwu", [IR a1; IR a2], [IR res] -> - fprintf oc " mulhwu %a, %a, %a\n" ireg res ireg a1 ireg a2 - | "__builtin_cntlz", [IR a1], [IR res] -> - fprintf oc " cntlzw %a, %a\n" ireg res ireg a1 - | ("__builtin_bswap" | "__builtin_bswap32"), [IR a1], [IR res] -> - fprintf oc " stwu %a, -8(%a)\n" ireg a1 ireg GPR1; - cfi_adjust oc 8l; - fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg GPR1; - fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1; - cfi_adjust oc (-8l) - | "__builtin_bswap16", [IR a1], [IR res] -> - fprintf oc " rlwinm %a, %a, 8, 16, 23\n" ireg GPR0 ireg a1; - fprintf oc " rlwinm %a, %a, 24, 24, 31\n" ireg res ireg a1; - fprintf oc " or %a, %a, %a\n" ireg res ireg GPR0 ireg res - (* Float arithmetic *) - | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] -> - fprintf oc " fmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] -> - fprintf oc " fmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] -> - fprintf oc " fnmadd %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] -> - fprintf oc " fnmsub %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fabs", [FR a1], [FR res] -> - fprintf oc " fabs %a, %a\n" freg res freg a1 - | "__builtin_fsqrt", [FR a1], [FR res] -> - fprintf oc " fsqrt %a, %a\n" freg res freg a1 - | "__builtin_frsqrte", [FR a1], [FR res] -> - fprintf oc " frsqrte %a, %a\n" freg res freg a1 - | "__builtin_fres", [FR a1], [FR res] -> - fprintf oc " fres %a, %a\n" freg res freg a1 - | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] -> - fprintf oc " fsel %a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3 - | "__builtin_fcti", [FR a1], [IR res] -> - fprintf oc " fctiw %a, %a\n" freg FPR13 freg a1; - fprintf oc " stfdu %a, -8(%a)\n" freg FPR13 ireg GPR1; - cfi_adjust oc 8l; - fprintf oc " lwz %a, 4(%a)\n" ireg res ireg GPR1; - fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1; - cfi_adjust oc (-8l) - (* 64-bit integer arithmetic *) - | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] -> - if rl = ah then begin - fprintf oc " subfic %a, %a, 0\n" ireg GPR0 ireg al; - fprintf oc " subfze %a, %a\n" ireg rh ireg ah; - fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 - end else begin - fprintf oc " subfic %a, %a, 0\n" ireg rl ireg al; - fprintf oc " subfze %a, %a\n" ireg rh ireg ah - end - | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> - if rl = ah || rl = bh then begin - fprintf oc " addc %a, %a, %a\n" ireg GPR0 ireg al ireg bl; - fprintf oc " adde %a, %a, %a\n" ireg rh ireg ah ireg bh; - fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 - end else begin - fprintf oc " addc %a, %a, %a\n" ireg rl ireg al ireg bl; - fprintf oc " adde %a, %a, %a\n" ireg rh ireg ah ireg bh - end - | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> - if rl = ah || rl = bh then begin - fprintf oc " subfc %a, %a, %a\n" ireg GPR0 ireg bl ireg al; - fprintf oc " subfe %a, %a, %a\n" ireg rh ireg bh ireg ah; - fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 - end else begin - fprintf oc " subfc %a, %a, %a\n" ireg rl ireg bl ireg al; - fprintf oc " subfe %a, %a, %a\n" ireg rh ireg bh ireg ah - end - | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] -> - if rl = a || rl = b then begin - fprintf oc " mullw %a, %a, %a\n" ireg GPR0 ireg a ireg b; - fprintf oc " mulhwu %a, %a, %a\n" ireg rh ireg a ireg b; - fprintf oc " mr %a, %a\n" ireg rl ireg GPR0 - end else begin - fprintf oc " mullw %a, %a, %a\n" ireg rl ireg a ireg b; - fprintf oc " mulhwu %a, %a, %a\n" ireg rh ireg a ireg b - end - (* Memory accesses *) - | "__builtin_read16_reversed", [IR a1], [IR res] -> - fprintf oc " lhbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1 - | "__builtin_read32_reversed", [IR a1], [IR res] -> - fprintf oc " lwbrx %a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1 - | "__builtin_write16_reversed", [IR a1; IR a2], _ -> - fprintf oc " sthbrx %a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1 - | "__builtin_write32_reversed", [IR a1; IR a2], _ -> - fprintf oc " stwbrx %a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1 - (* Synchronization *) - | "__builtin_eieio", [], _ -> - fprintf oc " eieio\n" - | "__builtin_sync", [], _ -> - fprintf oc " sync\n" - | "__builtin_isync", [], _ -> - fprintf oc " isync\n" - | "__builtin_trap", [], _ -> - fprintf oc " trap\n" - (* Vararg stuff *) - | "__builtin_va_start", [IR a], _ -> - print_builtin_va_start oc a - (* Catch-all *) - | _ -> - invalid_arg ("unrecognized builtin " ^ name) - end; - fprintf oc "%s end builtin %s\n" comment name - -(* Calls to variadic functions: condition bit 6 must be set - if at least one argument is a float; clear otherwise *) - -let set_cr6 oc sg = - if sg.sig_cc.cc_vararg then begin - if List.mem Tfloat sg.sig_args - then fprintf oc " creqv 6, 6, 6\n" - else fprintf oc " crxor 6, 6, 6\n" - end - (* Determine if the displacement of a conditional branch fits the short form *) let short_cond_branch tbl pc lbl_dest = @@ -681,6 +317,8 @@ let jumptables : (int * label list) list ref = ref [] let print_instruction oc tbl pc fallthrough = function | Padd(r1, r2, r3) -> fprintf oc " add %a, %a, %a\n" ireg r1 ireg r2 ireg r3 + | Paddc(r1, r2, r3) -> + fprintf oc " addc %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Padde(r1, r2, r3) -> fprintf oc " adde %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Paddi(r1, r2, c) -> @@ -692,28 +330,7 @@ let print_instruction oc tbl pc fallthrough = function | Paddze(r1, r2) -> fprintf oc " addze %a, %a\n" ireg r1 ireg r2 | Pallocframe(sz, ofs) -> - let sz = camlint_of_coqint sz - and ofs = camlint_of_coqint ofs in - assert (ofs = 0l); - let sz = - if (!current_function_sig).sig_cc.cc_vararg - then Int32.add sz 96l - else sz in - let adj = Int32.neg sz in - if adj >= -0x8000l then - fprintf oc " stwu %a, %ld(%a)\n" ireg GPR1 adj ireg GPR1 - else begin - fprintf oc " addis %a, 0, %ld\n" ireg GPR0 (Int32.shift_right_logical adj 16); - fprintf oc " ori %a, %a, %ld\n" ireg GPR0 ireg GPR0 (Int32.logand adj 0xFFFFl); - fprintf oc " stwux %a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR0 - end; - cfi_adjust oc sz; - if (!current_function_sig).sig_cc.cc_vararg then begin - fprintf oc " mflr %a\n" ireg GPR0; - fprintf oc " bl __compcert_va_saveregs\n"; - fprintf oc " mtlr %a\n" ireg GPR0 - end; - current_function_stacksize := sz + assert false | Pand_(r1, r2, r3) -> fprintf oc " and. %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Pandc(r1, r2, r3) -> @@ -725,11 +342,11 @@ let print_instruction oc tbl pc fallthrough = function | Pb lbl -> fprintf oc " b %a\n" label (transl_label lbl) | Pbctr sg -> - set_cr6 oc sg; fprintf oc " bctr\n" | Pbctrl sg -> - set_cr6 oc sg; fprintf oc " bctrl\n" + | Pbdnz lbl -> + fprintf oc " bdnz %a\n" label (transl_label lbl) | Pbf(bit, lbl) -> if !Clflags.option_faligncondbranchs > 0 then fprintf oc " .balign %d\n" !Clflags.option_faligncondbranchs; @@ -742,10 +359,8 @@ let print_instruction oc tbl pc fallthrough = function fprintf oc "%a:\n" label next end | Pbl(s, sg) -> - set_cr6 oc sg; fprintf oc " bl %a\n" symbol s | Pbs(s, sg) -> - set_cr6 oc sg; fprintf oc " b %a\n" symbol s | Pblr -> fprintf oc " blr\n" @@ -781,12 +396,20 @@ let print_instruction oc tbl pc fallthrough = function fprintf oc " cmpw %a, %a, %a\n" creg 0 ireg r1 ireg r2 | Pcmpwi(r1, c) -> fprintf oc " cmpwi %a, %a, %a\n" creg 0 ireg r1 constant c + | Pcntlz(r1, r2) -> + fprintf oc " cntlz %a, %a\n" ireg r1 ireg r2 + | Pcreqv(c1, c2, c3) -> + fprintf oc " creqv %a, %a, %a\n" crbit c1 crbit c2 crbit c3 | Pcror(c1, c2, c3) -> fprintf oc " cror %a, %a, %a\n" crbit c1 crbit c2 crbit c3 + | Pcrxor(c1, c2, c3) -> + fprintf oc " crxor %a, %a, %a\n" crbit c1 crbit c2 crbit c3 | Pdivw(r1, r2, r3) -> fprintf oc " divw %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Pdivwu(r1, r2, r3) -> fprintf oc " divwu %a, %a, %a\n" ireg r1 ireg r2 ireg r3 + | Peieio -> + fprintf oc " eieio\n" | Peqv(r1, r2, r3) -> fprintf oc " eqv %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Pextsb(r1, r2) -> @@ -794,16 +417,7 @@ let print_instruction oc tbl pc fallthrough = function | Pextsh(r1, r2) -> fprintf oc " extsh %a, %a\n" ireg r1 ireg r2 | Pfreeframe(sz, ofs) -> - let sz = camlint_of_coqint sz - and ofs = camlint_of_coqint ofs in - let sz = - if (!current_function_sig).sig_cc.cc_vararg - then Int32.add sz 96l - else sz in - if sz < 0x8000l then - fprintf oc " addi %a, %a, %ld\n" ireg GPR1 ireg GPR1 sz - else - fprintf oc " lwz %a, %ld(%a)\n" ireg GPR1 ofs ireg GPR1 + assert false | Pfabs(r1, r2) | Pfabss(r1, r2) -> fprintf oc " fabs %a, %a\n" freg r1 freg r2 | Pfadd(r1, r2, r3) -> @@ -813,28 +427,17 @@ let print_instruction oc tbl pc fallthrough = function | Pfcmpu(r1, r2) -> fprintf oc " fcmpu %a, %a, %a\n" creg 0 freg r1 freg r2 | Pfcti(r1, r2) -> - fprintf oc "%s begin pseudoinstr %a = fcti(%a)\n" comment ireg r1 freg r2; - fprintf oc " fctiwz %a, %a\n" freg FPR13 freg r2; - fprintf oc " stfdu %a, -8(%a)\n" freg FPR13 ireg GPR1; - cfi_adjust oc 8l; - fprintf oc " lwz %a, 4(%a)\n" ireg r1 ireg GPR1; - fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1; - cfi_adjust oc (-8l); - fprintf oc "%s end pseudoinstr fcti\n" comment + assert false + | Pfctiw(r1, r2) -> + fprintf oc " fctiw %a, %a\n" freg r1 freg r2 + | Pfctiwz(r1, r2) -> + fprintf oc " fctiwz %a, %a\n" freg r1 freg r2 | Pfdiv(r1, r2, r3) -> fprintf oc " fdiv %a, %a, %a\n" freg r1 freg r2 freg r3 | Pfdivs(r1, r2, r3) -> fprintf oc " fdivs %a, %a, %a\n" freg r1 freg r2 freg r3 | Pfmake(rd, r1, r2) -> - fprintf oc "%s begin pseudoinstr %a = fmake(%a, %a)\n" - comment freg rd ireg r1 ireg r2; - fprintf oc " stwu %a, -8(%a)\n" ireg r1 ireg GPR1; - cfi_adjust oc 8l; - fprintf oc " stw %a, 4(%a)\n" ireg r2 ireg GPR1; - fprintf oc " lfd %a, 0(%a)\n" freg rd ireg GPR1; - fprintf oc " addi %a, %a, 8\n" ireg GPR1 ireg GPR1; - cfi_adjust oc (-8l); - fprintf oc "%s end pseudoinstr fmake\n" comment + assert false | Pfmr(r1, r2) -> fprintf oc " fmr %a, %a\n" freg r1 freg r2 | Pfmul(r1, r2, r3) -> @@ -846,12 +449,29 @@ let print_instruction oc tbl pc fallthrough = function | Pfrsp(r1, r2) -> fprintf oc " frsp %a, %a\n" freg r1 freg r2 | Pfxdp(r1, r2) -> - if r1 <> r2 then - fprintf oc " fmr %a, %a\n" freg r1 freg r2 + assert false | Pfsub(r1, r2, r3) -> fprintf oc " fsub %a, %a, %a\n" freg r1 freg r2 freg r3 | Pfsubs(r1, r2, r3) -> fprintf oc " fsubs %a, %a, %a\n" freg r1 freg r2 freg r3 + | Pfmadd(r1, r2, r3, r4) -> + fprintf oc " fmadd %a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4 + | Pfmsub(r1, r2, r3, r4) -> + fprintf oc " fmsub %a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4 + | Pfnmadd(r1, r2, r3, r4) -> + fprintf oc " fnmadd %a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4 + | Pfnmsub(r1, r2, r3, r4) -> + fprintf oc " fnmsub %a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4 + | Pfsqrt(r1, r2) -> + fprintf oc " fsqrt %a, %a\n" freg r1 freg r2 + | Pfrsqrte(r1, r2) -> + fprintf oc " frsqrte %a, %a\n" freg r1 freg r2 + | Pfres(r1, r2) -> + fprintf oc " fres %a, %a\n" freg r1 freg r2 + | Pfsel(r1, r2, r3, r4) -> + fprintf oc " fsel %a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4 + | Pisync -> + fprintf oc " isync\n" | Plbz(r1, c, r2) -> fprintf oc " lbz %a, %a(%a)\n" ireg r1 constant c ireg r2 | Plbzx(r1, r2, r3) -> @@ -860,16 +480,6 @@ let print_instruction oc tbl pc fallthrough = function fprintf oc " lfd %a, %a(%a)\n" freg r1 constant c ireg r2 | Plfdx(r1, r2, r3) | Plfdx_a(r1, r2, r3) -> fprintf oc " lfdx %a, %a, %a\n" freg r1 ireg r2 ireg r3 - | Plfi(r1, c) -> - let lbl = new_label() in - fprintf oc " addis %a, 0, %a\n" ireg GPR12 label_high lbl; - fprintf oc " lfd %a, %a(%a) %s %.18g\n" freg r1 label_low lbl ireg GPR12 comment (camlfloat_of_coqfloat c); - float_literals := (lbl, camlint64_of_coqint (Floats.Float.to_bits c)) :: !float_literals; - | Plfis(r1, c) -> - let lbl = new_label() in - fprintf oc " addis %a, 0, %a\n" ireg GPR12 label_high lbl; - fprintf oc " lfs %a, %a(%a) %s %.18g\n" freg r1 label_low lbl ireg GPR12 comment (camlfloat_of_coqfloat32 c); - float32_literals := (lbl, camlint_of_coqint (Floats.Float32.to_bits c)) :: !float32_literals; | Plfs(r1, c, r2) -> fprintf oc " lfs %a, %a(%a)\n" freg r1 constant c ireg r2 | Plfsx(r1, r2, r3) -> @@ -878,20 +488,36 @@ let print_instruction oc tbl pc fallthrough = function fprintf oc " lha %a, %a(%a)\n" ireg r1 constant c ireg r2 | Plhax(r1, r2, r3) -> fprintf oc " lhax %a, %a, %a\n" ireg r1 ireg r2 ireg r3 + | Plhbrx(r1, r2, r3) -> + fprintf oc " lhbrx %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Plhz(r1, c, r2) -> fprintf oc " lhz %a, %a(%a)\n" ireg r1 constant c ireg r2 | Plhzx(r1, r2, r3) -> fprintf oc " lhzx %a, %a, %a\n" ireg r1 ireg r2 ireg r3 + | Plfi(r1, c) -> + let lbl = new_label() in + fprintf oc " addis %a, 0, %a\n" ireg GPR12 label_high lbl; + fprintf oc " lfd %a, %a(%a) %s %.18g\n" freg r1 label_low lbl ireg GPR12 comment (camlfloat_of_coqfloat c); + float_literals := (lbl, camlint64_of_coqint (Floats.Float.to_bits c)) :: !float_literals; + | Plfis(r1, c) -> + let lbl = new_label() in + fprintf oc " addis %a, 0, %a\n" ireg GPR12 label_high lbl; + fprintf oc " lfs %a, %a(%a) %s %.18g\n" freg r1 label_low lbl ireg GPR12 comment (camlfloat_of_coqfloat32 c); + float32_literals := (lbl, camlint_of_coqint (Floats.Float32.to_bits c)) :: !float32_literals; + | Plwbrx(r1, r2, r3) -> + fprintf oc " lwbrx %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Plwz(r1, c, r2) | Plwz_a(r1, c, r2) -> fprintf oc " lwz %a, %a(%a)\n" ireg r1 constant c ireg r2 + | Plwzu(r1, c, r2) -> + fprintf oc " lwzu %a, %a(%a)\n" ireg r1 constant c ireg r2 | Plwzx(r1, r2, r3) | Plwzx_a(r1, r2, r3) -> fprintf oc " lwzx %a, %a, %a\n" ireg r1 ireg r2 ireg r3 + | Pmfcr(r1) -> + fprintf oc " mfcr %a\n" ireg r1 | Pmfcrbit(r1, bit) -> - fprintf oc " mfcr %a\n" ireg r1; - fprintf oc " rlwinm %a, %a, %d, 31, 31\n" ireg r1 ireg r1 (1 + num_crbit bit) + assert false | Pmflr(r1) -> - fprintf oc " mflr %a\n" ireg r1; - cfi_rel_offset oc "lr" 8l + fprintf oc " mflr %a\n" ireg r1 | Pmr(r1, r2) -> fprintf oc " mr %a, %a\n" ireg r1 ireg r2 | Pmtctr(r1) -> @@ -942,6 +568,8 @@ let print_instruction oc tbl pc fallthrough = function fprintf oc " stbx %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Pstfd(r1, c, r2) | Pstfd_a(r1, c, r2) -> fprintf oc " stfd %a, %a(%a)\n" freg r1 constant c ireg r2 + | Pstfdu(r1, c, r2) -> + fprintf oc " stfdu %a, %a(%a)\n" freg r1 constant c ireg r2 | Pstfdx(r1, r2, r3) | Pstfdx_a(r1, r2, r3) -> fprintf oc " stfdx %a, %a, %a\n" freg r1 ireg r2 ireg r3 | Pstfs(r1, c, r2) -> @@ -952,16 +580,30 @@ let print_instruction oc tbl pc fallthrough = function fprintf oc " sth %a, %a(%a)\n" ireg r1 constant c ireg r2 | Psthx(r1, r2, r3) -> fprintf oc " sthx %a, %a, %a\n" ireg r1 ireg r2 ireg r3 + | Psthbrx(r1, r2, r3) -> + fprintf oc " sthbrx %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Pstw(r1, c, r2) | Pstw_a(r1, c, r2) -> fprintf oc " stw %a, %a(%a)\n" ireg r1 constant c ireg r2 + | Pstwu(r1, c, r2) -> + fprintf oc " stwu %a, %a(%a)\n" ireg r1 constant c ireg r2 | Pstwx(r1, r2, r3) | Pstwx_a(r1, r2, r3) -> fprintf oc " stwx %a, %a, %a\n" ireg r1 ireg r2 ireg r3 + | Pstwxu(r1, r2, r3) -> + fprintf oc " stwxu %a, %a, %a\n" ireg r1 ireg r2 ireg r3 + | Pstwbrx(r1, r2, r3) -> + fprintf oc " stwbrx %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Psubfc(r1, r2, r3) -> fprintf oc " subfc %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Psubfe(r1, r2, r3) -> fprintf oc " subfe %a, %a, %a\n" ireg r1 ireg r2 ireg r3 + | Psubfze(r1, r2) -> + fprintf oc " subfze %a, %a\n" ireg r1 ireg r2 | Psubfic(r1, r2, c) -> fprintf oc " subfic %a, %a, %a\n" ireg r1 ireg r2 constant c + | Psync -> + fprintf oc " sync\n" + | Ptrap -> + fprintf oc " trap\n" | Pxor(r1, r2, r3) -> fprintf oc " xor %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Pxori(r1, r2, c) -> @@ -974,21 +616,6 @@ let print_instruction oc tbl pc fallthrough = function fprintf oc "%a:\n" label (transl_label lbl) | Pbuiltin(ef, args, res) -> begin match ef with - | EF_builtin(name, sg) -> - print_builtin_inline oc (extern_atom name) args res - | EF_vload chunk -> - print_builtin_vload oc chunk args res - | EF_vstore chunk -> - print_builtin_vstore oc chunk args - | EF_vload_global(chunk, id, ofs) -> - print_builtin_vload_global oc chunk id ofs args res - | EF_vstore_global(chunk, id, ofs) -> - print_builtin_vstore_global oc chunk id ofs args - | EF_memcpy(sz, al) -> - print_builtin_memcpy oc (Int32.to_int (camlint_of_coqint sz)) - (Int32.to_int (camlint_of_coqint al)) args - | EF_annot_val(txt, targ) -> - print_annot_val oc (extern_atom txt) args res | EF_inline_asm txt -> fprintf oc "%s begin inline assembly\n" comment; fprintf oc " %s\n" (extern_atom txt); @@ -1003,6 +630,10 @@ let print_instruction oc tbl pc fallthrough = function | _ -> assert false end + | Pcfi_adjust n -> + cfi_adjust oc (camlint_of_coqint n) + | Pcfi_rel_offset n -> + cfi_rel_offset oc "lr" (camlint_of_coqint n) (* Determine if an instruction falls through *) @@ -1017,44 +648,15 @@ let instr_fall_through = function PowerPC instructions. We can over-approximate. *) let instr_size = function - | Pallocframe(sz, ofs) -> 3 | Pbf(bit, lbl) -> 2 | Pbt(bit, lbl) -> 2 - | Pbtbl(r, tbl) -> 4 - | Pfcti(r1, r2) -> 4 - | Pfmake(rd, r1, r2) -> 4 + | Pbtbl(r, tbl) -> 5 | Plfi(r1, c) -> 2 - | Pmfcrbit(r1, bit) -> 2 - | Pstfs(r1, c, r2) -> 2 - | Pstfsx(r1, r2, r3) -> 2 + | Plfis(r1, c) -> 2 | Plabel lbl -> 0 - | Pbuiltin(ef, args, res) -> - begin match ef with - | EF_builtin(name, sg) -> - begin match extern_atom name with - | "__builtin_bswap" | "__builtin_bswap32" | "__builtin_bswap16" -> 3 - | "__builtin_fcti" -> 4 - | _ -> 1 - end - | EF_vload chunk -> - if chunk = Mint8signed then 2 else 1 - | EF_vstore chunk -> - if chunk = Mfloat32 then 2 else 1 - | EF_vload_global(chunk, id, ofs) -> - if chunk = Mint8signed then 3 else 2 - | EF_vstore_global(chunk, id, ofs) -> - if chunk = Mfloat32 then 3 else 2 - | EF_memcpy(sz, al) -> - let sz = Int32.to_int (camlint_of_coqint sz) in - if sz <= 64 then (sz / 4) * 2 + 6 else 11 - | EF_annot_val(txt, targ) -> - 0 - | EF_inline_asm txt -> - 8 (* reasonable? default *) - | _ -> - assert false - end + | Pbuiltin(ef, args, res) -> 0 | Pannot(ef, args) -> 0 + | Pcfi_adjust _ | Pcfi_rel_offset _ -> 0 | _ -> 1 (* Build a table label -> estimated position in generated code. @@ -1094,7 +696,6 @@ let print_function oc name fn = float_literals := []; float32_literals := []; jumptables := []; - current_function_sig := fn.fn_sig; let (text, lit, jmptbl) = match C2C.atom_sections name with | [t;l;j] -> (t, l, j) diff --git a/test/Makefile b/test/Makefile index 5771523..ab44be5 100644 --- a/test/Makefile +++ b/test/Makefile @@ -11,3 +11,6 @@ bench: clean: for i in $(DIRS); do $(MAKE) -C $$i clean; done + +ccheck: + for i in $(DIRS); do $(MAKE) -C $$i ccheck; done diff --git a/test/c/Makefile b/test/c/Makefile index 1486666..a81a9d5 100644 --- a/test/c/Makefile +++ b/test/c/Makefile @@ -2,6 +2,10 @@ include ../../Makefile.config CCOMP=../../ccomp CCOMPFLAGS=-stdlib ../../runtime -dc -dclight -dasm +ifeq ($(CCHECKLINK),true) +CCHECK=../../cchecklink +CCOMPFLAGS+= -sdump +endif CFLAGS=-O1 -Wall @@ -38,6 +42,12 @@ test: fi; \ done +ccheck: + @for i in $(PROGS); do \ + echo "---- $$i"; \ + $(CCHECK) -exe $$i.compcert $$i.sdump; \ + done + test_gcc: @for i in $(PROGS); do \ if ./$$i.gcc | cmp -s - Results/$$i; \ @@ -69,4 +79,4 @@ cminor_roundtrip: clean: rm -f *.compcert *.gcc - rm -f *.compcert.c *.light.c *.parsed.c *.s *.o *~ + rm -f *.compcert.c *.light.c *.parsed.c *.s *.o *.sdump *~ diff --git a/test/compression/Makefile b/test/compression/Makefile index 8db55dd..e35e1a1 100644 --- a/test/compression/Makefile +++ b/test/compression/Makefile @@ -1,7 +1,13 @@ +include ../../Makefile.config + CC=../../ccomp CFLAGS=-U__GNUC__ -stdlib ../../runtime -dclight -dasm LIBS= TIME=xtime -o /dev/null -mintime 1.0 +ifeq ($(CCHECKLINK),true) +CCHECK=../../cchecklink +CFLAGS+= -sdump +endif EXE=arcode lzw lzss @@ -51,10 +57,18 @@ bench: done rm -f $(TESTCOMPR) +ccheck: + @echo "---- arcode" + @$(CCHECK) -exe arcode $(ARCODE_OBJS:.o=.sdump) + @echo "---- lzw" + @$(CCHECK) -exe lzw $(LZW_OBJS:.o=.sdump) + @echo "---- lzss" + @$(CCHECK) -exe lzss $(LZSS_OBJS:.o=.sdump) + include .depend clean: - rm -f *.o *.light.c *.s $(EXE) + rm -f *.o *.light.c *.s *.sdump $(EXE) depend: gcc -MM *.c > .depend diff --git a/test/raytracer/Makefile b/test/raytracer/Makefile index a4b8894..c6eb190 100644 --- a/test/raytracer/Makefile +++ b/test/raytracer/Makefile @@ -4,6 +4,10 @@ CC=../../ccomp CFLAGS=-stdlib ../../runtime -dparse -dclight -dasm -fstruct-return LIBS=$(LIBMATH) TIME=xtime -mintime 2.0 +ifeq ($(CCHECKLINK),true) +CCHECK=../../cchecklink +CFLAGS+= -sdump +endif OBJS=memory.o gmllexer.o gmlparser.o eval.o \ arrays.o vector.o matrix.o object.o intersect.o surface.o light.o \ @@ -15,7 +19,7 @@ render: $(OBJS) $(CC) $(CFLAGS) -o render $(OBJS) $(LIBS) clean: - rm -f *.o *.parsed.c *.light.c *.s *.ppm render + rm -f *.o *.parsed.c *.light.c *.s *.sdump *.ppm render include .depend @@ -55,3 +59,8 @@ test: bench: @echo -n "raytracer: "; $(TIME) ./render < kal.gml + +ccheck: + @echo "---- render" + @$(CCHECK) -exe render *.sdump + diff --git a/test/regression/Makefile b/test/regression/Makefile index 189dbd8..3583676 100644 --- a/test/regression/Makefile +++ b/test/regression/Makefile @@ -2,6 +2,10 @@ include ../../Makefile.config CCOMP=../../ccomp CCOMPFLAGS=-stdlib ../../runtime -dparse -dc -dclight -dasm -fall +ifeq ($(CCHECKLINK),true) +CCHECK=../../cchecklink +CCOMPFLAGS+= -sdump +endif LIBS=$(LIBMATH) @@ -48,7 +52,7 @@ all_s: $(TESTS:%=%.s) $(TESTS_COMP:%=%.s) $(TESTS_DIFF:%=%.s) $(EXTRAS:%=%.s) clean: rm -f *.compcert - rm -f *.parsed.c *.compcert.c *.light.c *.s *.o *~ + rm -f *.parsed.c *.compcert.c *.light.c *.s *.o *.sdump *~ test: @for i in $(TESTS) $(TESTS_COMP); do \ @@ -81,3 +85,10 @@ test: done bench: + +ccheck: + @for i in $(TESTS) $(TESTS_COMP); do \ + echo "---- $$i"; \ + $(CCHECK) -exe $$i.compcert $$i.sdump; \ + done + diff --git a/test/spass/Makefile b/test/spass/Makefile index 6797475..6a4cd59 100644 --- a/test/spass/Makefile +++ b/test/spass/Makefile @@ -2,6 +2,10 @@ include ../../Makefile.config CC=../../ccomp CFLAGS=-stdlib ../../runtime -dparse -dclight -dasm -fstruct-return +ifeq ($(CCHECKLINK),true) +CCHECK=../../cchecklink +CFLAGS+= -sdump +endif SRCS=analyze.c clause.c clock.c closure.c cnf.c component.c \ condensing.c context.c defs.c dfgparser.c dfgscanner.c doc-proof.c \ @@ -19,7 +23,7 @@ spass: $(SRCS:.c=.o) clean: rm -f spass - rm -f *.o *.s *.parsed.c *.light.c + rm -f *.o *.s *.parsed.c *.light.c *.sdump test: ./spass small_problem.dfg | grep 'Proof found' @@ -30,6 +34,10 @@ TIME=xtime -o /dev/null # Xavier's hack bench: @echo -n "spass: "; $(TIME) ./spass problem.dfg +ccheck: + @echo "---- spass" + @$(CCHECK) -exe spass *.sdump + depend: gcc -MM $(SRCS) > .depend -- cgit v1.2.3