From 04d0d602ef7245fd566debd91bcb148acd9ed067 Mon Sep 17 00:00:00 2001
From: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>
Date: Mon, 28 Jul 2014 12:13:15 +0000
Subject: PowerPC port: refactored the expansion of built-in functions and
 pseudo-instructions so that it does not need to be re-done in cchecklink.
 cchecklink: updated accordingly. testsuite: compile with -sdump and run
 cchecklink if supported.

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2553 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 Changelog                 |    4 +
 arm/Asmexpand.ml          |   18 +
 checklink/Asm_printers.ml |   47 +-
 checklink/Check.ml        | 1164 +++++++++++++--------------------------------
 checklink/Frameworks.ml   |    8 +-
 checklink/Fuzz.ml         |   19 +-
 driver/Driver.ml          |    5 +-
 extraction/extraction.v   |    1 +
 ia32/Asmexpand.ml         |   18 +
 powerpc/Asm.v             |   99 +++-
 powerpc/Asmexpand.ml      |  525 ++++++++++++++++++++
 powerpc/Asmgen.v          |    3 +-
 powerpc/Asmgenproof.v     |   24 +-
 powerpc/PrintAsm.ml       |  567 ++++------------------
 test/Makefile             |    3 +
 test/c/Makefile           |   12 +-
 test/compression/Makefile |   16 +-
 test/raytracer/Makefile   |   11 +-
 test/regression/Makefile  |   13 +-
 test/spass/Makefile       |   10 +-
 20 files changed, 1200 insertions(+), 1367 deletions(-)
 create mode 100644 arm/Asmexpand.ml
 create mode 100644 ia32/Asmexpand.ml
 create mode 100644 powerpc/Asmexpand.ml

diff --git a/Changelog b/Changelog
index 604618c..8a2af33 100644
--- a/Changelog
+++ b/Changelog
@@ -22,6 +22,10 @@
   registers that can contain data of unknown types (e.g. float32 or float64)
   but known sizes.
 
+- PowerPC port: refactored the expansion of built-in functions and
+  pseudo-instructions so that it does not need to be re-done in
+  cchecklink.  Updated the cchecklink validator accordingly.
+
 
 Release 2.3pl2, 2014-05-15
 ==========================
diff --git a/arm/Asmexpand.ml b/arm/Asmexpand.ml
new file mode 100644
index 0000000..4baaac3
--- /dev/null
+++ b/arm/Asmexpand.ml
@@ -0,0 +1,18 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Expanding built-ins and some pseudo-instructions by rewriting
+   of the ARM assembly code.  Currently not done: this expansion
+   is performed on the fly in PrintAsm. *)
+
+let expand_program p = p  (* identity: the program is returned unchanged *)
+
diff --git a/checklink/Asm_printers.ml b/checklink/Asm_printers.ml
index 9bb2009..1f737c2 100644
--- a/checklink/Asm_printers.ml
+++ b/checklink/Asm_printers.ml
@@ -88,6 +88,7 @@ let string_of_preg = function
 | CR0_1         -> "CR0_1"
 | CR0_2         -> "CR0_2"
 | CR0_3         -> "CR0_3"
+| CR1_2         -> "CR1_2"
 
 let string_of_external_function e =
   match e with
@@ -117,8 +118,19 @@ let string_of_crbit = function
 | CRbit_1    -> "CRbit_1"
 | CRbit_2    -> "CRbit_2"
 | CRbit_3    -> "CRbit_3"
+| CRbit_6    -> "CRbit_6"
 
-let string_of_memory_chunk mc = "MEMORY_CHUNK"
+let string_of_memory_chunk = function  (* short printable name for each memory chunk constructor *)
+  | Mint8signed -> "int8s"
+  | Mint8unsigned -> "int8u"
+  | Mint16signed -> "int16s"
+  | Mint16unsigned -> "int16u"
+  | Mint32 -> "int32"
+  | Mint64 -> "int64"
+  | Mfloat32 -> "float32"
+  | Mfloat64 -> "float64"
+  | Many32 -> "any32"
+  | Many64 -> "any64"
 
 let string_of_annot_param = function
 | APreg (p0)-> "APreg(" ^ string_of_preg p0 ^ ")"
@@ -126,6 +138,7 @@ let string_of_annot_param = function
 
 let string_of_instruction = function
 | Padd      (i0, i1, i2)        -> "Padd(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Paddc     (i0, i1, i2)        -> "Paddc(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Padde     (i0, i1, i2)        -> "Padde(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Paddi     (i0, i1, c2)        -> "Paddi(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_constant c2 ^ ")"
 | Paddic    (i0, i1, c2)        -> "Paddic(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_constant c2 ^ ")"
@@ -139,6 +152,7 @@ let string_of_instruction = function
 | Pb        (l0)                -> "Pb(" ^ string_of_label l0 ^ ")"
 | Pbctr sg                      -> "Pbctr"
 | Pbctrl sg                     -> "Pbctrl"
+| Pbdnz     (l1)                -> "Pbdnz(" ^ string_of_label l1 ^ ")"
 | Pbf       (c0, l1)            -> "Pbf(" ^ string_of_crbit c0 ^ ", " ^ string_of_label l1 ^ ")"
 | Pbl       (i0, sg)            -> "Pbl(" ^ string_of_ident i0 ^ ")"
 | Pbs       (i0, sg)            -> "Pbs(" ^ string_of_ident i0 ^ ")"
@@ -149,9 +163,13 @@ let string_of_instruction = function
 | Pcmplwi   (i0, c1)            -> "Pcmplwi(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ")"
 | Pcmpw     (i0, i1)            -> "Pcmpw(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")"
 | Pcmpwi    (i0, c1)            -> "Pcmpwi(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ")"
+| Pcntlz    (i0, i1)            -> "Pcntlz(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")"
+| Pcreqv    (c0, c1, c2)        -> "Pcreqv(" ^ string_of_crbit c0 ^ ", " ^ string_of_crbit c1 ^ ", " ^ string_of_crbit c2 ^ ")"
 | Pcror     (c0, c1, c2)        -> "Pcror(" ^ string_of_crbit c0 ^ ", " ^ string_of_crbit c1 ^ ", " ^ string_of_crbit c2 ^ ")"
+| Pcrxor    (c0, c1, c2)        -> "Pcrxor(" ^ string_of_crbit c0 ^ ", " ^ string_of_crbit c1 ^ ", " ^ string_of_crbit c2 ^ ")"
 | Pdivw     (i0, i1, i2)        -> "Pdivw(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pdivwu    (i0, i1, i2)        -> "Pdivwu(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Peieio                        -> "Peieio"
 | Peqv      (i0, i1, i2)        -> "Peqv(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pextsb    (i0, i1)            -> "Pextsb(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")"
 | Pextsh    (i0, i1)            -> "Pextsh(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")"
@@ -162,6 +180,8 @@ let string_of_instruction = function
 | Pfadds    (f0, f1, f2)        -> "Pfadds(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ")"
 | Pfcmpu    (f0, f1)            -> "Pfcmpu(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")"
 | Pfcti     (i0, f1)            -> "Pfcti(" ^ string_of_ireg i0 ^ ", " ^ string_of_freg f1 ^ ")"
+| Pfctiw    (f0, f1)            -> "Pfctiw(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")"
+| Pfctiwz   (f0, f1)            -> "Pfctiwz(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")"
 | Pfdiv     (f0, f1, f2)        -> "Pfdiv(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ")"
 | Pfdivs    (f0, f1, f2)        -> "Pfdivs(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ")"
 | Pfmake    (f0, i1, i2)        -> "Pfmake(" ^ string_of_freg f0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
@@ -174,6 +194,15 @@ let string_of_instruction = function
 | Pfxdp     (f0, f1)            -> "Pfxdp(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")"
 | Pfsub     (f0, f1, f2)        -> "Pfsub(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ")"
 | Pfsubs    (f0, f1, f2)        -> "Pfsubs(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ")"
+| Pfmadd    (f0, f1, f2, f3)    -> "Pfmadd(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ", " ^ string_of_freg f3 ^ ")"
+| Pfmsub    (f0, f1, f2, f3)    -> "Pfmsub(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ", " ^ string_of_freg f3 ^ ")"
+| Pfnmadd   (f0, f1, f2, f3)    -> "Pfnmadd(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ", " ^ string_of_freg f3 ^ ")"
+| Pfnmsub   (f0, f1, f2, f3)    -> "Pfnmsub(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ", " ^ string_of_freg f3 ^ ")"
+| Pfsqrt    (f0, f1)            -> "Pfsqrt(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")"
+| Pfrsqrte  (f0, f1)            -> "Pfrsqrte(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")"
+| Pfres     (f0, f1)            -> "Pfres(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ")"
+| Pfsel     (f0, f1, f2, f3)    -> "Pfsel(" ^ string_of_freg f0 ^ ", " ^ string_of_freg f1 ^ ", " ^ string_of_freg f2 ^ ", " ^ string_of_freg f3 ^ ")"
+| Pisync                        -> "Pisync"
 | Plbz      (i0, c1, i2)        -> "Plbz(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plbzx     (i0, i1, i2)        -> "Plbzx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plfd      (f0, c1, i2)        -> "Plfd(" ^ string_of_freg f0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
@@ -184,14 +213,18 @@ let string_of_instruction = function
 | Plfsx     (f0, i1, i2)        -> "Plfsx(" ^ string_of_freg f0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plha      (i0, c1, i2)        -> "Plha(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plhax     (i0, i1, i2)        -> "Plhax(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Plhbrx    (i0, i1, i2)        -> "Plhbrx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plhz      (i0, c1, i2)        -> "Plhz(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plhzx     (i0, i1, i2)        -> "Plhzx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plfi      (f0, f1)            -> "Plfi(" ^ string_of_freg f0 ^ ", " ^ string_of_ffloat f1 ^ ")"
 | Plfis     (f0, f1)            -> "Plfis(" ^ string_of_freg f0 ^ ", " ^ string_of_ffloat32 f1 ^ ")"
+| Plwbrx    (i0, i1, i2)        -> "Plwbrx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plwz      (i0, c1, i2)        -> "Plwz(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Plwzu     (i0, c1, i2)        -> "Plwzu(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plwz_a    (i0, c1, i2)        -> "Plwz_a(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plwzx     (i0, i1, i2)        -> "Plwzx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Plwzx_a   (i0, i1, i2)        -> "Plwzx_a(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Pmfcr     (i0)                -> "Pmfcr(" ^ string_of_ireg i0 ^ ")"
 | Pmfcrbit  (i0, c1)            -> "Pmfcrbit(" ^ string_of_ireg i0 ^ ", " ^ string_of_crbit c1 ^ ")"
 | Pmflr     (i0)                -> "Pmflr(" ^ string_of_ireg i0 ^ ")"
 | Pmr       (i0, i1)            -> "Pmr(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")"
@@ -217,32 +250,42 @@ let string_of_instruction = function
 | Pstbx     (i0, i1, i2)        -> "Pstbx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pstfd     (f0, c1, i2)        -> "Pstfd(" ^ string_of_freg f0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pstfd_a   (f0, c1, i2)        -> "Pstfd_a(" ^ string_of_freg f0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Pstfdu    (f0, c1, i2)        -> "Pstfdu(" ^ string_of_freg f0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pstfdx    (f0, i1, i2)        -> "Pstfdx(" ^ string_of_freg f0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pstfdx_a  (f0, i1, i2)        -> "Pstfdx_a(" ^ string_of_freg f0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pstfs     (f0, c1, i2)        -> "Pstfs(" ^ string_of_freg f0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pstfsx    (f0, i1, i2)        -> "Pstfsx(" ^ string_of_freg f0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Psth      (i0, c1, i2)        -> "Psth(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Psthx     (i0, i1, i2)        -> "Psthx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Psthbrx   (i0, i1, i2)        -> "Psthbrx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pstw      (i0, c1, i2)        -> "Pstw(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pstw_a    (i0, c1, i2)        -> "Pstw_a(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Pstwu     (i0, c1, i2)        -> "Pstwu(" ^ string_of_ireg i0 ^ ", " ^ string_of_constant c1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pstwx     (i0, i1, i2)        -> "Pstwx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pstwx_a   (i0, i1, i2)        -> "Pstwx_a(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Pstwxu    (i0, i1, i2)        -> "Pstwxu(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Pstwbrx   (i0, i1, i2)        -> "Pstwbrx(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Psubfc    (i0, i1, i2)        -> "Psubfc(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Psubfe    (i0, i1, i2)        -> "Psubfe(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
+| Psubfze   (i0, i1)            -> "Psubfze(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ")"
 | Psubfic   (i0, i1, c2)        -> "Psubfic(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_constant c2 ^ ")"
+| Psync                         -> "Psync"
+| Ptrap                         -> "Ptrap"
 | Pxor      (i0, i1, i2)        -> "Pxor(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_ireg i2 ^ ")"
 | Pxori     (i0, i1, c2)        -> "Pxori(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_constant c2 ^ ")"
 | Pxoris    (i0, i1, c2)        -> "Pxoris(" ^ string_of_ireg i0 ^ ", " ^ string_of_ireg i1 ^ ", " ^ string_of_constant c2 ^ ")"
 | Plabel    (l0)                -> "Plabel(" ^ string_of_label l0 ^ ")"
 | Pbuiltin  (e0, p1, p2)        -> "Pbuiltin(" ^ string_of_external_function e0 ^ ", " ^ string_of_list string_of_preg ", " p1 ^ ", " ^ string_of_list string_of_preg ", " p2 ^ ")"
 | Pannot    (e0, a1)            -> "Pannot(" ^ string_of_external_function e0 ^ ", " ^ string_of_list string_of_annot_param ", " a1 ^ ")"
+| Pcfi_adjust n                 -> "Pcfi_adjust(" ^ string_of_coq_Z n ^ ")"
+| Pcfi_rel_offset n             -> "Pcfi_rel_offset(" ^ string_of_coq_Z n ^ ")"
 
 let string_of_init_data = function
 | Init_int8(i)    -> "Init_int8("    ^ string_of_int (z_int_lax i)  ^ ")"
 | Init_int16(i)   -> "Init_int16("   ^ string_of_int (z_int_lax i)  ^ ")"
 | Init_int32(i)   -> "Init_int32("   ^ string_of_int32i (z_int32 i) ^ ")"
 | Init_int64(i)   -> "Init_int64("   ^ string_of_int64i (z_int64 i) ^ ")"
-| Init_float32(f) -> "Init_float32(" ^ string_of_ffloat f           ^ ")"
+| Init_float32(f) -> "Init_float32(" ^ string_of_ffloat32 f         ^ ")"
 | Init_float64(f) -> "Init_float64(" ^ string_of_ffloat f           ^ ")"
 | Init_space(z)   -> "Init_space("   ^ string_of_int (z_int z)      ^ ")"
 | Init_addrof(ident, ofs) ->
diff --git a/checklink/Check.ml b/checklink/Check.ml
index 9f842b8..7eb3ea3 100644
--- a/checklink/Check.ml
+++ b/checklink/Check.ml
@@ -68,8 +68,8 @@ let check_st_bind atom (sym: elf32_sym): s_framework -> s_framework =
 (** Adapted from CompCert *)
 let name_of_section_Linux: section_name -> string = function
 | Section_text -> ".text"
-| Section_data i -> if i then ".data" else ".bss"
-| Section_small_data i -> if i then ".sdata" else ".sbss"
+| Section_data i -> if i then ".data" else "COMM"
+| Section_small_data i -> if i then ".sdata" else "COMM"
 | Section_const -> ".rodata"
 | Section_small_const -> ".sdata2"
 | Section_string -> ".rodata"
@@ -277,10 +277,12 @@ let freg_arr: freg array =
     FPR23; FPR24; FPR25; FPR26; FPR27; FPR28; FPR29; FPR30; FPR31
   |]
 
-let crbit_arr: crbit array =
-  [|
-    CRbit_0; CRbit_1; CRbit_2; CRbit_3
-  |]
+let num_crbit = function  (* integer bit number carried in the name of each crbit constructor *)
+  | CRbit_0 -> 0
+  | CRbit_1 -> 1
+  | CRbit_2 -> 2
+  | CRbit_3 -> 3
+  | CRbit_6 -> 6
 
 type checker = f_framework -> f_framework or_err
 let check (cond: bool) (msg: string): checker =
@@ -301,22 +303,20 @@ let match_int32s a b: checker =
   ) a b
 (** We compare floats by their bit representation, so that 0.0 and -0.0 are
     different. *)
-let match_floats (a: Floats.float) (b: float): checker =
-  let a = Int64.bits_of_float (camlfloat_of_coqfloat a) in
-  let b = Int64.bits_of_float b in
+let match_floats (a: Floats.float) (b: int64): checker =
+  let a = camlint64_of_coqint (Floats.Float.to_bits a) in  (* only [a] is converted; [b] is already an int64 *)
   check_eq (
     Printf.sprintf "match_floats %s %s" (string_of_int64 a) (string_of_int64 b)
   ) a b
-let match_floats32 (a: Floats.float32) (b: float): checker =
-  let a = Int64.bits_of_float (camlfloat_of_coqfloat32 a) in
-  let b = Int64.bits_of_float b in
+let match_floats32 (a: Floats.float32) (b: int32): checker =
+  let a = camlint_of_coqint (Floats.Float32.to_bits a) in  (* only [a] is converted; [b] is already an int32 *)
   check_eq (
-    Printf.sprintf "match_floats %s %s" (string_of_int64 a) (string_of_int64 b)
+    Printf.sprintf "match_floats32 %s %s" (string_of_int32 a) (string_of_int32 b)
   ) a b
 let match_crbits cb eb =
-  let eb = crbit_arr.(eb) in
+  let cb = num_crbit cb in  (* compare as integers: [cb] is converted, [eb] is used as given *)
   check_eq (
-    Printf.sprintf "match_crbits %s %s" (string_of_crbit cb) (string_of_crbit eb)
+    Printf.sprintf "match_crbits %d %d" cb eb
   ) cb eb
 let match_iregs  cr er =
   let er = ireg_arr.(er) in
@@ -575,195 +575,6 @@ let rec match_jmptbl lbllist vaddr size ffw =
       end
   end
 
-(** Matches [ecode] against the expected CR6 magic before a function call.
-*)
-let match_set_cr6 sg ecode =
-  if sg.sig_cc.cc_vararg then
-    if List.mem Tfloat sg.sig_args then
-      match ecode with
-      | CREQV(6, 6, 6) :: ecode' -> Some ecode'
-      | _ -> None
-    else
-      match ecode with
-      | CRXOR(6, 6, 6) :: ecode' -> Some ecode'
-      | _ -> None
-  else Some ecode
-
-(** Matches [ecode] agains the expected code for a small memory copy
-    pseudo-instruction. Returns a triple containing the updated framework,
-    the remaining ELF code, and the updated program counter.
-*)
-let match_memcpy_small ecode pc sz al src dst (fw: f_framework)
-    : (f_framework * ecode * int32) or_err =
-  let error = ERR("match_memcpy_small") in
-  let rec match_memcpy_small_aux ofs sz ecode pc (fw: f_framework) =
-    let ofs32 = Safe32.of_int ofs in
-    if sz >= 8 && al >= 4
-    then (
-      match ecode with
-      |   LFD (frD0, rA0, d0) ::
-          STFD(frS1, rA1, d1) :: es ->
-          OK(fw)
-          >>= match_fregs  FPR13 frD0
-          >>= match_iregs  src   rA0
-          >>= match_int32s ofs32 (exts d0)
-          >>= match_fregs  FPR13 frS1
-          >>= match_iregs  dst   rA1
-          >>= match_int32s ofs32 (exts d1)
-          >>= match_memcpy_small_aux (ofs + 8) (sz - 8) es (Int32.add 8l pc)
-      | _ -> error
-    )
-    else if sz >= 4
-    then (
-      match ecode with
-      |   LWZ(rD0, rA0, d0) ::
-          STW(rS1, rA1, d1) :: es ->
-          OK(fw)
-          >>= match_iregs  GPR0  rD0
-          >>= match_iregs  src   rA0
-          >>= match_int32s ofs32 (exts d0)
-          >>= match_iregs  GPR0  rS1
-          >>= match_iregs  dst   rA1
-          >>= match_int32s ofs32 (exts d0)
-          >>= match_memcpy_small_aux (ofs + 4) (sz - 4) es (Int32.add 8l pc)
-      | _ -> error
-    )
-    else if sz >= 2
-    then (
-      match ecode with
-      |   LHZ(rD0, rA0, d0) ::
-          STH(rS1, rA1, d1) :: es ->
-          OK(fw)
-          >>= match_iregs  GPR0  rD0
-          >>= match_iregs  src   rA0
-          >>= match_int32s ofs32 (exts d0)
-          >>= match_iregs  GPR0  rS1
-          >>= match_iregs  dst   rA1
-          >>= match_int32s ofs32 (exts d0)
-          >>= match_memcpy_small_aux (ofs + 2) (sz - 2) es (Int32.add 8l pc)
-      | _ -> error
-    )
-    else if sz >= 1
-    then (
-      match ecode with
-      |   LBZ(rD0, rA0, d0) ::
-          STB(rS1, rA1, d1) :: es ->
-          OK(fw)
-          >>= match_iregs  GPR0  rD0
-          >>= match_iregs  src   rA0
-          >>= match_int32s ofs32 (exts d0)
-          >>= match_iregs  GPR0  rS1
-          >>= match_iregs  dst   rA1
-          >>= match_int32s ofs32 (exts d0)
-          >>= match_memcpy_small_aux (ofs + 1) (sz - 1) es (Int32.add 8l pc)
-      | _ -> error
-    )
-    else OK(fw, ecode, pc)
-  in match_memcpy_small_aux 0 sz ecode pc fw
-
-(** Matches [ecode] agains the expected code for a big memory copy
-    pseudo-instruction. Returns a triple containing the updated framework,
-    the remaining ELF code, and the updated program counter.
-*)
-let match_memcpy_big ecode pc sz al src dst fw
-    : (f_framework * ecode * int32) or_err =
-  let error = ERR("match_memcpy_big") in
-  match ecode with
-  |   ADDI (rD0, rA0, simm0)           :: (* pc *)
-      MTSPR(rS1, spr1)                 ::
-      ADDI (rD2, rA2, simm2)           ::
-      ADDI (rD3, rA3, simm3)           ::
-      LWZU (rD4, rA4, d4)              :: (* pc + 16 <-  *)
-      STWU (rS5, rA5, d5)              :: (*           | *)
-      BCx  (bo6,  bi6,  bd6, aa6, lk6) :: (* pc + 24 --  *)
-      es ->
-      let sz' = Safe32.of_int (sz / 4) in
-      let (s, d) = if dst <> GPR11 then (GPR11, GPR12) else (GPR12, GPR11) in
-      let target_vaddr = Int32.(add 16l pc) in
-      let dest_vaddr = Int32.(add (add 24l pc) (mul 4l (exts bd6))) in
-      OK(fw)
-      >>= match_iregs  GPR0  rD0
-      >>= match_iregs  GPR0  rA0
-      >>= match_int32s sz'   (exts simm0)
-      >>= match_iregs  GPR0  rS1
-      >>= match_ctr    spr1
-      >>= match_iregs  s     rD2
-      >>= match_iregs  src   rA2
-      >>= match_int32s (-4l) (exts simm2)
-      >>= match_iregs  d     rD3
-      >>= match_iregs  dst   rA3
-      >>= match_int32s (-4l) (exts simm3)
-      >>= match_iregs  GPR0  rD4
-      >>= match_iregs  s     rA4
-      >>= match_int32s 4l    (exts d4)
-      >>= match_iregs  GPR0  rS5
-      >>= match_iregs  d     rA5
-      >>= match_int32s 4l    (exts d5)
-      >>= (fun ffw ->
-        bitmatch bo6 with
-        | { 16:5:int } -> OK(ffw)
-        | { _ }        -> ERR("bitmatch bo")
-      )
-      >>= match_ints   bi6   0
-      >>= match_int32s dest_vaddr target_vaddr
-      >>= match_bools  false aa6
-      >>= match_bools  false lk6
-      >>= (fun fw ->
-        match sz land 3 with
-        | 1 ->
-            begin match es with
-            |   LBZ(rD0, rA0, d0) ::
-                STB(rS1, rA1, d1) :: es ->
-                OK(fw)
-                >>= match_iregs  GPR0 rD0
-                >>= match_iregs  s    rA0
-                >>= match_int32s 4l   (exts d0)
-                >>= match_iregs  GPR0 rS1
-                >>= match_iregs  d    rA1
-                >>= match_int32s 4l   (exts d1)
-                >>= (fun fw -> OK(fw, es, Int32.add 36l pc))
-            | _ -> error
-            end
-        | 2 ->
-            begin match es with
-            |   LHZ(rD0, rA0, d0) ::
-                STH(rS1, rA1, d1) :: es ->
-                OK(fw)
-                >>= match_iregs  GPR0 rD0
-                >>= match_iregs  s    rA0
-                >>= match_int32s 4l   (exts d0)
-                >>= match_iregs  GPR0 rS1
-                >>= match_iregs  d    rA1
-                >>= match_int32s 4l   (exts d1)
-                >>= (fun fw -> OK(fw, es , Int32.add 36l pc))
-            | _ -> error
-            end
-        | 3 ->
-            begin match es with
-            |   LHZ(rD0, rA0, d0) ::
-                STH(rS1, rA1, d1) ::
-                LBZ(rD2, rA2, d2) ::
-                STB(rS3, rA3, d3) :: es ->
-                OK(fw)
-                >>= match_iregs  GPR0 rD0
-                >>= match_iregs  s    rA0
-                >>= match_int32s 4l   (exts d0)
-                >>= match_iregs  GPR0 rS1
-                >>= match_iregs  d    rA1
-                >>= match_int32s 4l   (exts d1)
-                >>= match_iregs  GPR0 rD2
-                >>= match_iregs  s    rA2
-                >>= match_int32s 6l   (exts d2)
-                >>= match_iregs  GPR0 rS3
-                >>= match_iregs  d    rA3
-                >>= match_int32s 6l   (exts d3)
-                >>= (fun fw -> OK(fw, es, Int32.add 44l pc))
-            | _ -> error
-            end
-        | _ -> OK(fw, es, Int32.add 28l pc)
-      )
-  | _ -> error
-
 let match_bo_bt_bool bo =
   bitmatch bo with
   | { false:1; true:1; true:1; false:1; false:1 } -> true
@@ -774,6 +585,11 @@ let match_bo_bf_bool bo =
   | { false:1; false:1; true:1; false:1; false:1 } -> true
   | { _ } -> false
 
+let match_bo_bdnz_bool bo =  (* true when [bo] matches the bit pattern 1,0,0,0,0, false otherwise *)
+  bitmatch bo with
+  | { true:1; false:1; false:1; false:1; false:1 } -> true
+  | { _ } -> false
+
 let match_bo_bt bo: checker = fun ffw ->
   bitmatch bo with
   | { false:1; true:1; true:1; false:1; false:1 } -> OK(ffw)
@@ -869,6 +685,18 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Paddc(rd, r1, r2) ->
+          begin match ecode with
+          | ADDCx(rD, rA, rB, oe, rc) :: es ->
+              OK(fw)
+              >>= match_iregs rd    rD
+              >>= match_iregs r1    rA
+              >>= match_iregs r2    rB
+              >>= match_bools false oe
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
       | Padde(rd, r1, r2) ->
           begin match ecode with
           | ADDEx(rD, rA, rB, oe, rc) :: es ->
@@ -931,34 +759,7 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
-      | Pallocframe(sz, ofs) ->
-          begin match ecode with
-          | STWU(rS, rA, d) :: es ->
-              OK(fw)
-              >>= match_iregs   GPR1 rS
-              >>= match_iregs   GPR1 rA
-              >>= match_z_int32 sz   (Int32.neg (exts d))
-              >>= match_z_int32 ofs  0l
-              >>= recur_simpl
-          |   ADDIS   (rD0, rA0, simm0)  ::
-              ORI     (rS1, rA1, uimm1)  ::
-              STWUX   (rS2, rA2, rB2)    :: es ->
-              let sz32 = Int32.neg (z_int32 sz) in
-              let sz_hi = Int32.shift_right_logical sz32 16 in
-              let sz_lo = Int32.logand sz32 0xFFFFl in
-              OK(fw)
-              >>= match_iregs  GPR12 rD0
-              >>= match_iregs  GPR0  rA0
-              >>= match_int32s sz_hi (Safe32.of_int simm0)
-              >>= match_iregs  GPR12 rS1
-              >>= match_iregs  GPR12 rA1
-              >>= match_int32s sz_lo (Safe32.of_int uimm1)
-              >>= match_iregs  GPR1  rS2
-              >>= match_iregs  GPR1  rA2
-              >>= match_iregs  GPR12 rB2
-              >>= compare_code cs es (Int32.add 12l pc)
-          | _ -> error
-          end
+      | Pallocframe(sz, ofs) -> error
       | Pandc(rd, r1, r2) ->
           begin match ecode with
           | ANDCx(rS, rA, rB, rc) :: es ->
@@ -1016,8 +817,8 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
           | _ -> error
           end
       | Pbctr sg ->
-          begin match match_set_cr6 sg ecode with
-          | Some(BCCTRx(bo, bi, lk) :: es) ->
+          begin match ecode with
+          | BCCTRx(bo, bi, lk) :: es ->
               OK(fw)
               >>= match_bo_ctr bo
               >>= match_ints  0     bi
@@ -1026,8 +827,8 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
           | _ -> error
           end
       | Pbctrl sg ->
-          begin match match_set_cr6 sg ecode with
-          | Some(BCCTRx(bo, bi, lk) :: es) ->
+          begin match ecode with
+          | BCCTRx(bo, bi, lk) :: es ->
               OK(fw)
               >>= match_bo_ctr bo
               >>= match_ints  0    bi
@@ -1035,6 +836,18 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Pbdnz(lbl) ->
+          begin match ecode with
+          | BCx (bo,  bi,  bd, aa, lk) :: es when match_bo_bdnz_bool bo ->
+              let lblvaddr = Int32.(add pc (mul 4l (exts bd))) in
+              OK(fw)
+              >>= match_ints 0 bi
+              >>= lblmap_unify lbl lblvaddr     
+              >>= match_bools  false aa
+              >>= match_bools  false lk
+              >>= recur_simpl
+          | _ -> error
+          end
       | Pbf(bit, lbl) ->
           begin match ecode with
           | BCx(bo, bi, bd, aa, lk) :: es when match_bo_bf_bool bo ->
@@ -1064,8 +877,8 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
           | _ -> error
           end
       | Pbl(ident, sg) ->
-          begin match match_set_cr6 sg ecode with
-          | Some(Bx(li, aa, lk) :: es) ->
+          begin match ecode with
+          | Bx(li, aa, lk) :: es ->
               let dest = Int32.(add pc (mul 4l (exts li))) in
               OK(fw)
               >>= (ff_sf ^%=? idmap_unify ident dest)
@@ -1085,8 +898,8 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
           | _ -> error
           end
       | Pbs(ident, sg) ->
-          begin match match_set_cr6 sg ecode with
-          | Some(Bx(li, aa, lk) :: es) ->
+          begin match ecode with
+          | Bx(li, aa, lk) :: es ->
               let dest = Int32.(add pc (mul 4l (exts li))) in
               OK(fw)
               >>= match_bools false aa
@@ -1156,386 +969,12 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
           end
       | Pbuiltin(ef, args, res) ->
           begin match ef with
-          | EF_builtin(name, sg) ->
-              begin match Hashtbl.find
-                  (fw |. ff_sf).ident_to_name name, args, res with
-                  | "__builtin_mulhw", [IR a1; IR a2], [IR res] ->
-                      begin match ecode with
-                      | MULHWx(rD, rA, rB, rc) :: es ->
-                          OK(fw)
-                          >>= match_iregs res   rD
-                          >>= match_iregs a1    rA
-                          >>= match_iregs a2    rB
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_mulhwu", [IR a1; IR a2], [IR res] ->
-                      begin match ecode with
-                      | MULHWUx(rD, rA, rB, rc) :: es ->
-                          OK(fw)
-                          >>= match_iregs res   rD
-                          >>= match_iregs a1    rA
-                          >>= match_iregs a2    rB
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_cntlz", [IR a1], [IR res] ->
-                      begin match ecode with
-                      | CNTLZWx(rS, rA, rc) :: es ->
-                          OK(fw)
-                          >>= match_iregs a1    rS
-                          >>= match_iregs res   rA
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | ("__builtin_bswap"|"__builtin_bswap32"), [IR a1], [IR res] ->
-                      begin match ecode with
-                      |   STWU (rS0, rA0, d0)    ::
-                          LWBRX(rD1, rA1, rB1)   ::
-                          ADDI (rD2, rA2, simm2) :: es ->
-                          OK(fw)
-                          >>= match_iregs  a1    rS0
-                          >>= match_iregs  GPR1  rA0
-                          >>= match_int32s (-8l) (exts d0)
-                          >>= match_iregs  res   rD1
-                          >>= match_iregs  GPR0  rA1
-                          >>= match_iregs  GPR1  rB1
-                          >>= match_iregs  GPR1  rD2
-                          >>= match_iregs  GPR1  rA2
-                          >>= match_int32s 8l    (exts simm2)
-                          >>= compare_code cs es (Int32.add 12l pc)
-                      | _ -> error
-                      end
-                  | "__builtin_bswap16", [IR a1], [IR res] ->
-                      begin match ecode with
-                      |   RLWINMx(rS1, rA1, sh1, mb1, me1, rc1) ::
-                          RLWINMx(rS2, rA2, sh2, mb2, me2, rc2) ::
-                          ORx(rS3, rA3, rB3, rc3) :: es ->
-                          OK(fw)
-                          >>= match_iregs  GPR0  rS1
-                          >>= match_iregs  a1    rA1
-                          >>= check_eq "bswap16-1" sh1 8
-                          >>= check_eq "bswap16-2" mb1 16
-                          >>= check_eq "bswap16-3" me1 23
-                          >>= match_iregs  res   rS2
-                          >>= match_iregs  a1    rA2
-                          >>= check_eq "bswap16-4" sh2 24
-                          >>= check_eq "bswap16-5" mb2 24
-                          >>= check_eq "bswap16-6" me2 31
-                          >>= match_iregs  res   rS3
-                          >>= match_iregs  GPR0  rA3
-                          >>= match_iregs  res   rB3
-                          >>= compare_code cs es (Int32.add 12l pc)
-                      | _ -> error
-                      end
-                  | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] ->
-                      begin match ecode with
-                      | FMADDx(frD, frA, frB, frC, rc) :: es ->
-                          OK(fw)
-                          >>= match_fregs res frD
-                          >>= match_fregs a1 frA
-                          >>= match_fregs a3 frB
-                          >>= match_fregs a2 frC
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] ->
-                      begin match ecode with
-                      | FMSUBx(frD, frA, frB, frC, rc) :: es ->
-                          OK(fw)
-                          >>= match_fregs res frD
-                          >>= match_fregs a1 frA
-                          >>= match_fregs a3 frB
-                          >>= match_fregs a2 frC
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] ->
-                      begin match ecode with
-                      | FNMADDx(frD, frA, frB, frC, rc) :: es ->
-                          OK(fw)
-                          >>= match_fregs res frD
-                          >>= match_fregs a1 frA
-                          >>= match_fregs a3 frB
-                          >>= match_fregs a2 frC
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] ->
-                      begin match ecode with
-                      | FNMSUBx(frD, frA, frB, frC, rc) :: es ->
-                          OK(fw)
-                          >>= match_fregs res frD
-                          >>= match_fregs a1 frA
-                          >>= match_fregs a3 frB
-                          >>= match_fregs a2 frC
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_fabs", [FR a1], [FR res] ->
-                      begin match ecode with
-                      | FABSx(frD, frB, rc) :: es ->
-                          OK(fw)
-                          >>= match_fregs res   frD
-                          >>= match_fregs a1    frB
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_fsqrt", [FR a1], [FR res] ->
-                      begin match ecode with
-                      | FSQRTx(frD, frB, rc) :: es ->
-                          OK(fw)
-                          >>= match_fregs res   frD
-                          >>= match_fregs a1    frB
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_frsqrte", [FR a1], [FR res] ->
-                      begin match ecode with
-                      | FRSQRTEx(frD, frB, rc) :: es ->
-                          OK(fw)
-                          >>= match_fregs res   frD
-                          >>= match_fregs a1    frB
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_fres", [FR a1], [FR res] ->
-                      begin match ecode with
-                      | FRESx(frD, frB, rc) :: es ->
-                          OK(fw)
-                          >>= match_fregs res   frD
-                          >>= match_fregs a1    frB
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] ->
-                      begin match ecode with
-                      | FSELx(frD, frA, frB, frC, rc) :: es ->
-                          OK(fw)
-                          >>= match_fregs res frD
-                          >>= match_fregs a1 frA
-                          >>= match_fregs a3 frB
-                          >>= match_fregs a2 frC
-                          >>= match_bools false rc
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_fcti", [FR r1], [IR rd] ->
-                      begin match ecode with
-                      | FCTIWx(frD0, frB0, rc0)   ::
-                        STFDU  (frS1, rA1,  d1)    ::
-                        LWZ    (rD2,  rA2,  d2)    ::
-                        ADDI   (rD3,  rA3,  simm3) :: es ->
-                          OK(fw)
-                          >>= match_fregs  FPR13 frD0
-                          >>= match_fregs  r1    frB0
-                          >>= match_bools  false rc0
-                          >>= match_fregs  FPR13 frS1
-                          >>= match_iregs  GPR1  rA1
-                          >>= match_int32s (-8l) (exts d1)
-                          >>= match_iregs  rd    rD2
-                          >>= match_iregs  GPR1  rA2
-                          >>= match_int32s 4l    (exts d2)
-                          >>= match_iregs  GPR1  rD3
-                          >>= match_iregs  GPR1  rA3
-                          >>= match_int32s 8l    (exts simm3)
-                          >>= compare_code cs es (Int32.add 16l pc)
-                      | _ -> error
-                      end
-                  | "__builtin_read16_reversed", [IR a1], [IR res] ->
-                      begin match ecode with
-                      | LHBRX(rD, rA, rB):: es ->
-                          OK(fw)
-                          >>= match_iregs res  rD
-                          >>= match_iregs GPR0 rA
-                          >>= match_iregs a1   rB
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_read32_reversed", [IR a1], [IR res] ->
-                      begin match ecode with
-                      | LWBRX(rD, rA, rB) :: es ->
-                          OK(fw)
-                          >>= match_iregs res  rD
-                          >>= match_iregs GPR0 rA
-                          >>= match_iregs a1   rB
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
-                      begin match ecode with
-                      | STHBRX(rS, rA, rB) :: es ->
-                          OK(fw)
-                          >>= match_iregs a2   rS
-                          >>= match_iregs GPR0 rA
-                          >>= match_iregs a1   rB
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_write32_reversed", [IR a1; IR a2], _ ->
-                      begin match ecode with
-                      | STWBRX(rS, rA, rB) :: es ->
-                          OK(fw)
-                          >>= match_iregs a2   rS
-                          >>= match_iregs GPR0 rA
-                          >>= match_iregs a1   rB
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_eieio", [], _ ->
-                      begin match ecode with
-                      | EIEIO :: es ->
-                          OK(fw)
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_sync", [], _ ->
-                      begin match ecode with
-                      | SYNC :: es ->
-                          OK(fw)
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_isync", [], _ ->
-                      begin match ecode with
-                      | ISYNC :: es ->
-                          OK(fw)
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | "__builtin_trap", [], _ ->
-                      begin match ecode with
-                      | TW(tO, rA, rB) :: es ->
-                          OK(fw)
-                          >>= (fun ffw ->
-                            bitmatch tO with
-                            | { 31 : 5 : int } -> OK(ffw)
-                            | { _ } -> ERR("bitmatch")
-                          )
-                          >>= match_iregs GPR0 rA
-                          >>= match_iregs GPR0 rB
-                          >>= recur_simpl
-                      | _ -> error
-                      end
-                  | _ -> error
-              end
-          | EF_vload(chunk) ->
-              begin match args with
-              | [IR addr] ->
-                  OK(fw)
-                  >>= check_builtin_vload_common
-                    cs ecode pc chunk addr (Cint Integers.Int.zero) res
-              | _ -> fatal "Unexpected args in EF_vload, please report."
-              end
-          | EF_vstore(chunk) ->
-              begin match args with
-              | [IR addr; src] ->
-                  OK(fw)
-                  >>= check_builtin_vstore_common
-                    cs ecode pc chunk addr (Cint Integers.Int.zero) src
-              | _ -> fatal "Unexpected args in EF_vstore, please report."
-              end
-          | EF_vload_global(chunk, ident, ofs) ->
-              begin match ecode with
-              | ADDIS(rD, rA, simm) :: es ->
-                  let high = Csymbol_high(ident, ofs) in
-                  OK(fw)
-                  >>= match_iregs  GPR11 rD
-                  >>= match_iregs  GPR0  rA
-                  >>= match_csts   high  (Safe32.of_int simm)
-                  >>= check_builtin_vload_common
-                    cs es (Int32.add pc 4l) chunk GPR11
-                    (Csymbol_low(ident, ofs)) res
-              | _ -> error
-              end
-          | EF_vstore_global(chunk, ident, ofs) ->
-              begin match args with
-              | [src] ->
-                  begin match ecode with
-                  | ADDIS(rD, rA, simm) :: es ->
-                      let tmp =
-                        if src = IR GPR11
-                        then GPR12
-                        else GPR11
-                      in
-                      let high = Csymbol_high(ident, ofs) in
-                      OK(fw)
-                      >>= match_iregs  tmp   rD
-                      >>= match_iregs  GPR0  rA
-                      >>= match_csts   high  (Safe32.of_int simm)
-                      >>= check_builtin_vstore_common
-                        cs es (Int32.add pc 4l) chunk tmp
-                        (Csymbol_low(ident, ofs)) src
-                  | _ -> error
-                  end
-              | _ -> fatal "Unexpected args in EF_vstore_global, please report."
-              end
-          | EF_memcpy(sz, al) ->
-              let sz = z_int sz in
-              let al = z_int al in
-              begin match args with
-              | [IR dst; IR src] ->
-                  if sz <= 48
-                  then (
-                    match match_memcpy_small ecode pc sz al src dst fw with
-                    | ERR(s) -> ERR(s)
-                    | OK(fw, es, pc) -> compare_code cs es pc fw
-                  )
-                  else (
-                    match match_memcpy_big ecode pc sz al src dst fw with
-                    | ERR(s) -> ERR(s)
-                    | OK(fw, es, pc) -> compare_code cs es pc fw
-                  )
-              | _ -> error
-              end
-          | EF_annot_val(text, targ) ->
-              begin match args, res with
-              | IR src :: _, [IR dst] ->
-                  if dst <> src
-                  then (
-                    match ecode with
-                    | ORx(rS, rA, rB, rc) :: es ->
-                        OK(fw)
-                        >>= match_iregs src rS
-                        >>= match_iregs dst rA
-                        >>= match_iregs src rB
-                        >>= match_bools false rc
-                        >>= recur_simpl
-                    | _ -> error
-                  )
-                  else compare_code cs ecode pc fw
-              | FR src :: _, [FR dst] ->
-                  if dst <> src
-                  then (
-                    match ecode with
-                    | FMRx(frD, frB, rc) :: es ->
-                        OK(fw)
-                        >>= match_fregs dst frD
-                        >>= match_fregs src frB
-                        >>= match_bools false rc
-                        >>= recur_simpl
-                    | _ -> error
-                  )
-                  else compare_code cs ecode pc fw
-              | _ -> error
-              end
-          | EF_annot(_, _)    -> fatal "Unexpected EF_annot, please report."
-          | EF_external(_, _) -> fatal "Unexpected EF_external, please report."
-          | EF_free           -> fatal "Unexpected EF_free, please report."
-          | EF_malloc         -> fatal "Unexpected EF_malloc, please report."
           | EF_inline_asm(_)  -> fatal "Unsupported: inline asm statement."
+          | _                 -> fatal "Unexpected Pbuiltin, please report."  (* every external function other than EF_inline_asm lands here *)
           end
+      | Pcfi_adjust _ | Pcfi_rel_offset _ ->  (* no decoded instruction is matched for these *)
+          OK(fw)
+          >>= compare_code cs ecode pc  (* continue on the rest of ccode with ecode and pc unchanged *)
       | Pcmplw(r1, r2) ->
           begin match ecode with
           | CMPL(crfD, l, rA, rB) :: es ->
@@ -1580,6 +1019,26 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Pcntlz(r1, r2) ->  (* expect CNTLZWx with rS = r2, rA = r1, rc = false *)
+          begin match ecode with
+          | CNTLZWx(rS, rA, rc) :: es ->
+              OK(fw)
+              >>= match_iregs r2    rS  (* note the order: r2 pairs with rS, r1 with rA *)
+              >>= match_iregs r1    rA
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pcreqv(bd, b1, b2) ->  (* expect CREQV with crbD = bd, crbA = b1, crbB = b2 *)
+          begin match ecode with
+          | CREQV(crbD, crbA, crbB) :: es ->
+              OK(fw)
+              >>= match_crbits bd crbD
+              >>= match_crbits b1 crbA
+              >>= match_crbits b2 crbB
+              >>= recur_simpl
+          | _ -> error
+          end
       | Pcror(bd, b1, b2) ->
           begin match ecode with
           | CROR(crbD, crbA, crbB) :: es ->
@@ -1590,6 +1049,16 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Pcrxor(bd, b1, b2) ->  (* expect CRXOR with crbD = bd, crbA = b1, crbB = b2 *)
+          begin match ecode with
+          | CRXOR(crbD, crbA, crbB) :: es ->
+              OK(fw)
+              >>= match_crbits bd crbD
+              >>= match_crbits b1 crbA
+              >>= match_crbits b2 crbB
+              >>= recur_simpl
+          | _ -> error
+          end
       | Pdivw(rd, r1, r2) ->
           begin match ecode with
           | DIVWx(rD, rA, rB, oe, rc) :: es ->
@@ -1614,6 +1083,13 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Peieio ->  (* expect a single EIEIO; it has no operands to compare *)
+          begin match ecode with
+          | EIEIO :: es ->
+              OK(fw)
+              >>= recur_simpl
+          | _ -> error
+          end
       | Peqv(rd, r1, r2) ->
           begin match ecode with
           | EQVx(rS, rA, rB, rc) :: es ->
@@ -1688,25 +1164,25 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
           | _ -> error
           end
       | Pfcti(rd, r1) ->
+          error  (* Pfcti is now always rejected; presumably it is expanded before the checker sees it -- confirm *)
+      | Pfctiw(rd, r1) ->  (* expect a single FCTIWx with frD = rd, frB = r1, rc = false *)
           begin match ecode with
-          |   FCTIWZx(frD0, frB0, rc0)   ::
-              STFDU  (frS1, rA1,  d1)    ::
-              LWZ    (rD2,  rA2,  d2)    ::
-              ADDI   (rD3,  rA3,  simm3) :: es ->
+          | FCTIWx(frD0, frB0, rc0) :: es ->
               OK(fw)
-              >>= match_fregs  FPR13 frD0
+              >>= match_fregs  rd    frD0
               >>= match_fregs  r1    frB0
               >>= match_bools  false rc0
-              >>= match_fregs  FPR13 frS1
-              >>= match_iregs  GPR1  rA1
-              >>= match_int32s (-8l) (exts d1)
-              >>= match_iregs  rd    rD2
-              >>= match_iregs  GPR1  rA2
-              >>= match_int32s 4l    (exts d2)
-              >>= match_iregs  GPR1  rD3
-              >>= match_iregs  GPR1  rA3
-              >>= match_int32s 8l    (exts simm3)
-              >>= compare_code cs es (Int32.add 16l pc)
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pfctiwz(rd, r1) ->  (* expect a single FCTIWZx with frD = rd, frB = r1, rc = false *)
+          begin match ecode with
+          | FCTIWZx(frD0, frB0, rc0) :: es ->
+              OK(fw)
+              >>= match_fregs  rd    frD0
+              >>= match_fregs  r1    frB0
+              >>= match_bools  false rc0
+              >>= recur_simpl
           | _ -> error
           end
       | Pfdiv(rd, r1, r2) ->
@@ -1732,27 +1208,7 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
           | _ -> error
           end
       | Pfmake(rd, r1, r2) ->
-          begin match ecode with
-          |   STWU  (rS0, rA0, d0)    ::
-              STW   (rS1, rA1, d1)    ::
-              LFD   (frD2, rA2, d2)   ::
-              ADDI  (rD3, rA3, simm3) :: es ->
-              OK(fw)
-              >>= match_iregs  r1    rS0
-              >>= match_iregs  GPR1  rA0
-              >>= match_int32s (-8l) (exts d0)
-              >>= match_iregs  r2    rS1
-              >>= match_iregs  GPR1  rA1
-              >>= match_int32s 4l    (exts d1)
-              >>= match_fregs  rd    frD2
-              >>= match_iregs  GPR1  rA2
-              >>= match_int32s 0l    (exts d2)
-              >>= match_iregs  GPR1  rD3
-              >>= match_iregs  GPR1  rA3
-              >>= match_int32s 8l    (exts simm3)
-              >>= compare_code cs es (Int32.add 16l pc)
-          | _ -> error
-          end
+          error  (* Pfmake is now always rejected; presumably it is expanded before the checker sees it -- confirm *)
       | Pfmr(rd, r1) ->
           begin match ecode with
           | FMRx(frD, frB, rc) :: es ->
@@ -1796,21 +1252,7 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
           | _ -> error
           end
       | Pfreeframe(sz, ofs) ->
-          begin match ecode with
-          | ADDI(rD, rA, simm) :: es ->
-              OK(fw)
-              >>= match_iregs   GPR1 rD
-              >>= match_iregs   GPR1 rA
-              >>= match_z_int32 sz (exts simm)
-              >>= recur_simpl
-          | LWZ(rD, rA, d) :: es ->
-              OK(fw)
-              >>= match_iregs   GPR1 rD
-              >>= match_iregs   GPR1 rA
-              >>= match_z_int32 ofs  (exts d)
-              >>= recur_simpl
-          | _ -> error
-          end
+          error  (* Pfreeframe is now always rejected; presumably it is expanded before the checker sees it -- confirm *)
       | Pfrsp(rd, r1) ->
           begin match ecode with
           | FRSPx(frD, frB, rc) :: es ->
@@ -1822,16 +1264,7 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
           | _ -> error
           end
       | Pfxdp(rd, r1) ->
-          if rd = r1 then OK(fw) >>= compare_code cs ecode pc else 
-          begin match ecode with
-          | FMRx(frD, frB, rc) :: es ->
-              OK(fw)
-              >>= match_fregs rd    frD
-              >>= match_fregs r1    frB
-              >>= match_bools false rc
-              >>= recur_simpl
-          | _ -> error
-          end
+          error  (* Pfxdp is now always rejected; presumably it is expanded before the checker sees it -- confirm *)
       | Pfsub(rd, r1, r2) ->
           begin match ecode with
           | FSUBx(frD, frA, frB, rc) :: es ->
@@ -1854,6 +1287,103 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Pfmadd(rd, r1, r2, r3) ->  (* expect FMADDx with frD = rd, frA = r1, frB = r3, frC = r2, rc = false *)
+          begin match ecode with
+          | FMADDx(frD, frA, frB, frC, rc) :: es ->
+              OK(fw)
+              >>= match_fregs rd frD
+              >>= match_fregs r1 frA
+              >>= match_fregs r3 frB  (* note the crossing: r3 pairs with frB, r2 with frC *)
+              >>= match_fregs r2 frC
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pfmsub(rd, r1, r2, r3) ->  (* expect FMSUBx with frD = rd, frA = r1, frB = r3, frC = r2, rc = false *)
+          begin match ecode with
+          | FMSUBx(frD, frA, frB, frC, rc) :: es ->
+              OK(fw)
+              >>= match_fregs rd frD
+              >>= match_fregs r1 frA
+              >>= match_fregs r3 frB  (* note the crossing: r3 pairs with frB, r2 with frC *)
+              >>= match_fregs r2 frC
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pfnmadd(rd, r1, r2, r3) ->  (* expect FNMADDx with frD = rd, frA = r1, frB = r3, frC = r2, rc = false *)
+          begin match ecode with
+          | FNMADDx(frD, frA, frB, frC, rc) :: es ->
+              OK(fw)
+              >>= match_fregs rd frD
+              >>= match_fregs r1 frA
+              >>= match_fregs r3 frB  (* note the crossing: r3 pairs with frB, r2 with frC *)
+              >>= match_fregs r2 frC
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pfnmsub(rd, r1, r2, r3) ->  (* expect FNMSUBx with frD = rd, frA = r1, frB = r3, frC = r2, rc = false *)
+          begin match ecode with
+          | FNMSUBx(frD, frA, frB, frC, rc) :: es ->
+              OK(fw)
+              >>= match_fregs rd frD
+              >>= match_fregs r1 frA
+              >>= match_fregs r3 frB  (* note the crossing: r3 pairs with frB, r2 with frC *)
+              >>= match_fregs r2 frC
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pfsqrt(rd, r1) ->  (* expect FSQRTx with frD = rd, frB = r1, rc = false *)
+          begin match ecode with
+          | FSQRTx(frD, frB, rc) :: es ->
+              OK(fw)
+              >>= match_fregs rd   frD
+              >>= match_fregs r1   frB
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pfrsqrte(rd, r1) ->  (* expect FRSQRTEx with frD = rd, frB = r1, rc = false *)
+          begin match ecode with
+          | FRSQRTEx(frD, frB, rc) :: es ->
+              OK(fw)
+              >>= match_fregs rd   frD
+              >>= match_fregs r1   frB
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pfres(rd, r1) ->  (* expect FRESx with frD = rd, frB = r1, rc = false *)
+          begin match ecode with
+          | FRESx(frD, frB, rc) :: es ->
+              OK(fw)
+              >>= match_fregs rd   frD
+              >>= match_fregs r1   frB
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pfsel(rd, r1, r2, r3) ->  (* expect FSELx with frD = rd, frA = r1, frB = r3, frC = r2, rc = false *)
+          begin match ecode with
+          | FSELx(frD, frA, frB, frC, rc) :: es ->
+              OK(fw)
+              >>= match_fregs rd frD
+              >>= match_fregs r1 frA
+              >>= match_fregs r3 frB  (* note the crossing: r3 pairs with frB, r2 with frC *)
+              >>= match_fregs r2 frC
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pisync ->  (* expect a single ISYNC; it has no operands to compare *)
+          begin match ecode with
+          | ISYNC :: es ->
+              OK(fw)
+              >>= recur_simpl
+          | _ -> error
+          end
       | Plabel(lbl) ->
           OK(fw)
           >>= lblmap_unify lbl pc
@@ -1942,7 +1472,7 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
                 | Some(bs, pofs, psize) ->
                     let f =
                       bitmatch bs with
-                      | { f : 64 : int } -> Int64.float_of_bits f
+                      | { f : 64 : int } -> f
                     in
                     OK(fw)
                     >>= (fun ffw ->
@@ -1994,7 +1524,7 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
                 | Some(bs, pofs, psize) ->
                     let f =
                       bitmatch bs with
-                      | { f : 32 : int } -> Int32.float_of_bits f
+                      | { f : 32 : int } -> f
                     in
                     OK(fw)
                     >>= (fun ffw ->
@@ -2014,7 +1544,7 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
                     >>= match_iregs  GPR12 rD0
                     >>= match_iregs  GPR0  rA0
                     >>= match_fregs  r1    frD1
-                    >>= match_floats32 c     f
+                    >>= match_floats32 c   f
                     >>^ (ff_ef ^%= add_range pofs psize 4 (Float32_literal(f)))
                     >>= match_iregs  GPR12 rA1
                     >>= continue
@@ -2079,6 +1609,16 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Plhbrx(rd, r1, r2) ->  (* expect LHBRX with rD = rd, rA = r1, rB = r2 *)
+          begin match ecode with
+          | LHBRX(rD, rA, rB):: es ->
+              OK(fw)
+              >>= match_iregs rd rD
+              >>= match_iregs r1 rA
+              >>= match_iregs r2 rB
+              >>= recur_simpl
+          | _ -> error
+          end
       | Plhz(rd, Csymbol_sda(ident, ofs), r1) ->
           begin match ecode with
           | LHZ(rD, rA, d) :: es ->
@@ -2108,6 +1648,16 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Plwbrx(rd, r1, r2) ->  (* expect LWBRX with rD = rd, rA = r1, rB = r2 *)
+          begin match ecode with
+          | LWBRX(rD, rA, rB):: es ->
+              OK(fw)
+              >>= match_iregs rd rD
+              >>= match_iregs r1 rA
+              >>= match_iregs r2 rB
+              >>= recur_simpl
+          | _ -> error
+          end
       | Plwz(rd, Csymbol_sda(ident, ofs), r1) ->
           begin match ecode with
           | LWZ(rD, rA, d) :: es ->
@@ -2127,6 +1677,16 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Plwzu(rd, cst, r1) ->  (* expect LWZU with rD = rd, rA = r1 and displacement matching cst *)
+          begin match ecode with
+          | LWZU(rD, rA, d) :: es ->
+              OK(fw)
+              >>= match_iregs rd  rD
+              >>= match_iregs r1  rA
+              >>= match_csts  cst (exts d)  (* d goes through exts before the comparison *)
+              >>= recur_simpl
+          | _ -> error
+          end
       | Plwzx(rd, r1, r2) | Plwzx_a(rd, r1, r2) ->
           begin match ecode with
           | LWZX(rD, rA, rB) :: es ->
@@ -2137,21 +1697,16 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
-      | Pmfcrbit(rd, bit) ->
+      | Pmfcr rd ->  (* expect a single MFCR whose destination register is rd *)
           begin match ecode with
-          |   MFCR   (rD0)                          ::
-              RLWINMx(rS1, rA1, sh1, mb1, me1, rc1) :: es ->
+          | MFCR (rD0) :: es ->
               OK(fw)
               >>= match_iregs  rd    rD0
-              >>= match_iregs  rd    rS1
-              >>= match_iregs  rd    rA1
-              >>= match_crbits bit   (sh1 - 1)
-              >>= match_ints   31    mb1
-              >>= match_ints   31    me1
-              >>= match_bools  false rc1
-              >>= compare_code cs es (Int32.add 8l pc)
+              >>= recur_simpl
           | _ -> error
-          end
+          end
+      | Pmfcrbit(rd, bit) ->  (* now always rejected; presumably expanded before the checker sees it -- confirm *)
+          error
       | Pmflr(r) ->
           begin match ecode with
           | MFSPR(rD, spr) :: es ->
@@ -2395,6 +1950,16 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Pstfdu(rd, cst, r1) ->  (* expect STFDU with frS = rd, rA = r1 and displacement matching cst *)
+          begin match ecode with
+          | STFDU(frS, rA, d) :: es ->
+              OK(fw)
+              >>= match_fregs rd  frS
+              >>= match_iregs r1  rA
+              >>= match_csts  cst (exts d)  (* d goes through exts before the comparison *)
+              >>= recur_simpl
+          | _ -> error
+          end
       | Pstfdx(rd, r1, r2) | Pstfdx_a(rd, r1, r2) ->
           begin match ecode with
           | STFDX(frS, rA, rB) :: es ->
@@ -2445,6 +2010,16 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Psthbrx(rd, r1, r2) ->
+          begin match ecode with
+          | STHBRX(rS, rA, rB) :: es ->
+              OK(fw)
+              >>= match_iregs rd rS
+              >>= match_iregs r1 rA
+              >>= match_iregs r2 rB
+              >>= recur_simpl
+          | _ -> error
+          end
       | Pstw(rd, cst, r1) | Pstw_a(rd, cst, r1) ->
           begin match ecode with
           | STW(rS, rA, d) :: es ->
@@ -2455,6 +2030,16 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Pstwu(rd, cst, r1) ->
+          begin match ecode with
+          | STWU(rS, rA, d) :: es ->
+              OK(fw)
+              >>= match_iregs rd  rS
+              >>= match_iregs r1  rA
+              >>= match_csts  cst (exts d)
+              >>= recur_simpl
+          | _ -> error
+          end
       | Pstwx(rd, r1, r2) | Pstwx_a(rd, r1, r2) ->
           begin match ecode with
           | STWX(rS, rA, rB) :: es ->
@@ -2465,6 +2050,26 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Pstwbrx(rd, r1, r2) ->
+          begin match ecode with
+          | STWBRX(rS, rA, rB) :: es ->
+              OK(fw)
+              >>= match_iregs rd rS
+              >>= match_iregs r1 rA
+              >>= match_iregs r2 rB
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Pstwxu(rd, r1, r2) ->
+          begin match ecode with
+          | STWUX(rS, rA, rB) :: es ->
+              OK(fw)
+              >>= match_iregs rd rS
+              >>= match_iregs r1 rA
+              >>= match_iregs r2 rB
+              >>= recur_simpl
+          | _ -> error
+          end
       | Psubfc(rd, r1, r2) ->
           begin match ecode with
           | SUBFCx(rD, rA, rB, oe, rc) :: es ->
@@ -2489,6 +2094,17 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Psubfze(rd, r1) ->
+          begin match ecode with
+          | SUBFZEx(rD, rA, oe, rc) :: es ->
+              OK(fw)
+              >>= match_iregs rd    rD
+              >>= match_iregs r1    rA
+              >>= match_bools false oe
+              >>= match_bools false rc
+              >>= recur_simpl
+          | _ -> error
+          end
       | Psubfic(rd, r1, cst) ->
           begin match ecode with
           | SUBFIC(rD, rA, simm) :: es ->
@@ -2499,6 +2115,27 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
+      | Psync ->
+          begin match ecode with
+          | SYNC :: es ->
+              OK(fw)
+              >>= recur_simpl
+          | _ -> error
+          end
+      | Ptrap ->
+          begin match ecode with
+          | TW(tO, rA, rB) :: es ->
+              OK(fw)
+              >>= (fun ffw ->
+                bitmatch tO with
+                | { 31 : 5 : int } -> OK(ffw)
+                | { _ } -> ERR("bitmatch")
+              )
+              >>= match_iregs GPR0 rA
+              >>= match_iregs GPR0 rB
+              >>= recur_simpl
+          | _ -> error
+          end
       | Pxor(rd, r1, r2) ->
           begin match ecode with
           | XORx(rS, rA, rB, rc) :: es ->
@@ -2530,142 +2167,6 @@ let rec compare_code ccode ecode pc: checker = fun fw ->
               >>= recur_simpl
           | _ -> error
           end
-and check_builtin_vload_common ccode ecode pc chunk addr offset res fw =
-  let error = ERR("Non-matching instructions") in
-  let recur_simpl = compare_code ccode (List.tl ecode) (Int32.add pc 4l) in
-  begin match chunk, res with
-  | Mint8unsigned, [IR res] ->
-      begin match ecode with
-      | LBZ(rD, rA, d) :: es ->
-          OK(fw)
-          >>= match_iregs  res    rD
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | Mint8signed, [IR res] ->
-      begin match ecode with
-      |   LBZ   (rD0, rA0, d0)  ::
-          EXTSBx(rS1, rA1, rc1) :: es ->
-          OK(fw)
-          >>= match_iregs  res    rD0
-          >>= match_iregs  addr   rA0
-          >>= match_csts   offset (exts d0)
-          >>= match_iregs  res    rS1
-          >>= match_iregs  res    rA1
-          >>= match_bools  false  rc1
-          >>= compare_code ccode es (Int32.add 8l pc)
-      | _ -> error
-      end
-  | Mint16unsigned, [IR res] ->
-      begin match ecode with
-      | LHZ(rD, rA, d) :: es ->
-          OK(fw)
-          >>= match_iregs  res    rD
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | Mint16signed, [IR res] ->
-      begin match ecode with
-      | LHA(rD, rA, d) :: es ->
-          OK(fw)
-          >>= match_iregs  res    rD
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | Mint32, [IR res] ->
-      begin match ecode with
-      | LWZ(rD, rA, d) :: es ->
-          OK(fw)
-          >>= match_iregs  res    rD
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | Mfloat32, [FR res] ->
-      begin match ecode with
-      | LFS(frD, rA, d) :: es ->
-          OK(fw)
-          >>= match_fregs  res    frD
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | Mfloat64, [FR res] ->
-      begin match ecode with
-      | LFD(frD, rA, d) :: es ->
-          OK(fw)
-          >>= match_fregs  res    frD
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | _ -> error
-  end
-and check_builtin_vstore_common ccode ecode pc chunk addr offset src fw =
-  let recur_simpl = compare_code ccode (List.tl ecode) (Int32.add pc 4l) in
-  let error = ERR("Non-matching instructions") in
-  begin match chunk, src with
-  | (Mint8signed | Mint8unsigned), IR src ->
-      begin match ecode with
-      | STB(rS, rA, d) :: es ->
-          OK(fw)
-          >>= match_iregs  src    rS
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | (Mint16signed | Mint16unsigned), IR src ->
-      begin match ecode with
-      | STH(rS, rA, d) :: es ->
-          OK(fw)
-          >>= match_iregs  src    rS
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | Mint32, IR src ->
-      begin match ecode with
-      | STW(rS, rA, d) :: es ->
-          OK(fw)
-          >>= match_iregs  src    rS
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | Mfloat32, FR src ->
-      begin match ecode with
-      | STFS(frS, rA, d) :: es ->
-          OK(fw)
-          >>= match_fregs  src    frS
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | Mfloat64, FR src ->
-      begin match ecode with
-      | STFD(frS, rA, d) :: es ->
-          OK(fw)
-          >>= match_fregs  src    frS
-          >>= match_iregs  addr   rA
-          >>= match_csts   offset (exts d)
-          >>= recur_simpl
-      | _ -> error
-      end
-  | _ -> error
-  end
 
 (** A work element is a triple giving a CompCert ident for the function to
     analyze, its name as a string, and the actual code. It is not obvious how
@@ -2822,7 +2323,7 @@ let compare_data (l: init_data list) (bs: bitstring) (sfw: s_framework)
         | Init_float32(f) -> (
           bitmatch bs with
           | { j : 32 : int; bs : -1 : bitstring } ->
-              if camlfloat_of_coqfloat f = Int32.float_of_bits j
+              if camlint_of_coqint (Floats.Float32.to_bits f) = j
               then compare_data_aux l bs (s + 4) sfw
               else ERR("Wrong float32")
           | { _ } -> error
@@ -2830,7 +2331,7 @@ let compare_data (l: init_data list) (bs: bitstring) (sfw: s_framework)
         | Init_float64(f) -> (
           bitmatch bs with
           | { j : 64 : int; bs : -1 : bitstring } ->
-              if camlfloat_of_coqfloat f = Int64.float_of_bits j
+              if camlint64_of_coqint (Floats.Float.to_bits f) = j
               then compare_data_aux l bs (s + 8) sfw
               else ERR("Wrong float64")
           | { _ } -> error
@@ -3276,6 +2777,7 @@ let warn_sections_remapping efw =
   print_debug "Checking remapped sections";
   StringMap.fold
     (fun c_name (e_name, conflicts) efw ->
+      if c_name = "COMM" then efw else
       if StringSet.is_empty conflicts
       then
         match e_name with
diff --git a/checklink/Frameworks.ml b/checklink/Frameworks.ml
index 0f7ec44..30c0b38 100644
--- a/checklink/Frameworks.ml
+++ b/checklink/Frameworks.ml
@@ -24,8 +24,8 @@ type byte_chunk_desc =
   | Zero_symbol
   | Stub               of string
   | Jumptable
-  | Float_literal      of float
-  | Float32_literal    of float
+  | Float_literal      of int64
+  | Float32_literal    of int32
   | Padding
   | Unknown            of string
 
@@ -208,7 +208,7 @@ let string_of_byte_chunk_desc = function
 | Zero_symbol -> "Symbol 0"
 | Stub(s) -> "Stub for: " ^ s
 | Jumptable -> "Jump table"
-| Float_literal(f) -> "Float literal: " ^ string_of_float f
-| Float32_literal(f) -> "Float32 literal: " ^ string_of_float f
+| Float_literal(f) -> "Float literal: " ^ string_of_int64 f
+| Float32_literal(f) -> "Float32 literal: " ^ string_of_int32 f
 | Padding -> "Padding"
 | Unknown(s) -> "???" ^ (if !verbose_elfmap then s else "")
diff --git a/checklink/Fuzz.ml b/checklink/Fuzz.ml
index d7947ee..dc98493 100644
--- a/checklink/Fuzz.ml
+++ b/checklink/Fuzz.ml
@@ -75,7 +75,6 @@ let fuzz_check elfmap bs byte old sdumps =
 let ok_fuzz elfmap str byte fuzz =
   let (a, b, _, r) = full_range_of_byte elfmap byte in
   let a = Safe32.to_int a in
-  let b = Safe32.to_int b in
   let old = Char.code str.[byte] in
   let fuz = Char.code fuzz in
   match r with
@@ -98,13 +97,7 @@ let ok_fuzz elfmap str byte fuzz =
           && ((old land 0xf = 0) || (fuz land 0xf = 0))
       )
   | Symtab_function(_) -> true
-  | Data_symbol(_)     ->
-      (* False positive: 0. becomes -0. *)
-      not (
-        (byte + 7 <= b)
-        && (fuz = 0x80) (* sign bit *)
-        && String.sub str byte 8 = "\000\000\000\000\000\000\000\000"
-      )
+  | Data_symbol(_)     -> true
   | Function_symbol(_) ->
       let opcode = Char.code str.[byte - 3] in
       (* False positive: rlwinm with bitmask 0 31 = bitmask n (n - 1) *)
@@ -113,14 +106,8 @@ let ok_fuzz elfmap str byte fuzz =
   | Zero_symbol        -> false
   | Stub(_)            -> true
   | Jumptable          -> true
-  | Float_literal(_)   -> (* FIXME: this shouldn't be a false positive! *)
-      (* False positive: 0. becomes -0. *)
-      not (
-        (byte = a)
-        && (fuz = 0x80) (* sign bit *)
-        && String.sub str byte 8 = "\000\000\000\000\000\000\000\000"
-      )
-  | Float32_literal(_)  -> true
+  | Float_literal(_)   -> true
+  | Float32_literal(_) -> true
   (* padding is allowed to be non-null, but won't be recognized as padding, but
      as unknown, which is not an ERROR *)
   | Padding            -> false
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 556a476..09451eb 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -152,7 +152,8 @@ let compile_c_ast sourcename csyntax ofile =
   (* Convert to Asm *)
   let asm =
     match Compiler.transf_c_program csyntax with
-    | Errors.OK x -> x
+    | Errors.OK asm ->
+        Asmexpand.expand_program (Unusedglob.transf_program asm)
     | Errors.Error msg ->
         print_error stderr msg;
         exit 2 in
@@ -161,7 +162,7 @@ let compile_c_ast sourcename csyntax ofile =
     dump_asm asm (output_filename sourcename ".c" ".sdump");
   (* Print Asm in text form *)
   let oc = open_out ofile in
-  PrintAsm.print_program oc (Unusedglob.transf_program asm);
+  PrintAsm.print_program oc asm;
   close_out oc
 
 (* From C source to asm *)
diff --git a/extraction/extraction.v b/extraction/extraction.v
index f0033af..d987629 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -157,4 +157,5 @@ Separate Extraction
    RTL.instr_defs RTL.instr_uses
    Machregs.mregs_for_operation Machregs.mregs_for_builtin
    Machregs.two_address_op Machregs.is_stack_reg
+   AST.signature_main
    Parser.translation_unit_file.
diff --git a/ia32/Asmexpand.ml b/ia32/Asmexpand.ml
new file mode 100644
index 0000000..9a458c3
--- /dev/null
+++ b/ia32/Asmexpand.ml
@@ -0,0 +1,18 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Expanding built-ins and some pseudo-instructions by rewriting
+   the IA32 assembly code.  Not done yet for this target: the
+   expansion is still performed on the fly in PrintAsm. *)
+
+let expand_program = fun prog -> prog  (* identity: nothing is rewritten here *)
+
diff --git a/powerpc/Asm.v b/powerpc/Asm.v
index a7e5eaf..ab52ca5 100644
--- a/powerpc/Asm.v
+++ b/powerpc/Asm.v
@@ -69,7 +69,8 @@ Inductive preg: Type :=
   | CR0_0: preg                         (**r bit 0 of the condition register  *)
   | CR0_1: preg                         (**r bit 1 of the condition register  *)
   | CR0_2: preg                         (**r bit 2 of the condition register  *)
-  | CR0_3: preg.                        (**r bit 3 of the condition register  *)
+  | CR0_3: preg                         (**r bit 3 of the condition register  *)
+  | CR1_2: preg.                        (**r bit 6 of the condition register  *)
 
 Coercion IR: ireg >-> preg.
 Coercion FR: freg >-> preg.
@@ -114,14 +115,15 @@ Inductive constant: Type :=
   range.  Of course, our PPC generator (file [Asmgen]) is
   careful to respect this range. *)
 
-(** Bits in the condition register.  We are only interested in the
-  first 4 bits. *)
+(** Bits in the condition register.  We are only interested in bits
+  0, 1, 2, 3 and 6. *)
 
 Inductive crbit: Type :=
   | CRbit_0: crbit
   | CRbit_1: crbit
   | CRbit_2: crbit
-  | CRbit_3: crbit.
+  | CRbit_3: crbit
+  | CRbit_6: crbit.
 
 (** The instruction set.  Most instructions correspond exactly to
   actual instructions of the PowerPC processor. See the PowerPC
@@ -134,12 +136,13 @@ Definition label := positive.
 
 Inductive instruction : Type :=
   | Padd: ireg -> ireg -> ireg -> instruction                 (**r integer addition *)
-  | Padde: ireg -> ireg -> ireg -> instruction                 (**r integer addition with carry *)
+  | Paddc: ireg -> ireg -> ireg -> instruction                (**r integer addition and set carry *)
+  | Padde: ireg -> ireg -> ireg -> instruction                (**r integer addition with carry *)
   | Paddi: ireg -> ireg -> constant -> instruction            (**r add immediate *)
-  | Paddic: ireg -> ireg -> constant -> instruction            (**r add immediate and set carry *)
+  | Paddic: ireg -> ireg -> constant -> instruction           (**r add immediate and set carry *)
   | Paddis: ireg -> ireg -> constant -> instruction           (**r add immediate high *)
   | Paddze: ireg -> ireg -> instruction                       (**r add carry *)
-  | Pallocframe: Z -> int -> instruction                      (**r allocate new stack frame *)
+  | Pallocframe: Z -> int -> instruction                      (**r allocate new stack frame (pseudo) *)
   | Pand_: ireg -> ireg -> ireg -> instruction                (**r bitwise and *)
   | Pandc: ireg -> ireg -> ireg -> instruction                (**r bitwise and-complement *)
   | Pandi_: ireg -> ireg -> constant -> instruction           (**r and immediate and set conditions *)
@@ -147,32 +150,39 @@ Inductive instruction : Type :=
   | Pb: label -> instruction                                  (**r unconditional branch *)
   | Pbctr: signature -> instruction                           (**r branch to contents of register CTR *)
   | Pbctrl: signature -> instruction                          (**r branch to contents of CTR and link *)
+  | Pbdnz: label -> instruction                               (**r decrement CTR and branch if not zero *)
   | Pbf: crbit -> label -> instruction                        (**r branch if false *)
   | Pbl: ident -> signature -> instruction                    (**r branch and link *)
   | Pbs: ident -> signature -> instruction                    (**r branch to symbol *)
   | Pblr: instruction                                         (**r branch to contents of register LR *)
   | Pbt: crbit -> label -> instruction                        (**r branch if true *)
-  | Pbtbl: ireg -> list label -> instruction                  (**r N-way branch through a jump table *)
+  | Pbtbl: ireg -> list label -> instruction                  (**r N-way branch through a jump table (pseudo) *)
   | Pcmplw: ireg -> ireg -> instruction                       (**r unsigned integer comparison *)
   | Pcmplwi: ireg -> constant -> instruction                  (**r same, with immediate argument *)
   | Pcmpw: ireg -> ireg -> instruction                        (**r signed integer comparison *)
   | Pcmpwi: ireg -> constant -> instruction                   (**r same, with immediate argument *)
+  | Pcntlz: ireg -> ireg -> instruction                       (**r count leading zeros *)
+  | Pcreqv: crbit -> crbit -> crbit -> instruction            (**r not-xor between condition bits *)
   | Pcror: crbit -> crbit -> crbit -> instruction             (**r or between condition bits *)
+  | Pcrxor: crbit -> crbit -> crbit -> instruction            (**r xor between condition bits *)
   | Pdivw: ireg -> ireg -> ireg -> instruction                (**r signed division *)
   | Pdivwu: ireg -> ireg -> ireg -> instruction               (**r unsigned division *)
+  | Peieio: instruction                                       (**r EIEIO barrier *)
   | Peqv: ireg -> ireg -> ireg -> instruction                 (**r bitwise not-xor *)
   | Pextsb: ireg -> ireg -> instruction                       (**r 8-bit sign extension *)
   | Pextsh: ireg -> ireg -> instruction                       (**r 16-bit sign extension *)
-  | Pfreeframe: Z -> int -> instruction                       (**r deallocate stack frame and restore previous frame *)
+  | Pfreeframe: Z -> int -> instruction                       (**r deallocate stack frame and restore previous frame (pseudo) *)
   | Pfabs: freg -> freg -> instruction                        (**r float absolute value *)
   | Pfabss: freg -> freg -> instruction                       (**r float absolute value *)
   | Pfadd: freg -> freg -> freg -> instruction                (**r float addition *)
   | Pfadds: freg -> freg -> freg -> instruction               (**r float addition *)
   | Pfcmpu: freg -> freg -> instruction                       (**r float comparison *)
-  | Pfcti: ireg -> freg -> instruction                        (**r float-to-signed-int conversion *)
+  | Pfcti: ireg -> freg -> instruction                        (**r float-to-signed-int conversion, round towards 0 (pseudo) *)
+  | Pfctiw: freg -> freg -> instruction                       (**r float-to-signed-int conversion, round by default *) 
+  | Pfctiwz: freg -> freg -> instruction                      (**r float-to-signed-int conversion, round towards 0 *) 
   | Pfdiv: freg -> freg -> freg -> instruction                (**r float division *)
   | Pfdivs: freg -> freg -> freg -> instruction               (**r float division *)
-  | Pfmake: freg -> ireg -> ireg -> instruction               (**r build a float from 2 ints *)
+  | Pfmake: freg -> ireg -> ireg -> instruction               (**r build a float from 2 ints (pseudo) *)
   | Pfmr: freg -> freg -> instruction                         (**r float move *)
   | Pfmul: freg -> freg -> freg -> instruction                (**r float multiply *)
   | Pfmuls: freg -> freg -> freg -> instruction               (**r float multiply *)
@@ -182,6 +192,15 @@ Inductive instruction : Type :=
   | Pfxdp: freg -> freg -> instruction                        (**r float extend to double precision (pseudo) *)
   | Pfsub: freg -> freg -> freg -> instruction                (**r float subtraction *)
   | Pfsubs: freg -> freg -> freg -> instruction               (**r float subtraction *)
+  | Pfmadd: freg -> freg -> freg -> freg -> instruction       (**r fused multiply-add *)
+  | Pfmsub: freg -> freg -> freg -> freg -> instruction       (**r fused multiply-sub *)
+  | Pfnmadd: freg -> freg -> freg -> freg -> instruction      (**r fused neg-multiply-add *)
+  | Pfnmsub: freg -> freg -> freg -> freg -> instruction      (**r fused neg-multiply-sub *)
+  | Pfsqrt: freg -> freg -> instruction                       (**r square root *)
+  | Pfrsqrte: freg -> freg -> instruction                     (**r approximate reciprocal of square root *)
+  | Pfres: freg -> freg -> instruction                        (**r approximate inverse *)
+  | Pfsel: freg -> freg -> freg -> freg -> instruction        (**r FP conditional move *)
+  | Pisync: instruction                                       (**r ISYNC barrier *)
   | Plbz: ireg -> constant -> ireg -> instruction             (**r load 8-bit unsigned int *)
   | Plbzx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Plfd: freg -> constant -> ireg -> instruction             (**r load 64-bit float *)
@@ -192,15 +211,19 @@ Inductive instruction : Type :=
   | Plfsx: freg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Plha: ireg -> constant -> ireg -> instruction             (**r load 16-bit signed int *)
   | Plhax: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
+  | Plhbrx: ireg -> ireg -> ireg -> instruction               (**r load 16-bit int and reverse endianness *)
   | Plhz: ireg -> constant -> ireg -> instruction             (**r load 16-bit unsigned int *)
   | Plhzx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Plfi: freg -> float -> instruction                        (**r load float constant *)
   | Plfis: freg -> float32 -> instruction                     (**r load float constant *)
   | Plwz: ireg -> constant -> ireg -> instruction             (**r load 32-bit int *)
+  | Plwzu: ireg -> constant -> ireg -> instruction            (**r load 32-bit int with update *)
   | Plwzx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Plwz_a: ireg -> constant -> ireg -> instruction           (**r load 32-bit quantity to int reg *)
   | Plwzx_a: ireg -> ireg -> ireg -> instruction              (**r same, with 2 index regs *)
-  | Pmfcrbit: ireg -> crbit -> instruction                    (**r move condition bit to reg *)
+  | Plwbrx: ireg -> ireg -> ireg -> instruction               (**r load 32-bit int and reverse endianness *)
+  | Pmfcr: ireg -> instruction                                (**r move condition register to reg *)
+  | Pmfcrbit: ireg -> crbit -> instruction                    (**r move condition bit to reg (pseudo) *)
   | Pmflr: ireg -> instruction                                (**r move LR to reg *)
   | Pmr: ireg -> ireg -> instruction                          (**r integer move *)
   | Pmtctr: ireg -> instruction                               (**r move ireg to CTR *)
@@ -224,6 +247,7 @@ Inductive instruction : Type :=
   | Pstb: ireg -> constant -> ireg -> instruction             (**r store 8-bit int *)
   | Pstbx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
   | Pstfd: freg -> constant -> ireg -> instruction            (**r store 64-bit float *)
+  | Pstfdu: freg -> constant -> ireg -> instruction           (**r store 64-bit float with update *)
   | Pstfdx: freg -> ireg -> ireg -> instruction               (**r same, with 2 index regs *)
   | Pstfd_a: freg -> constant -> ireg -> instruction          (**r store 64-bit quantity from float reg *)
   | Pstfdx_a: freg -> ireg -> ireg -> instruction             (**r same, with 2 index regs *)
@@ -231,19 +255,28 @@ Inductive instruction : Type :=
   | Pstfsx: freg -> ireg -> ireg -> instruction               (**r same, with 2 index regs *)
   | Psth: ireg -> constant -> ireg -> instruction             (**r store 16-bit int *)
   | Psthx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
+  | Psthbrx: ireg -> ireg -> ireg -> instruction              (**r store 16-bit int with reverse endianness *)
   | Pstw: ireg -> constant -> ireg -> instruction             (**r store 32-bit int *)
+  | Pstwu: ireg -> constant -> ireg -> instruction            (**r store 32-bit int with update *)
   | Pstwx: ireg -> ireg -> ireg -> instruction                (**r same, with 2 index regs *)
+  | Pstwxu: ireg -> ireg -> ireg -> instruction               (**r same, with 2 index regs and update *)
   | Pstw_a: ireg -> constant -> ireg -> instruction           (**r store 32-bit quantity from int reg *)
   | Pstwx_a: ireg -> ireg -> ireg -> instruction              (**r same, with 2 index regs *)
+  | Pstwbrx: ireg -> ireg -> ireg -> instruction              (**r store 32-bit int with reverse endianness *)
   | Psubfc: ireg -> ireg -> ireg -> instruction               (**r reversed integer subtraction *)
   | Psubfe: ireg -> ireg -> ireg -> instruction               (**r reversed integer subtraction with carry *)
+  | Psubfze: ireg -> ireg -> instruction                      (**r integer opposite with carry *)
   | Psubfic: ireg -> ireg -> constant -> instruction          (**r integer subtraction from immediate *)
+  | Psync: instruction                                        (**r SYNC barrier *)
+  | Ptrap: instruction                                        (**r unconditional trap *)
   | Pxor: ireg -> ireg -> ireg -> instruction                 (**r bitwise xor *)
   | Pxori: ireg -> ireg -> constant -> instruction            (**r bitwise xor with immediate *)
   | Pxoris: ireg -> ireg -> constant -> instruction           (**r bitwise xor with immediate high *)
   | Plabel: label -> instruction                              (**r define a code label *)
-  | Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function *)
-  | Pannot: external_function -> list annot_param -> instruction (**r annotation statement *)
+  | Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function (pseudo) *)
+  | Pannot: external_function -> list annot_param -> instruction (**r annotation statement (pseudo) *)
+  | Pcfi_adjust: int -> instruction                           (**r .cfi_adjust debug directive *)
+  | Pcfi_rel_offset: int -> instruction                       (**r .cfi_rel_offset lr debug directive *)
 
 with annot_param : Type :=
   | APreg: preg -> annot_param
@@ -528,6 +561,7 @@ Definition reg_of_crbit (bit: crbit) :=
   | CRbit_1 => CR0_1
   | CRbit_2 => CR0_2
   | CRbit_3 => CR0_3
+  | CRbit_6 => CR1_2
   end.
 
 Definition compare_sint (rs: regset) (v1 v2: val) :=
@@ -564,6 +598,9 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
   match i with
   | Padd rd r1 r2 =>
       Next (nextinstr (rs#rd <- (Val.add rs#r1 rs#r2))) m
+  | Paddc rd r1 r2 =>
+      Next (nextinstr (rs#rd <- (Val.add rs#r1 rs#r2)
+                        #CARRY <- (Val.add_carry rs#r1 rs#r2 Vzero))) m
   | Padde rd r1 r2 =>
       Next (nextinstr (rs #rd <- (Val.add (Val.add rs#r1 rs#r2) rs#CARRY)
                         #CARRY <- (Val.add_carry rs#r1 rs#r2 rs#CARRY))) m
@@ -819,10 +856,44 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
       Next (nextinstr (rs#rd <- (Val.xor rs#r1 (const_high cst)))) m
   | Plabel lbl =>
       Next (nextinstr rs) m
+  | Pcfi_rel_offset ofs =>
+      Next (nextinstr rs) m
   | Pbuiltin ef args res =>
       Stuck    (**r treated specially below *)
   | Pannot ef args =>
       Stuck    (**r treated specially below *)
+  (** The following instructions and directives are not generated directly by Asmgen,
+      so we do not model them. *)
+  | Pbdnz _
+  | Pcntlz _ _
+  | Pcreqv _ _ _
+  | Pcrxor _ _ _
+  | Peieio
+  | Pfctiw _ _
+  | Pfctiwz _ _
+  | Pfmadd _ _ _ _
+  | Pfmsub _ _ _ _
+  | Pfnmadd _ _ _ _
+  | Pfnmsub _ _ _ _
+  | Pfsqrt _ _
+  | Pfrsqrte _ _
+  | Pfres _ _
+  | Pfsel _ _ _ _
+  | Plwbrx _ _ _
+  | Pisync
+  | Plhbrx _ _ _
+  | Plwzu _ _ _
+  | Pmfcr _
+  | Pstwbrx _ _ _
+  | Pstfdu _ _ _
+  | Psthbrx _ _ _
+  | Pstwu _ _ _
+  | Pstwxu _ _ _
+  | Psubfze _ _
+  | Psync
+  | Ptrap
+  | Pcfi_adjust _ =>
+      Stuck
   end.
 
 (** Translation of the LTL/Linear/Mach view of machine registers
diff --git a/powerpc/Asmexpand.ml b/powerpc/Asmexpand.ml
new file mode 100644
index 0000000..243a4d9
--- /dev/null
+++ b/powerpc/Asmexpand.ml
@@ -0,0 +1,525 @@
+(* *********************************************************************)
+(*                                                                     *)
+(*              The Compcert verified compiler                         *)
+(*                                                                     *)
+(*          Xavier Leroy, INRIA Paris-Rocquencourt                     *)
+(*                                                                     *)
+(*  Copyright Institut National de Recherche en Informatique et en     *)
+(*  Automatique.  All rights reserved.  This file is distributed       *)
+(*  under the terms of the INRIA Non-Commercial License Agreement.     *)
+(*                                                                     *)
+(* *********************************************************************)
+
+(* Expanding built-ins and some pseudo-instructions by rewriting
+   of the PPC assembly code. *)
+
+open Datatypes
+open Camlcoq
+open Integers
+open AST
+open Memdata
+open Asm
+
+(* Buffering the expanded code *)
+
+let current_code = ref ([]: instruction list)  (* emitted instructions, most recent first *)
+
+let emit i = current_code := i :: !current_code  (* push [i] onto the buffer *)
+
+let emit_loadimm r n =  (* emit every instruction of [Asmgen.loadimm r n []] *)
+  List.iter emit (Asmgen.loadimm r n [])
+
+let emit_addimm rd rs n =  (* emit every instruction of [Asmgen.addimm rd rs n []] *)
+  List.iter emit (Asmgen.addimm rd rs n [])
+
+let get_code () =  (* return the buffer in emission order, and empty it *)
+  let c = List.rev !current_code in current_code := []; c
+
+(* Generation of fresh labels *)
+
+let dummy_function = { fn_code = []; fn_sig = signature_main }  (* placeholder installed when no function is being expanded *)
+let current_function = ref dummy_function
+let next_label = ref (None : label option)  (* [None] until the first label request for the current function *)
+
+let new_label () =  (* return a label that differs from every [Plabel] of the current function *)
+  let lbl =
+    match !next_label with
+    | Some l -> l
+    | None ->
+        (* on-demand computation of the next available label *)
+        List.fold_left
+          (fun next instr ->
+            match instr with
+            | Plabel l -> if P.lt l next then next else P.succ l  (* keep [next] strictly above every label seen *)
+            | _ -> next)
+          P.one (!current_function).fn_code
+  in
+    next_label := Some (P.succ lbl);
+    lbl
+
+let set_current_function f =  (* switch to [f] and forget the cached next label *)
+  current_function := f; next_label := None
+
+(* Useful constants, as Coq integers; [_mN] stands for -N *)
+
+let _0 = Integers.Int.zero
+let _1 = Integers.Int.one
+let _2 = coqint_of_camlint 2l
+let _4 = coqint_of_camlint 4l
+let _6 = coqint_of_camlint 6l
+let _8 = coqint_of_camlint 8l
+let _m4 = coqint_of_camlint (-4l)
+let _m8 = coqint_of_camlint (-8l)
+
+(* Handling of annotations *)
+
+let expand_annot_val txt targ args res =
+  (* The annotation itself, carrying the argument registers as parameters. *)
+  let params = List.map (fun reg -> APreg reg) args in
+  emit (Pannot(EF_annot(txt, [AA_arg targ]), params));
+  (* Then copy the annotated value to the result register, if distinct. *)
+  match args, res with
+  | [IR r_in], [IR r_out] when r_out <> r_in -> emit (Pmr(r_out, r_in))
+  | [FR r_in], [FR r_out] when r_out <> r_in -> emit (Pfmr(r_out, r_in))
+  | ([IR _], [IR _]) | ([FR _], [FR _]) -> ()
+  | _, _ -> assert false
+
+(* Handling of memcpy *)
+
+(* On the PowerPC, unaligned accesses to 16- and 32-bit integers are
+   fast, but unaligned accesses to 64-bit floats can be slow
+   (not so much on G5, but clearly so on Power7).
+   So, use 64-bit accesses only if alignment >= 4.
+   Note that lfd and stfd cannot trap on ill-formed floats. *)
+
+let expand_builtin_memcpy_small sz al src dst =  (* straight-line copy of [sz] bytes from [src] to [dst], no loop *)
+  let rec copy ofs sz =  (* [ofs]: current byte offset; [sz]: bytes still to copy *)
+    if sz >= 8 && al >= 4 && !Clflags.option_ffpu then begin
+      emit (Plfd(FPR13, Cint ofs, src));
+      emit (Pstfd(FPR13, Cint ofs, dst));
+      copy (Int.add ofs _8) (sz - 8)
+    end else if sz >= 4 then begin
+      emit (Plwz(GPR0, Cint ofs, src));
+      emit (Pstw(GPR0, Cint ofs, dst));
+      copy (Int.add ofs _4) (sz - 4)
+    end else if sz >= 2 then begin
+      emit (Plhz(GPR0, Cint ofs, src));
+      emit (Psth(GPR0, Cint ofs, dst));
+      copy (Int.add ofs _2) (sz - 2)
+    end else if sz >= 1 then begin
+      emit (Plbz(GPR0, Cint ofs, src));
+      emit (Pstb(GPR0, Cint ofs, dst));
+      copy (Int.add ofs _1) (sz - 1)
+    end in  (* sz = 0: nothing left, emit nothing *)
+  copy _0 sz
+
+let expand_builtin_memcpy_big sz al src dst =  (* word-by-word copy loop ([sz / 4] iterations), then a tail of 0 to 3 bytes *)
+  assert (sz >= 4);
+  emit_loadimm GPR0 (Z.of_uint (sz / 4));  (* number of whole words to copy *)
+  emit (Pmtctr GPR0);
+  let (s,d) = if dst <> GPR11 then (GPR11, GPR12) else (GPR12, GPR11) in  (* [s] never equals [dst], which is read after [s] is written *)
+  emit (Paddi(s, src, Cint _m4));
+  emit (Paddi(d, dst, Cint _m4));
+  let lbl = new_label() in
+  emit (Plabel lbl);
+  emit (Plwzu(GPR0, Cint _4, s));
+  emit (Pstwu(GPR0, Cint _4, d));
+  emit (Pbdnz lbl);
+  (* s and d lag behind by 4 bytes *)
+  match sz land 3 with  (* remaining bytes after the whole words *)
+  | 1 -> emit (Plbz(GPR0, Cint _4, s));
+         emit (Pstb(GPR0, Cint _4, d))
+  | 2 -> emit (Plhz(GPR0, Cint _4, s));
+         emit (Psth(GPR0, Cint _4, d))
+  | 3 -> emit (Plhz(GPR0, Cint _4, s));
+         emit (Psth(GPR0, Cint _4, d));
+         emit (Plbz(GPR0, Cint _6, s));
+         emit (Pstb(GPR0, Cint _6, d))
+  | _ -> ()
+
+let expand_builtin_memcpy sz al args =
+  let (dst, src) =
+    match args with [IR d; IR s] -> (d, s) | _ -> assert false in
+  let threshold =  (* largest size still copied without a loop *)
+    match !Clflags.option_ffpu && al >= 4, !Clflags.option_Osize with
+    | true, true -> 35 | true, false -> 51 | false, true -> 19 | false, false -> 27 in
+  if sz <= threshold then expand_builtin_memcpy_small sz al src dst
+  else expand_builtin_memcpy_big sz al src dst
+
+(* Handling of volatile reads and writes *)
+
+let expand_builtin_vload_common chunk base offset res =  (* load of kind [chunk] from [offset(base)] into [res]; not for Mint64 *)
+  match chunk, res with
+  | Mint8unsigned, IR res ->
+      emit (Plbz(res, offset, base))
+  | Mint8signed, IR res ->
+      emit (Plbz(res, offset, base));
+      emit (Pextsb(res, res))
+  | Mint16unsigned, IR res ->
+      emit (Plhz(res, offset, base))
+  | Mint16signed, IR res ->
+      emit (Plha(res, offset, base))
+  | (Mint32 | Many32), IR res ->
+      emit (Plwz(res, offset, base))
+  | Mfloat32, FR res ->
+      emit (Plfs(res, offset, base))
+  | (Mfloat64 | Many64), FR res ->
+      emit (Plfd(res, offset, base))
+  (* Mint64 is special-cased below *)
+  | _ ->  (* Mint64, or a chunk paired with the wrong register class *)
+      assert false
+
+let expand_builtin_vload chunk args res =
+  match args, res with
+  | [IR addr], [res] when chunk <> Mint64 ->
+      expand_builtin_vload_common chunk addr (Cint _0) res
+  | [IR addr], [IR res1; IR res2] when chunk = Mint64 ->
+      (* Two word loads, at offsets 0 and 4.  If [res1] is also the
+         address register, load it last so that the address is still
+         available for the other load. *)
+      let load_word0 () = emit (Plwz(res1, Cint _0, addr))
+      and load_word4 () = emit (Plwz(res2, Cint _4, addr)) in
+      if addr <> res1
+      then (load_word0 (); load_word4 ())
+      else (load_word4 (); load_word0 ())
+  | _ ->
+      assert false
+
+let expand_builtin_vload_global chunk id ofs args res =  (* volatile load from symbol [id] + [ofs]; [args] is not used *)
+  begin match res with
+  | [res] when chunk <> Mint64 ->
+      emit (Paddis(GPR11, GPR0, Csymbol_high(id, ofs)));  (* GPR11 holds the high part of the address *)
+      expand_builtin_vload_common chunk GPR11 (Csymbol_low(id, ofs)) res
+  | [IR res1; IR res2] when chunk = Mint64 ->
+      emit (Paddis(res1, GPR0, Csymbol_high(id, ofs)));  (* each result register doubles as its own address temporary *)
+      emit (Plwz(res1, Csymbol_low(id, ofs), res1));
+      let ofs = Int.add ofs _4 in  (* second word is 4 bytes further *)
+      emit (Paddis(res2, GPR0, Csymbol_high(id, ofs)));
+      emit (Plwz(res2, Csymbol_low(id, ofs), res2))
+  | _ ->
+      assert false
+  end
+
+let expand_builtin_vload_sda chunk id ofs args res =  (* volatile load from [id] + [ofs] via [Csymbol_sda]; [args] is not used *)
+  begin match res with
+  | [res] when chunk <> Mint64 ->
+      expand_builtin_vload_common chunk GPR0 (Csymbol_sda(id, ofs)) res
+  | [IR res1; IR res2] when chunk = Mint64 ->
+      emit (Plwz(res1, Csymbol_sda(id, ofs), GPR0));
+      let ofs = Int.add ofs _4 in  (* second word is 4 bytes further *)
+      emit (Plwz(res2, Csymbol_sda(id, ofs), GPR0))
+  | _ ->
+      assert false
+  end
+
+let expand_builtin_vstore_common chunk base offset src =  (* store [src] as kind [chunk] at [offset(base)]; not for Mint64 *)
+  match chunk, src with
+  | (Mint8signed | Mint8unsigned), IR src ->
+      emit (Pstb(src, offset, base))
+  | (Mint16signed | Mint16unsigned), IR src ->
+      emit (Psth(src, offset, base))
+  | (Mint32 | Many32), IR src ->
+      emit (Pstw(src, offset, base))
+  | Mfloat32, FR src ->
+      emit (Pstfs(src, offset, base))
+  | (Mfloat64 | Many64), FR src ->
+      emit (Pstfd(src, offset, base))
+  (* Mint64 is special-cased below *)
+  | _ ->  (* Mint64, or a chunk paired with the wrong register class *)
+      assert false
+
+let expand_builtin_vstore chunk args =
+  match args with
+  | [IR addr; src] when chunk <> Mint64 ->
+      expand_builtin_vstore_common chunk addr (Cint _0) src
+  | [IR addr; IR src1; IR src2] when chunk = Mint64 ->
+      (* 64-bit store as two word stores, at offsets 0 and 4 from [addr] *)
+      List.iter (fun (r, disp) -> emit (Pstw(r, Cint disp, addr)))
+                [(src1, _0); (src2, _4)]
+  | _ ->
+      assert false
+
+let expand_builtin_vstore_global chunk id ofs args =  (* volatile store of [args] to symbol [id] + [ofs] *)
+  begin match args with
+  | [src] when chunk <> Mint64 ->
+      let tmp = if src = IR GPR11 then GPR12 else GPR11 in  (* address temporary distinct from [src] *)
+      emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs)));
+      expand_builtin_vstore_common chunk tmp (Csymbol_low(id, ofs)) src
+  | [IR src1; IR src2] when chunk = Mint64 ->
+      let tmp =  (* first of GPR12, GPR11, GPR10 that is neither source *)
+        if not (List.mem GPR12 [src1; src2]) then GPR12 else
+        if not (List.mem GPR11 [src1; src2]) then GPR11 else GPR10 in
+      emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs)));
+      emit (Pstw(src1, Csymbol_low(id, ofs), tmp));
+      let ofs = Int.add ofs _4 in  (* second word is 4 bytes further *)
+      emit (Paddis(tmp, GPR0, Csymbol_high(id, ofs)));
+      emit (Pstw(src2, Csymbol_low(id, ofs), tmp))
+  | _ ->
+      assert false
+  end
+
+let expand_builtin_vstore_sda chunk id ofs args =  (* volatile store of [args] to [id] + [ofs] via [Csymbol_sda] *)
+  begin match args with
+  | [src] when chunk <> Mint64 ->
+      expand_builtin_vstore_common chunk GPR0 (Csymbol_sda(id, ofs)) src
+  | [IR src1; IR src2] when chunk = Mint64 ->  (* two word stores, at [ofs] and [ofs + 4] *)
+      emit (Pstw(src1, Csymbol_sda(id, ofs), GPR0));
+      emit (Pstw(src2, Csymbol_sda(id, Int.add ofs _4), GPR0))  (* 4 bytes further, as in the _sda load and the _global store *)
+  | _ ->
+      assert false
+  end
+
+(* Handling of varargs *)
+
+let current_function_stacksize = ref 0l  (* frame size recorded when [Pallocframe] is expanded; read by va_start *)
+
+let align n a = (n + a - 1) land (-a)  (* round [n] up to a multiple of [a]; correct when [a] is a power of 2 *)
+
+let rec next_arg_locations ir fr ofs = function  (* advance counters [ir], [fr] (capped at 8, presumably registers) and byte offset [ofs] past a list of argument types *)
+  | [] ->
+      (ir, fr, ofs)
+  | (Tint | Tany32) :: l ->
+      if ir < 8
+      then next_arg_locations (ir + 1) fr ofs l
+      else next_arg_locations ir fr (ofs + 4) l
+  | (Tfloat | Tsingle | Tany64) :: l ->
+      if fr < 8
+      then next_arg_locations ir (fr + 1) ofs l
+      else next_arg_locations ir fr (align ofs 8 + 8) l  (* 8 bytes at an 8-aligned offset *)
+  | Tlong :: l ->
+      if ir < 7
+      then next_arg_locations (align ir 2 + 2) fr ofs l  (* two slots, starting at an even index *)
+      else next_arg_locations ir fr (align ofs 8 + 8) l
+
+let expand_builtin_va_start r =  (* fill the structure pointed to by [r]: bytes at offsets 0 and 1, words at offsets 4 and 8 *)
+  if not (!current_function).fn_sig.sig_cc.cc_vararg then
+    invalid_arg "Fatal error: va_start used in non-vararg function";
+  let (ir, fr, ofs) =  (* positions reached after the function's declared parameters *)
+    next_arg_locations 0 0 0 (!current_function).fn_sig.sig_args in
+  emit_loadimm GPR0 (Z.of_uint ir);
+  emit (Pstb(GPR0, Cint _0, r));
+  emit_loadimm GPR0 (Z.of_uint fr);
+  emit (Pstb(GPR0, Cint _1, r));
+  emit_addimm GPR0 GPR1 (coqint_of_camlint  (* GPR1 + stacksize + 8 + ofs *)
+                           Int32.(add (add !current_function_stacksize 8l)
+                                      (of_int ofs)));
+  emit (Pstw(GPR0, Cint _4, r));
+  emit_addimm GPR0 GPR1 (coqint_of_camlint  (* GPR1 + stacksize - 96: the 96 bytes added to variadic frames by [Pallocframe] *)
+                           Int32.(sub !current_function_stacksize 96l));
+  emit (Pstw(GPR0, Cint _8, r))
+
+(* Auxiliary for 64-bit integer arithmetic built-ins.  They expand to
+   two instructions, one computing the low 32 bits of the result,
+   followed by another computing the high 32 bits.  In cases where
+   the first instruction would overwrite arguments to the second
+   instruction, we must go through GPR0 to hold the low 32 bits of the result.
+*)
+
+let expand_int64_arith conflict rl fn =
+  if not conflict then fn rl else begin fn GPR0; emit (Pmr(rl, GPR0)) end
+
+(* Handling of compiler-inlined builtins: emit the code for builtin [name] applied
+   to [args] with results in [res]; raises Invalid_argument if no case matches. *)
+let expand_builtin_inline name args res =
+  (* Can use as temporaries: GPR0, FPR13 *)
+  match name, args, res with
+  (* Integer arithmetic *)
+  | "__builtin_mulhw", [IR a1; IR a2], [IR res] ->
+      emit (Pmulhw(res, a1, a2))
+  | "__builtin_mulhwu", [IR a1; IR a2], [IR res] ->
+      emit (Pmulhwu(res, a1, a2))
+  | "__builtin_cntlz", [IR a1], [IR res] ->
+      emit (Pcntlz(res, a1))
+  | ("__builtin_bswap" | "__builtin_bswap32"), [IR a1], [IR res] ->
+      emit (Pstwu(a1, Cint _m8, GPR1));
+      emit (Pcfi_adjust _8);
+      emit (Plwbrx(res, GPR0, GPR1));
+      emit (Paddi(GPR1, GPR1, Cint _8));
+      emit (Pcfi_adjust _m8)
+  | "__builtin_bswap16", [IR a1], [IR res] ->
+      emit (Prlwinm(GPR0, a1, _8, coqint_of_camlint 0x0000FF00l));
+      emit (Prlwinm(res, a1, coqint_of_camlint 24l,
+                                  coqint_of_camlint 0x000000FFl));
+      emit (Por(res, GPR0, res))
+  (* Float arithmetic *)
+  | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] ->
+      emit (Pfmadd(res, a1, a2, a3))
+  | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] ->
+      emit (Pfmsub(res, a1, a2, a3))
+  | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] ->
+      emit (Pfnmadd(res, a1, a2, a3))
+  | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] ->
+      emit (Pfnmsub(res, a1, a2, a3))
+  | "__builtin_fabs", [FR a1], [FR res] ->
+      emit (Pfabs(res, a1))
+  | "__builtin_fsqrt", [FR a1], [FR res] ->
+      emit (Pfsqrt(res, a1))
+  | "__builtin_frsqrte", [FR a1], [FR res] ->
+      emit (Pfrsqrte(res, a1))
+  | "__builtin_fres", [FR a1], [FR res] ->
+      emit (Pfres(res, a1))
+  | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] ->
+      emit (Pfsel(res, a1, a2, a3))
+  | "__builtin_fcti", [FR a1], [IR res] ->
+      emit (Pfctiw(FPR13, a1));
+      emit (Pstfdu(FPR13, Cint _m8, GPR1));
+      emit (Pcfi_adjust _8);
+      emit (Plwz(res, Cint _4, GPR1));
+      emit (Paddi(GPR1, GPR1, Cint _8));
+      emit (Pcfi_adjust _m8)
+  (* 64-bit integer arithmetic *)
+  | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
+      expand_int64_arith (rl = ah) rl (fun rl' ->
+        emit (Psubfic(rl', al, Cint _0));
+        emit (Psubfze(rh, ah)))
+  | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+      expand_int64_arith (rl = ah || rl = bh) rl (fun rl' ->
+        emit (Paddc(rl', al, bl));
+        emit (Padde(rh, ah, bh)))
+  | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+      expand_int64_arith (rl = ah || rl = bh) rl (fun rl' ->
+        emit (Psubfc(rl', bl, al));
+        emit (Psubfe(rh, bh, ah)))
+  | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
+      expand_int64_arith (rl = a || rl = b) rl (fun rl' ->
+        emit (Pmullw(rl', a, b));  (* write to rl', not rl: rl may alias a or b, still needed below *)
+        emit (Pmulhwu(rh, a, b)))
+  (* Memory accesses *)
+  | "__builtin_read16_reversed", [IR a1], [IR res] ->
+      emit (Plhbrx(res, GPR0, a1))
+  | "__builtin_read32_reversed", [IR a1], [IR res] ->
+      emit (Plwbrx(res, GPR0, a1))
+  | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
+      emit (Psthbrx(a2, GPR0, a1))
+  | "__builtin_write32_reversed", [IR a1; IR a2], _ ->
+      emit (Pstwbrx(a2, GPR0, a1))
+  (* Synchronization *)
+  | "__builtin_eieio", [], _ ->
+      emit (Peieio)
+  | "__builtin_sync", [], _ ->
+      emit (Psync)
+  | "__builtin_isync", [], _ ->
+      emit (Pisync)
+  | "__builtin_trap", [], _ ->
+      emit (Ptrap)
+  (* Vararg stuff *)
+  | "__builtin_va_start", [IR a], _ ->
+      expand_builtin_va_start a
+  (* Catch-all *)
+  | _ ->
+      invalid_arg ("unrecognized builtin " ^ name)
+
+(* Calls to variadic functions: condition bit 6 must be set
+   if at least one argument is a float; clear otherwise.
+   Note that variadic functions cannot have arguments of type Tsingle. *)
+
+let set_cr6 sg =
+  if sg.sig_cc.cc_vararg then begin
+    let bit = CRbit_6 in
+    let has_float = List.mem Tfloat sg.sig_args in
+    emit (if has_float then Pcreqv(bit, bit, bit) else Pcrxor(bit, bit, bit))
+  end
+
+(* Expand instructions *)
+
+let num_crbit = function  (* number of a condition bit, as an OCaml int *)
+  | CRbit_0 -> 0
+  | CRbit_1 -> 1
+  | CRbit_2 -> 2
+  | CRbit_3 -> 3
+  | CRbit_6 -> 6
+
+let expand_instruction instr =  (* expand one instruction into the buffer; instructions with no case below are emitted unchanged *)
+  match instr with
+  | Pallocframe(sz, ofs) ->
+      let variadic = (!current_function).fn_sig.sig_cc.cc_vararg in
+      let sz = camlint_of_coqint sz in
+      assert (ofs = Int.zero);
+      let sz = if variadic then Int32.add sz 96l else sz in  (* variadic functions get 96 extra bytes of frame *)
+      let adj = Int32.neg sz in
+      if adj >= -0x8000l then  (* adjustment fits in a signed 16-bit displacement *)
+        emit (Pstwu(GPR1, Cint(coqint_of_camlint adj), GPR1))
+      else begin
+        emit_loadimm GPR0 (coqint_of_camlint adj);
+        emit (Pstwxu(GPR1, GPR1, GPR0))
+      end;
+      emit (Pcfi_adjust (coqint_of_camlint sz));
+      if variadic then begin
+        emit (Pmflr GPR0);  (* keep LR in GPR0 across the call below, which overwrites LR *)
+        emit (Pbl(intern_string "__compcert_va_saveregs",
+                  {sig_args = []; sig_res = None; sig_cc = cc_default}));
+        emit (Pmtlr GPR0)
+      end;
+      current_function_stacksize := sz  (* remembered for the expansion of va_start *)
+  | Pbctr sg | Pbctrl sg | Pbl(_, sg) | Pbs(_, sg) ->
+      set_cr6 sg;  (* emits something only if [sg] is variadic *)
+      emit instr
+  | Pfreeframe(sz, ofs) ->
+      let variadic = (!current_function).fn_sig.sig_cc.cc_vararg in
+      let sz = camlint_of_coqint sz in
+      let sz = if variadic then Int32.add sz 96l else sz in  (* same adjustment as in Pallocframe *)
+      if sz < 0x8000l then
+        emit (Paddi(GPR1, GPR1, Cint(coqint_of_camlint sz)))
+      else
+        emit (Plwz(GPR1, Cint ofs, GPR1))  (* size too large for an immediate: reload GPR1 from offset [ofs] of the frame *)
+  | Pfcti(r1, r2) ->
+      emit (Pfctiwz(FPR13, r2));
+      emit (Pstfdu(FPR13, Cint _m8, GPR1));  (* go through 8 bytes of temporary stack space *)
+      emit (Pcfi_adjust _8);
+      emit (Plwz(r1, Cint _4, GPR1));
+      emit (Paddi(GPR1, GPR1, Cint _8));
+      emit (Pcfi_adjust _m8)
+  | Pfmake(rd, r1, r2) ->
+      emit (Pstwu(r1, Cint _m8, GPR1));  (* [r1] at offset 0, [r2] at offset 4, then read back as one float *)
+      emit (Pcfi_adjust _8);
+      emit (Pstw(r2, Cint _4, GPR1));
+      emit (Plfd(rd, Cint _0, GPR1));
+      emit (Paddi(GPR1, GPR1, Cint _8));
+      emit (Pcfi_adjust _m8);
+  | Pfxdp(r1, r2) ->
+      if r1 <> r2 then emit(Pfmr(r1, r2))
+  | Pmfcrbit(r1, bit) ->
+      emit (Pmfcr r1);
+      emit (Prlwinm(r1, r1, Z.of_uint (1 + num_crbit bit), _1))  (* rotate by 1 + bit number, keep mask 1 *)
+  | Pbuiltin(ef, args, res) ->
+      begin match ef with
+      | EF_builtin(name, sg) ->
+          expand_builtin_inline (extern_atom name) args res
+      | EF_vload chunk ->
+          expand_builtin_vload chunk args res
+      | EF_vstore chunk ->
+          expand_builtin_vstore chunk args
+      | EF_vload_global(chunk, id, ofs) ->
+          if symbol_is_small_data id ofs
+          then expand_builtin_vload_sda chunk id ofs args res
+          else expand_builtin_vload_global chunk id ofs args res
+      | EF_vstore_global(chunk, id, ofs) ->
+          if symbol_is_small_data id ofs
+          then expand_builtin_vstore_sda chunk id ofs args
+          else expand_builtin_vstore_global chunk id ofs args
+      | EF_memcpy(sz, al) ->
+          expand_builtin_memcpy (Z.to_int sz) (Z.to_int al) args
+      | EF_annot_val(txt, targ) ->
+          expand_annot_val txt targ args res
+      | EF_inline_asm txt ->
+          emit instr  (* inline assembly is passed through unexpanded *)
+      | _ ->
+          assert false
+      end
+  | _ ->
+      emit instr
+
+let expand_function fn =
+  (* Start from an empty buffer, with [fn] as the function being expanded. *)
+  current_code := [];
+  set_current_function fn;
+  List.iter (fun instr -> expand_instruction instr) fn.fn_code;
+  let expanded = get_code() in
+  set_current_function dummy_function; { fn with fn_code = expanded }
+
+let expand_fundef fd =
+  match fd with  (* only internal functions carry code to expand *)
+  | Internal f -> Internal (expand_function f) | External _ -> fd
+
+let expand_program (p: Asm.program) : Asm.program =  (* apply [expand_fundef] through [AST.transform_program] *)
+  AST.transform_program expand_fundef p
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index 5ca770d..5c4ffde 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -721,7 +721,8 @@ Definition transl_function (f: Mach.function) :=
   OK (mkfunction f.(Mach.fn_sig)
        (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::
         Pmflr GPR0 ::
-        Pstw GPR0 (Cint f.(fn_retaddr_ofs)) GPR1 :: c)).
+        Pstw GPR0 (Cint f.(fn_retaddr_ofs)) GPR1 ::
+        Pcfi_rel_offset f.(fn_retaddr_ofs) :: c)).
 
 Definition transf_function (f: Mach.function) : res Asm.function :=
   do tf <- transl_function f;
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index 913fb50..2b52fe0 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -918,30 +918,36 @@ Local Transparent destroyed_by_jumptable.
   set (rs2 := nextinstr (rs0#GPR1 <- sp #GPR0 <- Vundef)).
   set (rs3 := nextinstr (rs2#GPR0 <- (rs0#LR))).
   set (rs4 := nextinstr rs3).
+  set (rs5 := nextinstr rs4).
   assert (EXEC_PROLOGUE:
             exec_straight tge x
               x.(fn_code) rs0 m'
-              x1 rs4 m3').
+              x1 rs5 m3').
   rewrite <- H5 at 2. simpl. 
-  apply exec_straight_three with rs2 m2' rs3 m2'.
+  apply exec_straight_step with rs2 m2'.
   unfold exec_instr. rewrite C. fold sp.
-  rewrite <- (sp_val _ _ _ AG). rewrite F. auto. 
-  simpl. auto.
+  rewrite <- (sp_val _ _ _ AG). rewrite F. auto. auto.
+  apply exec_straight_step with rs3 m2'.
+  simpl. auto. auto.
+  apply exec_straight_two with rs4 m3'.
   simpl. unfold store1. rewrite gpr_or_zero_not_zero. 
   change (rs3 GPR1) with sp. change (rs3 GPR0) with (rs0 LR). simpl. 
   rewrite Int.add_zero_l. simpl in P. rewrite Int.add_zero_l in P. rewrite ATLR. rewrite P. auto. congruence.
   auto. auto. auto.
-  left; exists (State rs4 m3'); split.
+  left; exists (State rs5 m3'); split.
   eapply exec_straight_steps_1; eauto. omega. constructor. 
   econstructor; eauto. 
-  change (rs4 PC) with (Val.add (Val.add (Val.add (rs0 PC) Vone) Vone) Vone). 
+  change (rs5 PC) with (Val.add (Val.add (Val.add (Val.add (rs0 PC) Vone) Vone) Vone) Vone).
   rewrite ATPC. simpl. constructor; eauto.
-  subst x; simpl in g. unfold fn_code. eapply code_tail_next_int. omega. 
+  subst x; simpl in g. unfold fn_code.
   eapply code_tail_next_int. omega. 
   eapply code_tail_next_int. omega. 
+  eapply code_tail_next_int. omega.
+  eapply code_tail_next_int. omega.
   constructor.
-  unfold rs4, rs3, rs2.
-  apply agree_nextinstr. apply agree_set_other; auto. apply agree_set_other; auto. 
+  unfold rs5, rs4, rs3, rs2.
+  apply agree_nextinstr. apply agree_nextinstr. 
+  apply agree_set_other; auto. apply agree_set_other; auto. 
   apply agree_nextinstr. apply agree_set_other; auto.
   eapply agree_change_sp; eauto. unfold sp; congruence.
   congruence.
diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml
index e3f0724..691ecfb 100644
--- a/powerpc/PrintAsm.ml
+++ b/powerpc/PrintAsm.ml
@@ -102,6 +102,7 @@ let num_crbit = function
   | CRbit_1 -> 1
   | CRbit_2 -> 2
   | CRbit_3 -> 3
+  | CRbit_6 -> 6
 
 let crbit oc bit =
   fprintf oc "%d" (num_crbit bit)
@@ -287,16 +288,6 @@ let rolm_mask n =
   assert (!count = 2 || (!count = 0 && !last));
   (!mb, !me-1)
 
-(* Built-ins.  They come in three flavors: 
-   - annotation statements: take their arguments in registers or stack
-     locations; generate no code;
-   - inlined by the compiler: take their arguments in arbitrary
-     registers; preserve all registers except the reserved temporaries
-     (GPR0, GPR11, GPR12, FPR13);
-   - inlined while printing asm code; take their arguments in
-     locations dictated by the calling conventions; preserve
-     callee-save regs only. *)
-
 (* Handling of annotations *)
 
 let re_file_line = Str.regexp "#line:\\(.*\\):\\([1-9][0-9]*\\)$"
@@ -309,361 +300,6 @@ let print_annot_stmt oc txt targs args =
     PrintAnnot.print_annot_stmt preg "R1" oc txt targs args
   end
 
-let print_annot_val oc txt args res =
-  fprintf oc "%s annotation: " comment;
-  PrintAnnot.print_annot_val preg oc txt args;
-  match args, res with
-  | [IR src], [IR dst] ->
-      if dst <> src then fprintf oc "	mr	%a, %a\n" ireg dst ireg src 
-  | [FR src], [FR dst] ->
-      if dst <> src then fprintf oc "	fmr	%a, %a\n" freg dst freg src 
-  | _, _ ->
-      assert false
-
-(* Handling of memcpy *)
-
-(* On the PowerPC, unaligned accesses to 16- and 32-bit integers are
-   fast, but unaligned accesses to 64-bit floats can be slow
-   (not so much on G5, but clearly so on Power7).
-   So, use 64-bit accesses only if alignment >= 4.
-   Note that lfd and stfd cannot trap on ill-formed floats. *)
-
-let print_builtin_memcpy_small oc sz al src dst =
-  let rec copy ofs sz =
-    if sz >= 8 && al >= 4 && !Clflags.option_ffpu then begin
-      fprintf oc "	lfd	%a, %d(%a)\n" freg FPR13 ofs ireg src;
-      fprintf oc "	stfd	%a, %d(%a)\n" freg FPR13 ofs ireg dst;
-      copy (ofs + 8) (sz - 8)
-    end else if sz >= 4 then begin
-      fprintf oc "	lwz	%a, %d(%a)\n" ireg GPR0 ofs ireg src;
-      fprintf oc "	stw	%a, %d(%a)\n" ireg GPR0 ofs ireg dst;
-      copy (ofs + 4) (sz - 4)
-    end else if sz >= 2 then begin
-      fprintf oc "	lhz	%a, %d(%a)\n" ireg GPR0 ofs ireg src;
-      fprintf oc "	sth	%a, %d(%a)\n" ireg GPR0 ofs ireg dst;
-      copy (ofs + 2) (sz - 2)
-    end else if sz >= 1 then begin
-      fprintf oc "	lbz	%a, %d(%a)\n" ireg GPR0 ofs ireg src;
-      fprintf oc "	stb	%a, %d(%a)\n" ireg GPR0 ofs ireg dst;
-      copy (ofs + 1) (sz - 1)
-    end in
-  copy 0 sz
-
-let print_builtin_memcpy_big oc sz al src dst =
-  assert (sz >= 4);
-  fprintf oc "	li	%a, %d\n" ireg GPR0 (sz / 4);
-  fprintf oc "	mtctr	%a\n" ireg GPR0;
-  let (s,d) = if dst <> GPR11 then (GPR11, GPR12) else (GPR12, GPR11) in
-  fprintf oc "	addi	%a, %a, -4\n" ireg s ireg src;
-  fprintf oc "	addi	%a, %a, -4\n" ireg d ireg dst;
-  let lbl = new_label() in
-  fprintf oc "%a:	lwzu	%a, 4(%a)\n" label lbl ireg GPR0 ireg s;
-  fprintf oc "	stwu	%a, 4(%a)\n" ireg GPR0 ireg d;
-  fprintf oc "	bdnz	%a\n" label lbl;
-  (* s and d lag behind by 4 bytes *)
-  match sz land 3 with
-  | 1 -> fprintf oc "	lbz	%a, 4(%a)\n" ireg GPR0 ireg s;
-         fprintf oc "	stb	%a, 4(%a)\n" ireg GPR0 ireg d
-  | 2 -> fprintf oc "	lhz	%a, 4(%a)\n" ireg GPR0 ireg s;
-         fprintf oc "	sth	%a, 4(%a)\n" ireg GPR0 ireg d
-  | 3 -> fprintf oc "	lhz	%a, 4(%a)\n" ireg GPR0 ireg s;
-         fprintf oc "	sth	%a, 4(%a)\n" ireg GPR0 ireg d;
-         fprintf oc "	lbz	%a, 6(%a)\n" ireg GPR0 ireg s;
-         fprintf oc "	stb	%a, 6(%a)\n" ireg GPR0 ireg d
-  | _ -> ()
-
-let print_builtin_memcpy oc sz al args =
-  let (dst, src) =
-    match args with [IR d; IR s] -> (d, s) | _ -> assert false in
-  fprintf oc "%s begin builtin __builtin_memcpy_aligned, size = %d, alignment = %d\n"
-          comment sz al;
-  if sz <= (if !Clflags.option_ffpu then 48 else 24)
-  then print_builtin_memcpy_small oc sz al src dst
-  else print_builtin_memcpy_big oc sz al src dst;
-  fprintf oc "%s end builtin __builtin_memcpy_aligned\n" comment
-
-(* Handling of volatile reads and writes *)
-
-let print_builtin_vload_common oc chunk base offset res =
-  match chunk, res with
-  | Mint8unsigned, IR res ->
-      fprintf oc "	lbz	%a, %a(%a)\n" ireg res constant offset ireg base
-  | Mint8signed, IR res ->
-      fprintf oc "	lbz	%a, %a(%a)\n" ireg res constant offset ireg base;
-      fprintf oc "	extsb	%a, %a\n" ireg res ireg res
-  | Mint16unsigned, IR res ->
-      fprintf oc "	lhz	%a, %a(%a)\n" ireg res constant offset ireg base
-  | Mint16signed, IR res ->
-      fprintf oc "	lha	%a, %a(%a)\n" ireg res constant offset ireg base
-  | (Mint32 | Many32), IR res ->
-      fprintf oc "	lwz	%a, %a(%a)\n" ireg res constant offset ireg base
-  | Mfloat32, FR res ->
-      fprintf oc "	lfs	%a, %a(%a)\n" freg res constant offset ireg base
-  | (Mfloat64 | Many64), FR res ->
-      fprintf oc "	lfd	%a, %a(%a)\n" freg res constant offset ireg base
-  (* Mint64 is special-cased below *)
-  | _ ->
-      assert false
-
-let print_builtin_vload oc chunk args res =
-  fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
-  begin match args, res with
-  | [IR addr], [res] when chunk <> Mint64 ->
-      print_builtin_vload_common oc chunk addr (Cint Integers.Int.zero) res
-  | [IR addr], [IR res1; IR res2] when chunk = Mint64 ->
-      if addr <> res1 then begin
-        fprintf oc "	lwz	%a, 0(%a)\n" ireg res1 ireg addr;
-        fprintf oc "	lwz	%a, 4(%a)\n" ireg res2 ireg addr
-      end else begin
-        fprintf oc "	lwz	%a, 4(%a)\n" ireg res2 ireg addr;
-        fprintf oc "	lwz	%a, 0(%a)\n" ireg res1 ireg addr
-      end
-  | _ ->
-      assert false
-  end;
-  fprintf oc "%s end builtin __builtin_volatile_read\n" comment
-
-let print_builtin_vload_global oc chunk id ofs args res =
-  fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
-  begin match res with
-  | [res] when chunk <> Mint64 ->
-      fprintf oc "	addis	%a, %a, %a\n"
-                 ireg GPR11 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
-      print_builtin_vload_common oc chunk GPR11 (Csymbol_low(id, ofs)) res
-  | [IR res1; IR res2] when chunk = Mint64 ->
-      fprintf oc "	addis	%a, %a, %a\n"
-                 ireg res1 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
-      fprintf oc "	lwz	%a, %a(%a)\n"
-                 ireg res1 constant (Csymbol_low(id, ofs)) ireg res1;
-      let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in
-      fprintf oc "	addis	%a, %a, %a\n"
-                 ireg res2 ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
-      fprintf oc "	lwz	%a, %a(%a)\n"
-                 ireg res2 constant (Csymbol_low(id, ofs)) ireg res2
-  | _ ->
-      assert false
-  end;
-  fprintf oc "%s end builtin __builtin_volatile_read\n" comment
-
-let print_builtin_vstore_common oc chunk base offset src =
-  match chunk, src with
-  | (Mint8signed | Mint8unsigned), IR src ->
-      fprintf oc "	stb	%a, %a(%a)\n" ireg src constant offset ireg base
-  | (Mint16signed | Mint16unsigned), IR src ->
-      fprintf oc "	sth	%a, %a(%a)\n" ireg src constant offset ireg base
-  | (Mint32 | Many32), IR src ->
-      fprintf oc "	stw	%a, %a(%a)\n" ireg src constant offset ireg base
-  | Mfloat32, FR src ->
-      fprintf oc "	stfs	%a, %a(%a)\n" freg src constant offset ireg base
-  | (Mfloat64 | Many64), FR src ->
-      fprintf oc "	stfd	%a, %a(%a)\n" freg src constant offset ireg base
-  (* Mint64 is special-cased below *)
-  | _ ->
-      assert false
-
-let print_builtin_vstore oc chunk args =
-  fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
-  begin match args with
-  | [IR addr; src] when chunk <> Mint64 ->
-      print_builtin_vstore_common oc chunk addr (Cint Integers.Int.zero) src
-  | [IR addr; IR src1; IR src2] when chunk = Mint64 ->
-      fprintf oc "	stw	%a, 0(%a)\n" ireg src1 ireg addr;
-      fprintf oc "	stw	%a, 4(%a)\n" ireg src2 ireg addr
-  | _ ->
-      assert false
-  end;
-  fprintf oc "%s end builtin __builtin_volatile_write\n" comment
-
-let print_builtin_vstore_global oc chunk id ofs args =
-  fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
-  begin match args with
-  | [src] when chunk <> Mint64 ->
-      let tmp = if src = IR GPR11 then GPR12 else GPR11 in
-      fprintf oc "	addis	%a, %a, %a\n"
-                 ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
-      print_builtin_vstore_common oc chunk tmp (Csymbol_low(id, ofs)) src
-  | [IR src1; IR src2] when chunk = Mint64 ->
-      let tmp =
-        if not (List.mem GPR12 [src1; src2]) then GPR12 else
-        if not (List.mem GPR11 [src1; src2]) then GPR11 else GPR10 in
-      fprintf oc "	addis	%a, %a, %a\n"
-                 ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
-      fprintf oc "	stw	%a, %a(%a)\n"
-                 ireg src1 constant (Csymbol_low(id, ofs)) ireg tmp;
-      let ofs = Integers.Int.add ofs (coqint_of_camlint 4l) in
-      fprintf oc "	addis	%a, %a, %a\n"
-                 ireg tmp ireg_or_zero GPR0 constant (Csymbol_high(id, ofs));
-      fprintf oc "	stw	%a, %a(%a)\n"
-                 ireg src2 constant (Csymbol_low(id, ofs)) ireg tmp
-  | _ ->
-      assert false
-  end;
-  fprintf oc "%s end builtin __builtin_volatile_write\n" comment
-
-(* Handling of varargs *)
-
-let current_function_stacksize = ref 0l
-let current_function_sig =
-  ref { sig_args = []; sig_res = None; sig_cc = cc_default }
-
-let align n a = (n + a - 1) land (-a)
-
-let rec next_arg_locations ir fr ofs = function
-  | [] ->
-      (ir, fr, ofs)
-  | (Tint | Tany32) :: l ->
-      if ir < 8
-      then next_arg_locations (ir + 1) fr ofs l
-      else next_arg_locations ir fr (ofs + 4) l
-  | (Tfloat | Tsingle | Tany64) :: l ->
-      if fr < 8
-      then next_arg_locations ir (fr + 1) ofs l
-      else next_arg_locations ir fr (align ofs 8 + 8) l
-  | Tlong :: l ->
-      if ir < 7
-      then next_arg_locations (align ir 2 + 2) fr ofs l
-      else next_arg_locations ir fr (align ofs 8 + 8) l
-
-let print_builtin_va_start oc r =
-  if not (!current_function_sig).sig_cc.cc_vararg then
-    invalid_arg "Fatal error: va_start used in non-vararg function";
-  let (ir, fr, ofs) =
-    next_arg_locations 0 0 0 (!current_function_sig).sig_args in
-  fprintf oc "	li	%a, %d\n" ireg GPR0 ir;
-  fprintf oc "	stb     %a, 0(%a)\n" ireg GPR0 ireg r;
-  fprintf oc "	li	%a, %d\n" ireg GPR0 fr;
-  fprintf oc "	stb     %a, 1(%a)\n" ireg GPR0 ireg r;
-  fprintf oc "	addi	%a, %a, %ld\n" ireg GPR0 ireg GPR1
-                             Int32.(add (add !current_function_stacksize 8l)
-                                        (of_int ofs));
-  fprintf oc "	stw	%a, 4(%a)\n" ireg GPR0 ireg r;
-  fprintf oc "	addi	%a, %a, %ld\n" ireg GPR0 ireg GPR1
-                             Int32.(sub !current_function_stacksize 96l);
-  fprintf oc "	stw	%a, 8(%a)\n" ireg GPR0 ireg r
-
-(* Handling of compiler-inlined builtins *)
-
-let print_builtin_inline oc name args res =
-  fprintf oc "%s begin builtin %s\n" comment name;
-  (* Can use as temporaries: GPR0, FPR13 *)
-  begin match name, args, res with
-  (* Integer arithmetic *)
-  | "__builtin_mulhw", [IR a1; IR a2], [IR res] ->
-      fprintf oc "	mulhw	%a, %a, %a\n" ireg res ireg a1 ireg a2
-  | "__builtin_mulhwu", [IR a1; IR a2], [IR res] ->
-      fprintf oc "	mulhwu	%a, %a, %a\n" ireg res ireg a1 ireg a2
-  | "__builtin_cntlz", [IR a1], [IR res] ->
-      fprintf oc "	cntlzw	%a, %a\n" ireg res ireg a1
-  | ("__builtin_bswap" | "__builtin_bswap32"), [IR a1], [IR res] ->
-      fprintf oc "	stwu	%a, -8(%a)\n" ireg a1 ireg GPR1;
-      cfi_adjust oc 8l;
-      fprintf oc "	lwbrx	%a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg GPR1;
-      fprintf oc "	addi	%a, %a, 8\n" ireg GPR1 ireg GPR1;
-      cfi_adjust oc (-8l)
-  | "__builtin_bswap16", [IR a1], [IR res] ->
-      fprintf oc "	rlwinm	%a, %a, 8, 16, 23\n" ireg GPR0 ireg a1;
-      fprintf oc "	rlwinm	%a, %a, 24, 24, 31\n" ireg res ireg a1;
-      fprintf oc "	or	%a, %a, %a\n" ireg res ireg GPR0 ireg res
-  (* Float arithmetic *)
-  | "__builtin_fmadd", [FR a1; FR a2; FR a3], [FR res] ->
-      fprintf oc "	fmadd	%a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
-  | "__builtin_fmsub", [FR a1; FR a2; FR a3], [FR res] ->
-      fprintf oc "	fmsub	%a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
-  | "__builtin_fnmadd", [FR a1; FR a2; FR a3], [FR res] ->
-      fprintf oc "	fnmadd	%a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
-  | "__builtin_fnmsub", [FR a1; FR a2; FR a3], [FR res] ->
-      fprintf oc "	fnmsub	%a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
-  | "__builtin_fabs", [FR a1], [FR res] ->
-      fprintf oc "	fabs	%a, %a\n" freg res freg a1
-  | "__builtin_fsqrt", [FR a1], [FR res] ->
-      fprintf oc "	fsqrt	%a, %a\n" freg res freg a1
-  | "__builtin_frsqrte", [FR a1], [FR res] ->
-      fprintf oc "	frsqrte	%a, %a\n" freg res freg a1
-  | "__builtin_fres", [FR a1], [FR res] ->
-      fprintf oc "	fres	%a, %a\n" freg res freg a1
-  | "__builtin_fsel", [FR a1; FR a2; FR a3], [FR res] ->
-      fprintf oc "	fsel	%a, %a, %a, %a\n" freg res freg a1 freg a2 freg a3
-  | "__builtin_fcti", [FR a1], [IR res] ->
-      fprintf oc "	fctiw	%a, %a\n" freg FPR13 freg a1;
-      fprintf oc "	stfdu	%a, -8(%a)\n" freg FPR13 ireg GPR1;
-      cfi_adjust oc 8l;
-      fprintf oc "	lwz	%a, 4(%a)\n" ireg res ireg GPR1;
-      fprintf oc "	addi	%a, %a, 8\n" ireg GPR1 ireg GPR1;
-      cfi_adjust oc (-8l)
-  (* 64-bit integer arithmetic *)
-  | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
-      if rl = ah then begin
-        fprintf oc "	subfic	%a, %a, 0\n" ireg GPR0 ireg al;
-        fprintf oc "	subfze	%a, %a\n" ireg rh ireg ah;
-        fprintf oc "	mr	%a, %a\n" ireg rl ireg GPR0
-      end else begin
-        fprintf oc "	subfic	%a, %a, 0\n" ireg rl ireg al;
-        fprintf oc "	subfze	%a, %a\n" ireg rh ireg ah
-      end
-  | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
-      if rl = ah || rl = bh then begin
-        fprintf oc "	addc	%a, %a, %a\n" ireg GPR0 ireg al ireg bl;
-        fprintf oc "	adde	%a, %a, %a\n" ireg rh ireg ah ireg bh;
-        fprintf oc "	mr	%a, %a\n" ireg rl ireg GPR0
-      end else begin
-        fprintf oc "	addc	%a, %a, %a\n" ireg rl ireg al ireg bl;
-        fprintf oc "	adde	%a, %a, %a\n" ireg rh ireg ah ireg bh
-      end
-  | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
-      if rl = ah || rl = bh then begin
-        fprintf oc "	subfc	%a, %a, %a\n" ireg GPR0 ireg bl ireg al;
-        fprintf oc "	subfe	%a, %a, %a\n" ireg rh ireg bh ireg ah;
-        fprintf oc "	mr	%a, %a\n" ireg rl ireg GPR0
-      end else begin
-        fprintf oc "	subfc	%a, %a, %a\n" ireg rl ireg bl ireg al;
-        fprintf oc "	subfe	%a, %a, %a\n" ireg rh ireg bh ireg ah
-      end
-  | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
-      if rl = a || rl = b then begin
-        fprintf oc "	mullw	%a, %a, %a\n" ireg GPR0 ireg a ireg b;
-        fprintf oc "	mulhwu	%a, %a, %a\n" ireg rh ireg a ireg b;
-        fprintf oc "	mr	%a, %a\n" ireg rl ireg GPR0
-      end else begin
-        fprintf oc "	mullw	%a, %a, %a\n" ireg rl ireg a ireg b;
-        fprintf oc "	mulhwu	%a, %a, %a\n" ireg rh ireg a ireg b
-      end
-  (* Memory accesses *)
-  | "__builtin_read16_reversed", [IR a1], [IR res] ->
-      fprintf oc "	lhbrx	%a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1
-  | "__builtin_read32_reversed", [IR a1], [IR res] ->
-      fprintf oc "	lwbrx	%a, %a, %a\n" ireg res ireg_or_zero GPR0 ireg a1
-  | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
-      fprintf oc "	sthbrx	%a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1
-  | "__builtin_write32_reversed", [IR a1; IR a2], _ ->
-      fprintf oc "	stwbrx	%a, %a, %a\n" ireg a2 ireg_or_zero GPR0 ireg a1
-  (* Synchronization *)
-  | "__builtin_eieio", [], _ ->
-      fprintf oc "	eieio\n"
-  | "__builtin_sync", [], _ ->
-      fprintf oc "	sync\n"
-  | "__builtin_isync", [], _ ->
-      fprintf oc "	isync\n"
-  | "__builtin_trap", [], _ ->
-      fprintf oc "	trap\n"
-  (* Vararg stuff *)
-  | "__builtin_va_start", [IR a], _ ->
-      print_builtin_va_start oc a
-  (* Catch-all *)
-  | _ ->
-      invalid_arg ("unrecognized builtin " ^ name)
-  end;
-  fprintf oc "%s end builtin %s\n" comment name
-
-(* Calls to variadic functions: condition bit 6 must be set
-   if at least one argument is a float; clear otherwise *)
-
-let set_cr6 oc sg =
-  if sg.sig_cc.cc_vararg then begin
-    if List.mem Tfloat sg.sig_args
-    then fprintf oc "	creqv	6, 6, 6\n"
-    else fprintf oc "	crxor	6, 6, 6\n"
-  end
-
 (* Determine if the displacement of a conditional branch fits the short form *)
 
 let short_cond_branch tbl pc lbl_dest =
@@ -681,6 +317,8 @@ let jumptables : (int * label list) list ref = ref []
 let print_instruction oc tbl pc fallthrough = function
   | Padd(r1, r2, r3) ->
       fprintf oc "	add	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+  | Paddc(r1, r2, r3) ->
+      fprintf oc "	addc	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Padde(r1, r2, r3) ->
       fprintf oc "	adde	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Paddi(r1, r2, c) ->
@@ -692,28 +330,7 @@ let print_instruction oc tbl pc fallthrough = function
   | Paddze(r1, r2) ->
       fprintf oc "	addze	%a, %a\n" ireg r1 ireg r2
   | Pallocframe(sz, ofs) ->
-      let sz = camlint_of_coqint sz
-      and ofs = camlint_of_coqint ofs in
-      assert (ofs = 0l);
-      let sz = 
-        if (!current_function_sig).sig_cc.cc_vararg
-        then Int32.add sz 96l
-        else sz in
-      let adj = Int32.neg sz in
-      if adj >= -0x8000l then
-        fprintf oc "	stwu	%a, %ld(%a)\n" ireg GPR1 adj ireg GPR1
-      else begin
-        fprintf oc "	addis	%a, 0, %ld\n" ireg GPR0 (Int32.shift_right_logical adj 16);
-        fprintf oc "	ori	%a, %a, %ld\n" ireg GPR0 ireg GPR0 (Int32.logand adj 0xFFFFl);
-        fprintf oc "	stwux	%a, %a, %a\n" ireg GPR1 ireg GPR1 ireg GPR0
-      end;
-      cfi_adjust oc sz;
-      if (!current_function_sig).sig_cc.cc_vararg then begin
-        fprintf oc "	mflr	%a\n" ireg GPR0;
-        fprintf oc "	bl	__compcert_va_saveregs\n";
-        fprintf oc "	mtlr	%a\n" ireg GPR0
-      end;
-      current_function_stacksize := sz
+      assert false
   | Pand_(r1, r2, r3) ->
       fprintf oc "	and.	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Pandc(r1, r2, r3) ->
@@ -725,11 +342,11 @@ let print_instruction oc tbl pc fallthrough = function
   | Pb lbl ->
       fprintf oc "	b	%a\n" label (transl_label lbl)
   | Pbctr sg ->
-      set_cr6 oc sg;
       fprintf oc "	bctr\n"
   | Pbctrl sg ->
-      set_cr6 oc sg;
       fprintf oc "	bctrl\n"
+  | Pbdnz lbl ->
+      fprintf oc "	bdnz	%a\n" label (transl_label lbl)
   | Pbf(bit, lbl) ->
       if !Clflags.option_faligncondbranchs > 0 then
         fprintf oc "	.balign	%d\n" !Clflags.option_faligncondbranchs;
@@ -742,10 +359,8 @@ let print_instruction oc tbl pc fallthrough = function
         fprintf oc "%a:\n" label next
       end
   | Pbl(s, sg) ->
-      set_cr6 oc sg;
       fprintf oc "	bl	%a\n" symbol s
   | Pbs(s, sg) ->
-      set_cr6 oc sg;
       fprintf oc "	b	%a\n" symbol s
   | Pblr ->
       fprintf oc "	blr\n"
@@ -781,12 +396,20 @@ let print_instruction oc tbl pc fallthrough = function
       fprintf oc "	cmpw	%a, %a, %a\n" creg 0 ireg r1 ireg r2
   | Pcmpwi(r1, c) ->
       fprintf oc "	cmpwi	%a, %a, %a\n" creg 0 ireg r1 constant c
+  | Pcntlz(r1, r2) ->  (* assembler mnemonic is "cntlzw" *)
+      fprintf oc "	cntlzw	%a, %a\n" ireg r1 ireg r2
+  | Pcreqv(c1, c2, c3) ->
+      fprintf oc "	creqv	%a, %a, %a\n" crbit c1 crbit c2 crbit c3
   | Pcror(c1, c2, c3) ->
       fprintf oc "	cror	%a, %a, %a\n" crbit c1 crbit c2 crbit c3
+  | Pcrxor(c1, c2, c3) ->
+      fprintf oc "	crxor	%a, %a, %a\n" crbit c1 crbit c2 crbit c3
   | Pdivw(r1, r2, r3) ->
       fprintf oc "	divw	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Pdivwu(r1, r2, r3) ->
       fprintf oc "	divwu	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+  | Peieio ->
+      fprintf oc "	eieio\n"
   | Peqv(r1, r2, r3) ->
       fprintf oc "	eqv	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Pextsb(r1, r2) ->
@@ -794,16 +417,7 @@ let print_instruction oc tbl pc fallthrough = function
   | Pextsh(r1, r2) ->
       fprintf oc "	extsh	%a, %a\n" ireg r1 ireg r2
   | Pfreeframe(sz, ofs) ->
-      let sz = camlint_of_coqint sz
-      and ofs = camlint_of_coqint ofs in
-      let sz = 
-        if (!current_function_sig).sig_cc.cc_vararg
-        then Int32.add sz 96l
-        else sz in
-      if sz < 0x8000l then
-        fprintf oc "	addi	%a, %a, %ld\n" ireg GPR1 ireg GPR1 sz
-      else
-        fprintf oc "	lwz	%a, %ld(%a)\n" ireg GPR1 ofs ireg GPR1
+      assert false
   | Pfabs(r1, r2) | Pfabss(r1, r2) ->
       fprintf oc "	fabs	%a, %a\n" freg r1 freg r2
   | Pfadd(r1, r2, r3) ->
@@ -813,28 +427,17 @@ let print_instruction oc tbl pc fallthrough = function
   | Pfcmpu(r1, r2) ->
       fprintf oc "	fcmpu	%a, %a, %a\n" creg 0 freg r1 freg r2
   | Pfcti(r1, r2) ->
-      fprintf oc "%s begin pseudoinstr %a = fcti(%a)\n" comment ireg r1 freg r2;
-      fprintf oc "	fctiwz	%a, %a\n" freg FPR13 freg r2;
-      fprintf oc "	stfdu	%a, -8(%a)\n" freg FPR13 ireg GPR1;
-      cfi_adjust oc 8l;
-      fprintf oc "	lwz	%a, 4(%a)\n" ireg r1 ireg GPR1;
-      fprintf oc "	addi	%a, %a, 8\n" ireg GPR1 ireg GPR1;
-      cfi_adjust oc (-8l);
-      fprintf oc "%s end pseudoinstr fcti\n" comment
+      assert false
+  | Pfctiw(r1, r2) ->
+      fprintf oc "	fctiw	%a, %a\n" freg r1 freg r2
+  | Pfctiwz(r1, r2) ->
+      fprintf oc "	fctiwz	%a, %a\n" freg r1 freg r2
   | Pfdiv(r1, r2, r3) ->
       fprintf oc "	fdiv	%a, %a, %a\n" freg r1 freg r2 freg r3
   | Pfdivs(r1, r2, r3) ->
       fprintf oc "	fdivs	%a, %a, %a\n" freg r1 freg r2 freg r3
   | Pfmake(rd, r1, r2) ->
-      fprintf oc "%s begin pseudoinstr %a = fmake(%a, %a)\n"
-              comment freg rd ireg r1 ireg r2;
-      fprintf oc "	stwu	%a, -8(%a)\n" ireg r1 ireg GPR1;
-      cfi_adjust oc 8l;
-      fprintf oc "	stw	%a, 4(%a)\n" ireg r2 ireg GPR1;
-      fprintf oc "	lfd	%a, 0(%a)\n" freg rd ireg GPR1;
-      fprintf oc "	addi	%a, %a, 8\n" ireg GPR1 ireg GPR1;
-      cfi_adjust oc (-8l);
-      fprintf oc "%s end pseudoinstr fmake\n" comment
+      assert false
   | Pfmr(r1, r2) ->
       fprintf oc "	fmr	%a, %a\n" freg r1 freg r2
   | Pfmul(r1, r2, r3) ->
@@ -846,12 +449,29 @@ let print_instruction oc tbl pc fallthrough = function
   | Pfrsp(r1, r2) ->
       fprintf oc "	frsp	%a, %a\n" freg r1 freg r2
   | Pfxdp(r1, r2) ->
-      if r1 <> r2 then
-        fprintf oc "	fmr	%a, %a\n" freg r1 freg r2
+      assert false
   | Pfsub(r1, r2, r3) ->
       fprintf oc "	fsub	%a, %a, %a\n" freg r1 freg r2 freg r3
   | Pfsubs(r1, r2, r3) ->
       fprintf oc "	fsubs	%a, %a, %a\n" freg r1 freg r2 freg r3
+  | Pfmadd(r1, r2, r3, r4) ->
+      fprintf oc "	fmadd	%a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4
+  | Pfmsub(r1, r2, r3, r4) ->
+      fprintf oc "	fmsub	%a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4
+  | Pfnmadd(r1, r2, r3, r4) ->
+      fprintf oc "	fnmadd	%a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4
+  | Pfnmsub(r1, r2, r3, r4) ->
+      fprintf oc "	fnmsub	%a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4
+  | Pfsqrt(r1, r2) ->
+      fprintf oc "	fsqrt	%a, %a\n" freg r1 freg r2    
+  | Pfrsqrte(r1, r2) ->
+      fprintf oc "	frsqrte	%a, %a\n" freg r1 freg r2    
+  | Pfres(r1, r2) ->
+      fprintf oc "	fres	%a, %a\n" freg r1 freg r2    
+  | Pfsel(r1, r2, r3, r4) ->
+      fprintf oc "	fsel	%a, %a, %a, %a\n" freg r1 freg r2 freg r3 freg r4
+  | Pisync ->
+      fprintf oc "	isync\n"
   | Plbz(r1, c, r2) ->
       fprintf oc "	lbz	%a, %a(%a)\n" ireg r1 constant c ireg r2
   | Plbzx(r1, r2, r3) ->
@@ -860,16 +480,6 @@ let print_instruction oc tbl pc fallthrough = function
       fprintf oc "	lfd	%a, %a(%a)\n" freg r1 constant c ireg r2
   | Plfdx(r1, r2, r3) | Plfdx_a(r1, r2, r3) ->
       fprintf oc "	lfdx	%a, %a, %a\n" freg r1 ireg r2 ireg r3
-  | Plfi(r1, c) ->
-      let lbl = new_label() in
-      fprintf oc "	addis	%a, 0, %a\n" ireg GPR12 label_high lbl;
-      fprintf oc "	lfd	%a, %a(%a) %s %.18g\n" freg r1 label_low lbl ireg GPR12 comment (camlfloat_of_coqfloat c);
-      float_literals := (lbl, camlint64_of_coqint (Floats.Float.to_bits c)) :: !float_literals;
-  | Plfis(r1, c) ->
-      let lbl = new_label() in
-      fprintf oc "	addis	%a, 0, %a\n" ireg GPR12 label_high lbl;
-      fprintf oc "	lfs	%a, %a(%a) %s %.18g\n" freg r1 label_low lbl ireg GPR12 comment (camlfloat_of_coqfloat32 c);
-      float32_literals := (lbl, camlint_of_coqint (Floats.Float32.to_bits c)) :: !float32_literals;
   | Plfs(r1, c, r2) ->
       fprintf oc "	lfs	%a, %a(%a)\n" freg r1 constant c ireg r2
   | Plfsx(r1, r2, r3) ->
@@ -878,20 +488,36 @@ let print_instruction oc tbl pc fallthrough = function
       fprintf oc "	lha	%a, %a(%a)\n" ireg r1 constant c ireg r2
   | Plhax(r1, r2, r3) ->
       fprintf oc "	lhax	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+  | Plhbrx(r1, r2, r3) ->
+      fprintf oc "	lhbrx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Plhz(r1, c, r2) ->
       fprintf oc "	lhz	%a, %a(%a)\n" ireg r1 constant c ireg r2
   | Plhzx(r1, r2, r3) ->
       fprintf oc "	lhzx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+  | Plfi(r1, c) ->
+      let lbl = new_label() in
+      fprintf oc "	addis	%a, 0, %a\n" ireg GPR12 label_high lbl;
+      fprintf oc "	lfd	%a, %a(%a) %s %.18g\n" freg r1 label_low lbl ireg GPR12 comment (camlfloat_of_coqfloat c);
+      float_literals := (lbl, camlint64_of_coqint (Floats.Float.to_bits c)) :: !float_literals;
+  | Plfis(r1, c) ->
+      let lbl = new_label() in
+      fprintf oc "	addis	%a, 0, %a\n" ireg GPR12 label_high lbl;
+      fprintf oc "	lfs	%a, %a(%a) %s %.18g\n" freg r1 label_low lbl ireg GPR12 comment (camlfloat_of_coqfloat32 c);
+      float32_literals := (lbl, camlint_of_coqint (Floats.Float32.to_bits c)) :: !float32_literals;
+  | Plwbrx(r1, r2, r3) ->
+      fprintf oc "	lwbrx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Plwz(r1, c, r2) | Plwz_a(r1, c, r2) ->
       fprintf oc "	lwz	%a, %a(%a)\n" ireg r1 constant c ireg r2
+  | Plwzu(r1, c, r2) ->
+      fprintf oc "	lwzu	%a, %a(%a)\n" ireg r1 constant c ireg r2
   | Plwzx(r1, r2, r3) | Plwzx_a(r1, r2, r3) ->
       fprintf oc "	lwzx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+  | Pmfcr(r1) ->
+      fprintf oc "	mfcr	%a\n" ireg r1
   | Pmfcrbit(r1, bit) ->
-      fprintf oc "	mfcr	%a\n" ireg r1;
-      fprintf oc "	rlwinm	%a, %a, %d, 31, 31\n" ireg r1  ireg r1 (1 + num_crbit bit)
+      assert false
   | Pmflr(r1) ->
-      fprintf oc "	mflr	%a\n" ireg r1;
-      cfi_rel_offset oc "lr" 8l
+      fprintf oc "	mflr	%a\n" ireg r1
   | Pmr(r1, r2) ->
       fprintf oc "	mr	%a, %a\n" ireg r1 ireg r2
   | Pmtctr(r1) ->
@@ -942,6 +568,8 @@ let print_instruction oc tbl pc fallthrough = function
       fprintf oc "	stbx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Pstfd(r1, c, r2) | Pstfd_a(r1, c, r2) ->
       fprintf oc "	stfd	%a, %a(%a)\n" freg r1 constant c ireg r2
+  | Pstfdu(r1, c, r2) ->
+      fprintf oc "	stfdu	%a, %a(%a)\n" freg r1 constant c ireg r2
   | Pstfdx(r1, r2, r3) | Pstfdx_a(r1, r2, r3) ->
       fprintf oc "	stfdx	%a, %a, %a\n" freg r1 ireg r2 ireg r3
   | Pstfs(r1, c, r2) ->
@@ -952,16 +580,30 @@ let print_instruction oc tbl pc fallthrough = function
       fprintf oc "	sth	%a, %a(%a)\n" ireg r1 constant c ireg r2
   | Psthx(r1, r2, r3) ->
       fprintf oc "	sthx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+  | Psthbrx(r1, r2, r3) ->
+      fprintf oc "	sthbrx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Pstw(r1, c, r2) | Pstw_a(r1, c, r2) ->
       fprintf oc "	stw	%a, %a(%a)\n" ireg r1 constant c ireg r2
+  | Pstwu(r1, c, r2) ->
+      fprintf oc "	stwu	%a, %a(%a)\n" ireg r1 constant c ireg r2
   | Pstwx(r1, r2, r3) | Pstwx_a(r1, r2, r3) ->
       fprintf oc "	stwx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+  | Pstwxu(r1, r2, r3) ->  (* assembler mnemonic is "stwux" *)
+      fprintf oc "	stwux	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+  | Pstwbrx(r1, r2, r3) ->
+      fprintf oc "	stwbrx	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Psubfc(r1, r2, r3) ->
       fprintf oc "	subfc	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Psubfe(r1, r2, r3) ->
       fprintf oc "	subfe	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
+  | Psubfze(r1, r2) ->
+      fprintf oc "	subfze	%a, %a\n" ireg r1 ireg r2
   | Psubfic(r1, r2, c) ->
       fprintf oc "	subfic	%a, %a, %a\n" ireg r1 ireg r2 constant c
+  | Psync ->
+      fprintf oc "	sync\n"
+  | Ptrap ->
+      fprintf oc "	trap\n"
   | Pxor(r1, r2, r3) ->
       fprintf oc "	xor	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Pxori(r1, r2, c) ->
@@ -974,21 +616,6 @@ let print_instruction oc tbl pc fallthrough = function
       fprintf oc "%a:\n" label (transl_label lbl)
   | Pbuiltin(ef, args, res) ->
       begin match ef with
-      | EF_builtin(name, sg) ->
-          print_builtin_inline oc (extern_atom name) args res
-      | EF_vload chunk ->
-          print_builtin_vload oc chunk args res
-      | EF_vstore chunk ->
-          print_builtin_vstore oc chunk args
-      | EF_vload_global(chunk, id, ofs) ->
-          print_builtin_vload_global oc chunk id ofs args res
-      | EF_vstore_global(chunk, id, ofs) ->
-          print_builtin_vstore_global oc chunk id ofs args
-      | EF_memcpy(sz, al) ->
-          print_builtin_memcpy oc (Int32.to_int (camlint_of_coqint sz))
-                                  (Int32.to_int (camlint_of_coqint al)) args
-      | EF_annot_val(txt, targ) ->
-          print_annot_val oc (extern_atom txt) args res
       | EF_inline_asm txt ->
           fprintf oc "%s begin inline assembly\n" comment;
           fprintf oc "	%s\n" (extern_atom txt);
@@ -1003,6 +630,10 @@ let print_instruction oc tbl pc fallthrough = function
       | _ ->
           assert false
       end
+  | Pcfi_adjust n ->
+      cfi_adjust oc (camlint_of_coqint n)
+  | Pcfi_rel_offset n ->
+      cfi_rel_offset oc "lr" (camlint_of_coqint n)
 
 (* Determine if an instruction falls through *)
 
@@ -1017,44 +648,15 @@ let instr_fall_through = function
    PowerPC instructions.  We can over-approximate. *)
 
 let instr_size = function
-  | Pallocframe(sz, ofs) -> 3
   | Pbf(bit, lbl) -> 2
   | Pbt(bit, lbl) -> 2
-  | Pbtbl(r, tbl) -> 4
-  | Pfcti(r1, r2) -> 4
-  | Pfmake(rd, r1, r2) -> 4
+  | Pbtbl(r, tbl) -> 5
   | Plfi(r1, c) -> 2
-  | Pmfcrbit(r1, bit) -> 2
-  | Pstfs(r1, c, r2) -> 2
-  | Pstfsx(r1, r2, r3) -> 2
+  | Plfis(r1, c) -> 2
   | Plabel lbl -> 0
-  | Pbuiltin(ef, args, res) ->
-      begin match ef with
-      | EF_builtin(name, sg) ->
-          begin match extern_atom name with
-          | "__builtin_bswap" | "__builtin_bswap32" | "__builtin_bswap16" -> 3
-          | "__builtin_fcti" -> 4
-          | _ -> 1
-          end
-      | EF_vload chunk ->
-          if chunk = Mint8signed then 2 else 1
-      | EF_vstore chunk ->
-          if chunk = Mfloat32 then 2 else 1
-      | EF_vload_global(chunk, id, ofs) ->
-          if chunk = Mint8signed then 3 else 2
-      | EF_vstore_global(chunk, id, ofs) ->
-          if chunk = Mfloat32 then 3 else 2
-      | EF_memcpy(sz, al) ->
-          let sz = Int32.to_int (camlint_of_coqint sz) in
-          if sz <= 64 then (sz / 4) * 2 + 6 else 11
-      | EF_annot_val(txt, targ) ->
-          0
-      | EF_inline_asm txt ->
-          8 (* reasonable? default *)
-      | _ ->
-          assert false
-      end
+  | Pbuiltin(ef, args, res) -> (match ef with EF_inline_asm _ -> 8 | _ -> 0) (* inline asm has unknown length: keep an over-approximation *)
   | Pannot(ef, args) -> 0
+  | Pcfi_adjust _ | Pcfi_rel_offset _ -> 0
   | _ -> 1
 
 (* Build a table label -> estimated position in generated code.
@@ -1094,7 +696,6 @@ let print_function oc name fn =
   float_literals := [];
   float32_literals := [];
   jumptables := [];
-  current_function_sig := fn.fn_sig;
   let (text, lit, jmptbl) =
     match C2C.atom_sections name with
     | [t;l;j] -> (t, l, j)
diff --git a/test/Makefile b/test/Makefile
index 5771523..ab44be5 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -11,3 +11,6 @@ bench:
 
 clean:
 	for i in $(DIRS); do $(MAKE) -C $$i clean; done
+
+ccheck:
+	for i in $(DIRS); do $(MAKE) -C $$i ccheck; done
diff --git a/test/c/Makefile b/test/c/Makefile
index 1486666..a81a9d5 100644
--- a/test/c/Makefile
+++ b/test/c/Makefile
@@ -2,6 +2,10 @@ include ../../Makefile.config
 
 CCOMP=../../ccomp
 CCOMPFLAGS=-stdlib ../../runtime -dc -dclight -dasm
+ifeq ($(CCHECKLINK),true)
+CCHECK=../../cchecklink
+CCOMPFLAGS+= -sdump
+endif
 
 CFLAGS=-O1 -Wall
 
@@ -38,6 +42,12 @@ test:
 	   fi; \
          done
 
+ccheck:
+	@for i in $(PROGS); do \
+           echo "---- $$i"; \
+           $(CCHECK) -exe $$i.compcert $$i.sdump; \
+         done
+
 test_gcc:
 	@for i in $(PROGS); do \
 	   if ./$$i.gcc | cmp -s - Results/$$i; \
@@ -69,4 +79,4 @@ cminor_roundtrip:
 
 clean:
 	rm -f *.compcert *.gcc
-	rm -f *.compcert.c *.light.c *.parsed.c *.s *.o *~ 
+	rm -f *.compcert.c *.light.c *.parsed.c *.s *.o *.sdump *~ 
diff --git a/test/compression/Makefile b/test/compression/Makefile
index 8db55dd..e35e1a1 100644
--- a/test/compression/Makefile
+++ b/test/compression/Makefile
@@ -1,7 +1,13 @@
+include ../../Makefile.config
+
 CC=../../ccomp 
 CFLAGS=-U__GNUC__ -stdlib ../../runtime -dclight -dasm
 LIBS=
 TIME=xtime -o /dev/null -mintime 1.0
+ifeq ($(CCHECKLINK),true)
+CCHECK=../../cchecklink
+CFLAGS+= -sdump
+endif
 
 EXE=arcode lzw lzss
 
@@ -51,10 +57,18 @@ bench:
          done
 	rm -f $(TESTCOMPR)
 
+ccheck:
+	@echo "---- arcode"
+	@$(CCHECK) -exe arcode $(ARCODE_OBJS:.o=.sdump)
+	@echo "---- lzw"
+	@$(CCHECK) -exe lzw $(LZW_OBJS:.o=.sdump)
+	@echo "---- lzss"
+	@$(CCHECK) -exe lzss $(LZSS_OBJS:.o=.sdump)
+
 include .depend
 
 clean:
-	rm -f *.o *.light.c *.s $(EXE)
+	rm -f *.o *.light.c *.s *.sdump $(EXE)
 
 depend:
 	gcc -MM *.c > .depend
diff --git a/test/raytracer/Makefile b/test/raytracer/Makefile
index a4b8894..c6eb190 100644
--- a/test/raytracer/Makefile
+++ b/test/raytracer/Makefile
@@ -4,6 +4,10 @@ CC=../../ccomp
 CFLAGS=-stdlib ../../runtime -dparse -dclight -dasm -fstruct-return
 LIBS=$(LIBMATH)
 TIME=xtime -mintime 2.0
+ifeq ($(CCHECKLINK),true)
+CCHECK=../../cchecklink
+CFLAGS+= -sdump
+endif
 
 OBJS=memory.o gmllexer.o gmlparser.o eval.o \
   arrays.o vector.o matrix.o object.o intersect.o surface.o light.o \
@@ -15,7 +19,7 @@ render: $(OBJS)
 	$(CC) $(CFLAGS) -o render $(OBJS) $(LIBS)
 
 clean:
-	rm -f *.o *.parsed.c *.light.c *.s *.ppm render
+	rm -f *.o *.parsed.c *.light.c *.s *.sdump *.ppm render
 
 include .depend
 
@@ -55,3 +59,8 @@ test:
 
 bench:
 	@echo -n "raytracer: "; $(TIME) ./render < kal.gml
+
+ccheck:
+	@echo "---- render"
+	@$(CCHECK) -exe render *.sdump
+
diff --git a/test/regression/Makefile b/test/regression/Makefile
index 189dbd8..3583676 100644
--- a/test/regression/Makefile
+++ b/test/regression/Makefile
@@ -2,6 +2,10 @@ include ../../Makefile.config
 
 CCOMP=../../ccomp
 CCOMPFLAGS=-stdlib ../../runtime -dparse -dc -dclight -dasm -fall
+ifeq ($(CCHECKLINK),true)
+CCHECK=../../cchecklink
+CCOMPFLAGS+= -sdump
+endif
 
 LIBS=$(LIBMATH)
 
@@ -48,7 +52,7 @@ all_s: $(TESTS:%=%.s) $(TESTS_COMP:%=%.s) $(TESTS_DIFF:%=%.s) $(EXTRAS:%=%.s)
 
 clean:
 	rm -f *.compcert
-	rm -f *.parsed.c *.compcert.c *.light.c *.s *.o *~ 
+	rm -f *.parsed.c *.compcert.c *.light.c *.s *.o *.sdump *~ 
 
 test:
 	@for i in $(TESTS) $(TESTS_COMP); do \
@@ -81,3 +85,10 @@ test:
          done
 
 bench:
+
+ccheck:
+	@for i in $(TESTS) $(TESTS_COMP); do \
+           echo "---- $$i"; \
+           $(CCHECK) -exe $$i.compcert $$i.sdump; \
+         done
+
diff --git a/test/spass/Makefile b/test/spass/Makefile
index 6797475..6a4cd59 100644
--- a/test/spass/Makefile
+++ b/test/spass/Makefile
@@ -2,6 +2,10 @@ include ../../Makefile.config
 
 CC=../../ccomp 
 CFLAGS=-stdlib ../../runtime -dparse -dclight -dasm -fstruct-return
+ifeq ($(CCHECKLINK),true)
+CCHECK=../../cchecklink
+CFLAGS+= -sdump
+endif
 
 SRCS=analyze.c clause.c clock.c closure.c cnf.c component.c                 \
   condensing.c context.c defs.c dfgparser.c dfgscanner.c doc-proof.c        \
@@ -19,7 +23,7 @@ spass: $(SRCS:.c=.o)
 
 clean:
 	rm -f spass
-	rm -f *.o *.s *.parsed.c *.light.c
+	rm -f *.o *.s *.parsed.c *.light.c *.sdump
 
 test:
 	./spass small_problem.dfg | grep 'Proof found'
@@ -30,6 +34,10 @@ TIME=xtime -o /dev/null     # Xavier's hack
 bench:
 	@echo -n "spass: "; $(TIME) ./spass problem.dfg
 
+ccheck:
+	@echo "---- spass"
+	@$(CCHECK) -exe spass *.sdump
+
 depend:
 	gcc -MM $(SRCS) > .depend
 
-- 
cgit v1.2.3