ARM codegen ported to new ABI + VFD floats

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1692 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
author: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2011-07-30 09:54:35 +0000
committer: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2011-07-30 09:54:35 +0000
commit: 1fe68ad575178f7d8a775906947d2fed94d40976 (patch)
tree: 3bb4b2d1b101f66bcb6f84bd36ce8e334082f7ea
parent: 9b45e1d24a337e3f0047bf5056315169d4203b49 (diff)
16 files changed, 560 insertions, 629 deletions
diff --git a/Makefile b/Makefile
index a0f6d02..fc2158e 100644
--- a/Makefile
+++ b/Makefile
@@ -145,14 +145,14 @@ latexdoc:
 	@$(COQC) -dump-glob doc/$(*F).glob $*.v
 
 driver/Configuration.ml: Makefile.config
-	(echo 'let stdlib_path = "$(LIBDIR)"'; \
-         echo 'let prepro = "$(CPREPRO)"'; \
-         echo 'let asm = "$(CASM)"'; \
-         echo 'let linker = "$(CLINKER)"'; \
-         echo 'let arch = "$(ARCH)"'; \
-         echo 'let variant = "$(VARIANT)"'; \
-         echo 'let system = "$(SYSTEM)"'; \
-         echo 'let need_stdlib_wrapper = $(NEED_STDLIB_WRAPPER)') \
+	(echo let stdlib_path = "\"$(LIBDIR)\""; \
+         echo let prepro = "\"$(CPREPRO)\""; \
+         echo let asm = "\"$(CASM)\""; \
+         echo let linker = "\"$(CLINKER)\""; \
+         echo let arch = "\"$(ARCH)\""; \
+         echo let variant = "\"$(VARIANT)\""; \
+         echo let system = "\"$(SYSTEM)\""; \
+         echo let need_stdlib_wrapper = $(NEED_STDLIB_WRAPPER)) \
         > driver/Configuration.ml
 
 depend: $(FILES)
diff --git a/arm/Asm.v b/arm/Asm.v
index b7175b1..bc69930 100644
--- a/arm/Asm.v
+++ b/arm/Asm.v
@@ -38,7 +38,9 @@ Inductive ireg: Type :=
 
 Inductive freg: Type :=
   | FR0: freg  | FR1: freg  | FR2: freg  | FR3: freg
-  | FR4: freg  | FR5: freg  | FR6: freg  | FR7: freg.
+  | FR4: freg  | FR5: freg  | FR6: freg  | FR7: freg
+  | FR8: freg  | FR9: freg  | FR10: freg  | FR11: freg
+  | FR12: freg  | FR13: freg  | FR14: freg  | FR15: freg.
 
 Lemma ireg_eq: forall (x y: ireg), {x=y} + {x<>y}.
 Proof. decide equality. Defined.
@@ -69,7 +71,7 @@ Proof. decide equality. Defined.
 
 Inductive preg: Type :=
   | IR: ireg -> preg                    (**r integer registers *)
-  | FR: freg -> preg                    (**r float registers *)
+  | FR: freg -> preg                    (**r double-precision VFP float registers *)
   | CR: crbit -> preg                   (**r bits in the condition register *)
   | PC: preg.                           (**r program counter *)
 
@@ -122,10 +124,10 @@ Inductive instruction : Type :=
   | Pand: ireg -> ireg -> shift_op -> instruction (**r bitwise and *)
   | Pb: label -> instruction                      (**r branch to label *)
   | Pbc: crbit -> label -> instruction            (**r conditional branch to label *)
-  | Pbsymb: ident -> instruction                  (**r branch to symbol *)
-  | Pbreg: ireg -> instruction                    (**r computed branch *)
-  | Pblsymb: ident -> instruction                 (**r branch and link to symbol *)
-  | Pblreg: ireg -> instruction                   (**r computed branch and link *)
+  | Pbsymb: ident -> signature -> instruction                  (**r branch to symbol *)
+  | Pbreg: ireg -> signature -> instruction                    (**r computed branch *)
+  | Pblsymb: ident -> signature -> instruction                 (**r branch and link to symbol *)
+  | Pblreg: ireg -> signature -> instruction                   (**r computed branch and link *)
   | Pbic: ireg -> ireg -> shift_op -> instruction (**r bitwise bit-clear *)
   | Pcmp: ireg -> shift_op -> instruction         (**r integer comparison *)
   | Peor: ireg -> ireg -> shift_op -> instruction (**r bitwise exclusive or *)
@@ -146,23 +148,25 @@ Inductive instruction : Type :=
   | Psdiv: ireg -> ireg -> ireg -> instruction      (**r signed division *)
   | Psub: ireg -> ireg -> shift_op -> instruction  (**r integer subtraction *)
   | Pudiv: ireg -> ireg -> ireg -> instruction      (**r unsigned division *)
-  (* Floating-point coprocessor instructions *)
-  | Pabsd: freg -> freg -> instruction             (**r float absolute value *)
-  | Padfd: freg -> freg -> freg -> instruction     (**r float addition *)
-  | Pcmf: freg -> freg -> instruction              (**r float comparison *)
-  | Pdvfd: freg -> freg -> freg -> instruction     (**r float division *)
-  | Pfixz: ireg -> freg -> instruction             (**r float to signed int *)
-  | Pfltd: freg -> ireg -> instruction             (**r signed int to float *)
-  | Pldfd: freg -> ireg -> int -> instruction      (**r float64 load *)
-  | Pldfs: freg -> ireg -> int -> instruction      (**r float32 load *)
-  | Plifd: freg -> float -> instruction            (**r load float constant *)
-  | Pmnfd: freg -> freg -> instruction             (**r float opposite *)
-  | Pmvfd: freg -> freg -> instruction             (**r float move *)
-  | Pmvfs: freg -> freg -> instruction             (**r float move single precision *)
-  | Pmufd: freg -> freg -> freg -> instruction     (**r float multiplication *)
-  | Pstfd: freg -> ireg -> int -> instruction      (**r float64 store *)
-  | Pstfs: freg -> ireg -> int -> instruction      (**r float32 store *)
-  | Psufd: freg -> freg -> freg -> instruction     (**r float subtraction *)
+  (* Floating-point coprocessor instructions (VFP double scalar operations) *)
+  | Pfcpyd: freg -> freg -> instruction             (**r float move *)
+  | Pfabsd: freg -> freg -> instruction             (**r float absolute value *)
+  | Pfnegd: freg -> freg -> instruction             (**r float opposite *)
+  | Pfaddd: freg -> freg -> freg -> instruction     (**r float addition *)
+  | Pfdivd: freg -> freg -> freg -> instruction     (**r float division *)
+  | Pfmuld: freg -> freg -> freg -> instruction     (**r float multiplication *)
+  | Pfsubd: freg -> freg -> freg -> instruction     (**r float subtraction *)
+  | Pflid: freg -> float -> instruction             (**r load float constant *)
+  | Pfcmpd: freg -> freg -> instruction             (**r float comparison *)
+  | Pfsitod: freg -> ireg -> instruction            (**r signed int to float *)
+  | Pfuitod: freg -> ireg -> instruction            (**r unsigned int to float *)
+  | Pftosizd: ireg -> freg -> instruction           (**r float to signed int *)
+  | Pftouizd: ireg -> freg -> instruction           (**r float to unsigned int *)
+  | Pfcvtsd: freg -> freg -> instruction            (**r round to singled precision *)
+  | Pfldd: freg -> ireg -> int -> instruction       (**r float64 load *)
+  | Pflds: freg -> ireg -> int -> instruction       (**r float32 load *)
+  | Pfstd: freg -> ireg -> int -> instruction       (**r float64 store *)
+  | Pfsts: freg -> ireg -> int -> instruction       (**r float32 store *)
 
   (* Pseudo-instructions *)
   | Pallocframe: Z -> int -> instruction      (**r allocate new stack frame *)
@@ -225,7 +229,8 @@ lbl:    .word   symbol
 *)
 
 Definition code := list instruction.
-Definition fundef := AST.fundef code.
+Record function : Type := mkfunction { fn_sig: signature; fn_code: code }.
+Definition fundef := AST.fundef function.
 Definition program := AST.program fundef unit.
 
 (** * Operational semantics *)
@@ -409,13 +414,13 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
       | Vint n => if Int.eq n Int.zero then OK (nextinstr rs) m else goto_label c lbl rs m
       | _ => Error
       end
-  | Pbsymb id =>                  
+  | Pbsymb id sg =>                  
       OK (rs#PC <- (symbol_offset id Int.zero)) m
-  | Pbreg r =>                    
+  | Pbreg r sg =>                    
       OK (rs#PC <- (rs#r)) m
-  | Pblsymb id =>                 
+  | Pblsymb id sg =>                 
       OK (rs#IR14 <- (Val.add rs#PC Vone) #PC <- (symbol_offset id Int.zero)) m
-  | Pblreg r =>                   
+  | Pblreg r sg =>                   
       OK (rs#IR14 <- (Val.add rs#PC Vone) #PC <- (rs#r)) m
   | Pbic r1 r2 so => 
       OK (nextinstr (rs#r1 <- (Val.and rs#r2 (Val.notint (eval_shift_op so rs))))) m
@@ -463,41 +468,45 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pudiv rd r1 r2 =>
       OK (nextinstr (rs#rd <- (Val.divu rs#r1 rs#r2))) m
   (* Floating-point coprocessor instructions *)
-  | Pabsd r1 r2 => 
+  | Pfcpyd r1 r2 =>
+      OK (nextinstr (rs#r1 <- (rs#r2))) m
+  | Pfabsd r1 r2 => 
       OK (nextinstr (rs#r1 <- (Val.absf rs#r2))) m
-  | Padfd r1 r2 r3 =>     
+  | Pfnegd r1 r2 => 
+      OK (nextinstr (rs#r1 <- (Val.negf rs#r2))) m
+  | Pfaddd r1 r2 r3 =>     
       OK (nextinstr (rs#r1 <- (Val.addf rs#r2 rs#r3))) m
-  | Pcmf r1 r2 =>              
-      OK (nextinstr (compare_float rs rs#r1 rs#r2)) m
-  | Pdvfd r1 r2 r3 =>     
+  | Pfdivd r1 r2 r3 =>     
       OK (nextinstr (rs#r1 <- (Val.divf rs#r2 rs#r3))) m
-  | Pfixz r1 r2 =>
-      OK (nextinstr (rs#r1 <- (Val.intoffloat rs#r2))) m
-  | Pfltd r1 r2 =>
+  | Pfmuld r1 r2 r3 =>     
+      OK (nextinstr (rs#r1 <- (Val.mulf rs#r2 rs#r3))) m
+  | Pfsubd r1 r2 r3 =>     
+      OK (nextinstr (rs#r1 <- (Val.subf rs#r2 rs#r3))) m
+  | Pflid r1 f =>            
+      OK (nextinstr (rs#r1 <- (Vfloat f))) m
+  | Pfcmpd r1 r2 =>              
+      OK (nextinstr (compare_float rs rs#r1 rs#r2)) m
+  | Pfsitod r1 r2 =>
       OK (nextinstr (rs#r1 <- (Val.floatofint rs#r2))) m
-  | Pldfd r1 r2 n =>
+  | Pfuitod r1 r2 =>
+      OK (nextinstr (rs#r1 <- (Val.floatofintu rs#r2))) m
+  | Pftosizd r1 r2 =>
+      OK (nextinstr (rs#r1 <- (Val.intoffloat rs#r2))) m
+  | Pftouizd r1 r2 =>
+      OK (nextinstr (rs#r1 <- (Val.intuoffloat rs#r2))) m
+  | Pfcvtsd r1 r2 =>
+      OK (nextinstr (rs#r1 <- (Val.singleoffloat rs#r2))) m
+  | Pfldd r1 r2 n =>
       exec_load Mfloat64 (Val.add rs#r2 (Vint n)) r1 rs m
-  | Pldfs r1 r2 n =>
+  | Pflds r1 r2 n =>
       exec_load Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m
-  | Plifd r1 f =>            
-      OK (nextinstr (rs#r1 <- (Vfloat f))) m
-  | Pmnfd r1 r2 => 
-      OK (nextinstr (rs#r1 <- (Val.negf rs#r2))) m
-  | Pmvfd r1 r2 =>
-      OK (nextinstr (rs#r1 <- (rs#r2))) m
-  | Pmvfs r1 r2 =>
-      OK (nextinstr (rs#r1 <- (Val.singleoffloat rs#r2))) m
-  | Pmufd r1 r2 r3 =>     
-      OK (nextinstr (rs#r1 <- (Val.mulf rs#r2 rs#r3))) m
-  | Pstfd r1 r2 n =>      
+  | Pfstd r1 r2 n =>      
       exec_store Mfloat64 (Val.add rs#r2 (Vint n)) r1 rs m
-  | Pstfs r1 r2 n =>
+  | Pfsts r1 r2 n =>
       match exec_store Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m with
-      | OK rs' m' => OK (rs'#FR3 <- Vundef) m'
+      | OK rs' m' => OK (rs'#FR7 <- Vundef) m'
       | Error => Error
       end
-  | Psufd r1 r2 r3 =>     
-      OK (nextinstr (rs#r1 <- (Val.subf rs#r2 rs#r3))) m
   (* Pseudo-instructions *)
   | Pallocframe sz pos =>             
       let (m1, stk) := Mem.alloc m 0 sz in
@@ -562,9 +571,11 @@ Definition ireg_of (r: mreg) : ireg :=
 
 Definition freg_of (r: mreg) : freg :=
   match r with
-  | F0 => FR0 | F1 => FR1
-  | F4 => FR4 | F5 => FR5 | F6 => FR6 | F7 => FR7
-  | FT1 => FR2 | FT2 => FR3
+  | F0 => FR0 | F1 => FR1 | F2 => FR2 | F3 => FR3
+  | F4 => FR4 | F5 => FR5
+  | F8 => FR8 | F9 => FR9 | F10 => FR10 | F11 => FR11
+  | F12 => FR12 | F13 => FR13 | F14 => FR14 | F15 => FR15
+  | FT1 => FR6 | FT2 => FR7
   | _ => FR0  (* should not happen *)
   end.
 
@@ -618,24 +629,24 @@ Inductive state: Type :=
 
 Inductive step: state -> trace -> state -> Prop :=
   | exec_step_internal:
-      forall b ofs c i rs m rs' m',
+      forall b ofs f i rs m rs' m',
       rs PC = Vptr b ofs ->
-      Genv.find_funct_ptr ge b = Some (Internal c) ->
-      find_instr (Int.unsigned ofs) c = Some i ->
-      exec_instr c i rs m = OK rs' m' ->
+      Genv.find_funct_ptr ge b = Some (Internal f) ->
+      find_instr (Int.unsigned ofs) (fn_code f) = Some i ->
+      exec_instr (fn_code f) i rs m = OK rs' m' ->
       step (State rs m) E0 (State rs' m')
   | exec_step_builtin:
-      forall b ofs c ef args res rs m t v m',
+      forall b ofs f ef args res rs m t v m',
       rs PC = Vptr b ofs ->
-      Genv.find_funct_ptr ge b = Some (Internal c) ->
-      find_instr (Int.unsigned ofs) c = Some (Pbuiltin ef args res) ->
+      Genv.find_funct_ptr ge b = Some (Internal f) ->
+      find_instr (Int.unsigned ofs) (fn_code f) = Some (Pbuiltin ef args res) ->
       external_call ef ge (map rs args) m t v m' ->
       step (State rs m) t (State (nextinstr(rs # res <- v)) m')
   | exec_step_annot:
-      forall b ofs c ef args rs m vargs t v m',
+      forall b ofs f ef args rs m vargs t v m',
       rs PC = Vptr b ofs ->
-      Genv.find_funct_ptr ge b = Some (Internal c) ->
-      find_instr (Int.unsigned ofs) c = Some (Pannot ef args) ->
+      Genv.find_funct_ptr ge b = Some (Internal f) ->
+      find_instr (Int.unsigned ofs) (fn_code f) = Some (Pannot ef args) ->
       annot_arguments rs m args vargs ->
       external_call ef ge vargs m t v m' ->
       step (State rs m) t (State (nextinstr rs) m')
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index 91a636b..4d36f91 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -152,9 +152,9 @@ Definition transl_cond
       else
         loadimm IR14 n (Pcmp (ireg_of a1) (SOreg IR14) :: k)
   | Ccompf cmp, a1 :: a2 :: nil =>
-      Pcmf (freg_of a1) (freg_of a2) :: k
+      Pfcmpd (freg_of a1) (freg_of a2) :: k
   | Cnotcompf cmp, a1 :: a2 :: nil =>
-      Pcmf (freg_of a1) (freg_of a2) :: k
+      Pfcmpd (freg_of a1) (freg_of a2) :: k
   | _, _ =>
      k (**r never happens for well-typed code *)
   end.
@@ -220,12 +220,12 @@ Definition transl_op
   | Omove, a1 :: nil =>
       match mreg_type a1 with
       | Tint => Pmov (ireg_of r) (SOreg (ireg_of a1)) :: k
-      | Tfloat => Pmvfd (freg_of r) (freg_of a1) :: k
+      | Tfloat => Pfcpyd (freg_of r) (freg_of a1) :: k
       end
   | Ointconst n, nil =>
       loadimm (ireg_of r) n k
   | Ofloatconst f, nil =>
-      Plifd (freg_of r) f :: k
+      Pflid (freg_of r) f :: k
   | Oaddrsymbol s ofs, nil =>
       Ploadsymbol (ireg_of r) s ofs :: k
   | Oaddrstack n, nil =>
@@ -304,23 +304,27 @@ Definition transl_op
          (Pmovc CRge IR14 (SOreg (ireg_of a1)) ::
           Pmov (ireg_of r) (SOasrimm IR14 n) :: k)
   | Onegf, a1 :: nil =>
-      Pmnfd (freg_of r) (freg_of a1) :: k
+      Pfnegd (freg_of r) (freg_of a1) :: k
   | Oabsf, a1 :: nil =>
-      Pabsd (freg_of r) (freg_of a1) :: k
+      Pfabsd (freg_of r) (freg_of a1) :: k
   | Oaddf, a1 :: a2 :: nil =>
-      Padfd (freg_of r) (freg_of a1) (freg_of a2) :: k
+      Pfaddd (freg_of r) (freg_of a1) (freg_of a2) :: k
   | Osubf, a1 :: a2 :: nil =>
-      Psufd (freg_of r) (freg_of a1) (freg_of a2) :: k
+      Pfsubd (freg_of r) (freg_of a1) (freg_of a2) :: k
   | Omulf, a1 :: a2 :: nil =>
-      Pmufd (freg_of r) (freg_of a1) (freg_of a2) :: k
+      Pfmuld (freg_of r) (freg_of a1) (freg_of a2) :: k
   | Odivf, a1 :: a2 :: nil =>
-      Pdvfd (freg_of r) (freg_of a1) (freg_of a2) :: k
+      Pfdivd (freg_of r) (freg_of a1) (freg_of a2) :: k
   | Osingleoffloat, a1 :: nil =>
-      Pmvfs (freg_of r) (freg_of a1) :: k
+      Pfcvtsd (freg_of r) (freg_of a1) :: k
   | Ointoffloat, a1 :: nil =>
-      Pfixz (ireg_of r) (freg_of a1) :: k
+      Pftosizd (ireg_of r) (freg_of a1) :: k
+  | Ointuoffloat, a1 :: nil =>
+      Pftouizd (ireg_of r) (freg_of a1) :: k
   | Ofloatofint, a1 :: nil =>
-      Pfltd (freg_of r) (ireg_of a1) :: k
+      Pfsitod (freg_of r) (ireg_of a1) :: k
+  | Ofloatofintu, a1 :: nil =>
+      Pfuitod (freg_of r) (ireg_of a1) :: k
   | Ocmp cmp, _ =>
       transl_cond cmp args
         (Pmov (ireg_of r) (SOimm Int.zero) ::
@@ -405,10 +409,10 @@ Definition loadind_int (base: ireg) (ofs: int) (dst: ireg) (k: code) :=
 
 Definition loadind_float (base: ireg) (ofs: int) (dst: freg) (k: code) :=
   if is_immed_mem_float ofs then
-    Pldfd dst base ofs :: k
+    Pfldd dst base ofs :: k
   else
     addimm IR14 base ofs
-      (Pldfd dst IR14 Int.zero :: k).
+      (Pfldd dst IR14 Int.zero :: k).
 
 Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
   match ty with
@@ -425,10 +429,10 @@ Definition storeind_int (src: ireg) (base: ireg) (ofs: int) (k: code) :=
 
 Definition storeind_float (src: freg) (base: ireg) (ofs: int) (k: code) :=
   if is_immed_mem_float ofs then
-    Pstfd src base ofs :: k
+    Pfstd src base ofs :: k
   else
     addimm IR14 base ofs
-      (Pstfd src IR14 Int.zero :: k).
+      (Pfstd src IR14 Int.zero :: k).
 
 Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
   match ty with
@@ -461,7 +465,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) (k: code) :=
       | Mint8signed =>
           transl_load_store_int Pldrsb is_immed_mem_small dst addr args k
       | Mint8unsigned =>
-          transl_load_store_int Pldrb is_immed_mem_small dst addr args k
+          transl_load_store_int Pldrb is_immed_mem_word dst addr args k
       | Mint16signed =>
           transl_load_store_int Pldrsh is_immed_mem_small dst addr args k
       | Mint16unsigned =>
@@ -469,16 +473,16 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) (k: code) :=
       | Mint32 =>
           transl_load_store_int Pldr is_immed_mem_word dst addr args k
       | Mfloat32 =>
-          transl_load_store_float Pldfs is_immed_mem_float dst addr args k
+          transl_load_store_float Pflds is_immed_mem_float dst addr args k
       | Mfloat64 =>
-          transl_load_store_float Pldfd is_immed_mem_float dst addr args k
+          transl_load_store_float Pfldd is_immed_mem_float dst addr args k
       end
   | Mstore chunk addr args src =>
       match chunk with
       | Mint8signed =>
           transl_load_store_int Pstrb is_immed_mem_small src addr args k
       | Mint8unsigned =>
-          transl_load_store_int Pstrb is_immed_mem_small src addr args k
+          transl_load_store_int Pstrb is_immed_mem_word src addr args k
       | Mint16signed =>
           transl_load_store_int Pstrh is_immed_mem_small src addr args k
       | Mint16unsigned =>
@@ -486,20 +490,20 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) (k: code) :=
       | Mint32 =>
           transl_load_store_int Pstr is_immed_mem_word src addr args k
       | Mfloat32 =>
-          transl_load_store_float Pstfs is_immed_mem_float src addr args k
+          transl_load_store_float Pfsts is_immed_mem_float src addr args k
       | Mfloat64 =>
-          transl_load_store_float Pstfd is_immed_mem_float src addr args k
+          transl_load_store_float Pfstd is_immed_mem_float src addr args k
       end
   | Mcall sig (inl r) =>
-      Pblreg (ireg_of r) :: k
+      Pblreg (ireg_of r) sig :: k
   | Mcall sig (inr symb) =>
-      Pblsymb symb :: k
+      Pblsymb symb sig :: k
   | Mtailcall sig (inl r) =>
       loadind_int IR13 f.(fn_retaddr_ofs) IR14
-        (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbreg (ireg_of r) :: k)
+        (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbreg (ireg_of r) sig :: k)
   | Mtailcall sig (inr symb) =>
       loadind_int IR13 f.(fn_retaddr_ofs) IR14
-        (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbsymb symb :: k)
+        (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbsymb symb sig :: k)
   | Mbuiltin ef args res =>
       Pbuiltin ef (map preg_of args) (preg_of res) :: k
   | Mannot ef args =>
@@ -515,7 +519,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) (k: code) :=
       Pbtbl IR14 tbl :: k
   | Mreturn =>
       loadind_int IR13 f.(fn_retaddr_ofs) IR14
-        (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbreg IR14 :: k)
+        (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbreg IR14 f.(Mach.fn_sig) :: k)
   end.
 
 Definition transl_code (f: Mach.function) (il: list Mach.instruction) :=
@@ -527,9 +531,10 @@ Definition transl_code (f: Mach.function) (il: list Mach.instruction) :=
   around, leading to incorrect executions. *)
 
 Definition transl_function (f: Mach.function) :=
-  Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::
-  Pstr IR14 IR13 (SAimm f.(fn_retaddr_ofs)) ::
-  transl_code f f.(fn_code).
+  mkfunction f.(Mach.fn_sig)
+    (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::
+     Pstr IR14 IR13 (SAimm f.(fn_retaddr_ofs)) ::
+     transl_code f f.(Mach.fn_code)).
 
 Fixpoint code_size (c: code) : Z :=
   match c with
@@ -539,11 +544,11 @@ Fixpoint code_size (c: code) : Z :=
 
 Open Local Scope string_scope.
 
-Definition transf_function (f: Mach.function) : res Asm.code :=
-  let c := transl_function f in
-  if zlt Int.max_unsigned (code_size c)
+Definition transf_function (f: Mach.function) : res Asm.function :=
+  let tf := transl_function f in
+  if zlt Int.max_unsigned (code_size tf.(fn_code))
   then Errors.Error (msg "code size exceeded")
-  else Errors.OK c.
+  else Errors.OK tf.
 
 Definition transf_fundef (f: Mach.fundef) : res Asm.fundef :=
   transf_partial_fundef transf_function f.
diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v
index 0e2bdca..48f265b 100644
--- a/arm/Asmgenproof.v
+++ b/arm/Asmgenproof.v
@@ -66,19 +66,19 @@ Proof.
   intros. 
   destruct (functions_translated _ _ H) as [tf [A B]].
   rewrite A. generalize B. unfold transf_fundef, transf_partial_fundef, transf_function.
-  case (zlt Int.max_unsigned (code_size (transl_function f))); simpl; intro.
+  case (zlt Int.max_unsigned (code_size (fn_code (transl_function f)))); simpl; intro.
   congruence. intro. inv B0. auto.
 Qed.
 
 Lemma functions_transl_no_overflow:
   forall b f,
   Genv.find_funct_ptr ge b = Some (Internal f) ->
-  code_size (transl_function f) <= Int.max_unsigned.
+  code_size (fn_code (transl_function f)) <= Int.max_unsigned.
 Proof.
   intros. 
   destruct (functions_translated _ _ H) as [tf [A B]].
   generalize B. unfold transf_fundef, transf_partial_fundef, transf_function.
-  case (zlt Int.max_unsigned (code_size (transl_function f))); simpl; intro.
+  case (zlt Int.max_unsigned (code_size (fn_code (transl_function f)))); simpl; intro.
   congruence. intro; omega.
 Qed.
 
@@ -166,7 +166,7 @@ Inductive transl_code_at_pc: val -> block -> Mach.function -> Mach.code -> Prop
   transl_code_at_pc_intro:
     forall b ofs f c,
     Genv.find_funct_ptr ge b = Some (Internal f) ->
-    code_tail (Int.unsigned ofs) (transl_function f) (transl_code f c) ->
+    code_tail (Int.unsigned ofs) (fn_code (transl_function f)) (transl_code f c) ->
     transl_code_at_pc (Vptr b ofs) b f c.
 
 (** The following lemmas show that straight-line executions
@@ -175,12 +175,12 @@ Inductive transl_code_at_pc: val -> block -> Mach.function -> Mach.code -> Prop
 
 Lemma exec_straight_steps_1:
   forall fn c rs m c' rs' m',
-  exec_straight tge fn c rs m c' rs' m' ->
-  code_size fn <= Int.max_unsigned ->
+  exec_straight tge (fn_code fn) c rs m c' rs' m' ->
+  code_size (fn_code fn) <= Int.max_unsigned ->
   forall b ofs,
   rs#PC = Vptr b ofs ->
   Genv.find_funct_ptr tge b = Some (Internal fn) ->
-  code_tail (Int.unsigned ofs) fn c ->
+  code_tail (Int.unsigned ofs) (fn_code fn) c ->
   plus step tge (State rs m) E0 (State rs' m').
 Proof.
   induction 1; intros.
@@ -199,15 +199,15 @@ Qed.
     
 Lemma exec_straight_steps_2:
   forall fn c rs m c' rs' m',
-  exec_straight tge fn c rs m c' rs' m' ->
-  code_size fn <= Int.max_unsigned ->
+  exec_straight tge (fn_code fn) c rs m c' rs' m' ->
+  code_size (fn_code fn) <= Int.max_unsigned ->
   forall b ofs,
   rs#PC = Vptr b ofs ->
   Genv.find_funct_ptr tge b = Some (Internal fn) ->
-  code_tail (Int.unsigned ofs) fn c ->
+  code_tail (Int.unsigned ofs) (fn_code fn) c ->
   exists ofs',
      rs'#PC = Vptr b ofs'
-  /\ code_tail (Int.unsigned ofs') fn c'.
+  /\ code_tail (Int.unsigned ofs') (fn_code fn) c'.
 Proof.
   induction 1; intros.
   exists (Int.add ofs Int.one). split.
@@ -221,7 +221,7 @@ Qed.
 Lemma exec_straight_exec:
   forall fb f c c' rs m rs' m',
   transl_code_at_pc (rs PC) fb f c ->
-  exec_straight tge (transl_function f)
+  exec_straight tge (fn_code (transl_function f))
                 (transl_code f c) rs m c' rs' m' ->
   plus step tge (State rs m) E0 (State rs' m').
 Proof.
@@ -234,7 +234,7 @@ Qed.
 Lemma exec_straight_at:
   forall fb f c c' rs m rs' m',
   transl_code_at_pc (rs PC) fb f c ->
-  exec_straight tge (transl_function f)
+  exec_straight tge (fn_code (transl_function f))
                 (transl_code f c) rs m (transl_code f c') rs' m' ->
   transl_code_at_pc (rs' PC) fb f c'.
 Proof.
@@ -490,7 +490,7 @@ Proof.
   unfold transl_load_store_int, transl_load_store_float. 
   destruct m; rewrite transl_load_store_label; intros; auto.
   destruct s0; reflexivity.
-  destruct s0; autorewrite with labels; reflexivity.
+  destruct s0; simpl; autorewrite with labels; reflexivity.
   rewrite peq_false. auto. congruence.
 Qed.
 
@@ -507,8 +507,8 @@ Qed.
 
 Lemma transl_find_label:
   forall f,
-  find_label lbl (transl_function f) = 
-    option_map (transl_code f) (Mach.find_label lbl f.(fn_code)).
+  find_label lbl (fn_code (transl_function f)) = 
+    option_map (transl_code f) (Mach.find_label lbl (Mach.fn_code f)).
 Proof.
   intros. unfold transl_function. simpl. autorewrite with labels. apply transl_code_label.
 Qed.
@@ -522,16 +522,16 @@ Lemma find_label_goto_label:
   forall f lbl rs m c' b ofs,
   Genv.find_funct_ptr ge b = Some (Internal f) ->
   rs PC = Vptr b ofs ->
-  Mach.find_label lbl f.(fn_code) = Some c' ->
+  Mach.find_label lbl (Mach.fn_code f) = Some c' ->
   exists rs',
-    goto_label (transl_function f) lbl rs m = OK rs' m  
+    goto_label (fn_code (transl_function f)) lbl rs m = OK rs' m  
   /\ transl_code_at_pc (rs' PC) b f c'
   /\ forall r, r <> PC -> rs'#r = rs#r.
 Proof.
   intros. 
   generalize (transl_find_label lbl f).
-  rewrite H1; simpl. intro.
-  generalize (label_pos_code_tail lbl (transl_function f) 0 
+  rewrite H1. unfold option_map. intro.
+  generalize (label_pos_code_tail lbl (fn_code (transl_function f)) 0 
                  (transl_code f c') H2).
   intros [pos' [A [B C]]].
   exists (rs#PC <- (Vptr b (Int.repr pos'))).
@@ -568,7 +568,7 @@ Inductive match_stack: list Machsem.stackframe -> Prop :=
   | match_stack_cons: forall fb sp ra c s f,
       Genv.find_funct_ptr ge fb = Some (Internal f) ->
       wt_function f ->
-      incl c f.(fn_code) ->
+      incl c (Mach.fn_code f) ->
       transl_code_at_pc ra fb f c ->
       sp <> Vundef ->
       ra <> Vundef ->
@@ -581,7 +581,7 @@ Inductive match_states: Machsem.state -> Asm.state -> Prop :=
         (STACKS: match_stack s)
         (FIND: Genv.find_funct_ptr ge fb = Some (Internal f))
         (WTF: wt_function f)
-        (INCL: incl c f.(fn_code))
+        (INCL: incl c (Mach.fn_code f))
         (AT: transl_code_at_pc (rs PC) fb f c)
         (AG: agree ms sp rs)
         (MEXT: Mem.extends m m'),
@@ -610,13 +610,13 @@ Lemma exec_straight_steps:
   match_stack s ->
   Genv.find_funct_ptr ge fb = Some (Internal f) ->
   wt_function f ->
-  incl c2 f.(fn_code) ->
+  incl c2 (Mach.fn_code f) ->
   transl_code_at_pc (rs1 PC) fb f c1 ->
   Mem.extends m1 m1' ->
   (exists m2',
      Mem.extends m2 m2' /\
      exists rs2,
-     exec_straight tge (transl_function f) (transl_code f c1) rs1 m1' (transl_code f c2) rs2 m2'
+     exec_straight tge (fn_code (transl_function f)) (transl_code f c1) rs1 m1' (transl_code f c2) rs2 m2'
      /\ agree ms2 sp rs2) ->
   exists st',
   plus step tge (State rs1 m1') E0 st' /\
@@ -753,11 +753,11 @@ Proof.
   assert (parent' = parent_sp s). inv B. auto. rewrite <- H3 in H1; discriminate. subst parent'.
   exploit Mem.loadv_extends. eauto. eexact H1. eauto. 
   intros [v' [C D]]. 
-  exploit (loadind_int_correct tge (transl_function f) IR13 f.(fn_link_ofs) IR14
+  exploit (loadind_int_correct tge (fn_code (transl_function f)) IR13 f.(fn_link_ofs) IR14
                  rs m' (parent_sp s) (loadind IR14 ofs (mreg_type dst) dst (transl_code f c))).
   auto.
   intros [rs1 [EX1 [RES1 OTH1]]].
-  exploit (loadind_correct tge (transl_function f) IR14 ofs (mreg_type dst) dst
+  exploit (loadind_correct tge (fn_code (transl_function f)) IR14 ofs (mreg_type dst) dst
                 (transl_code f c) rs1 m' v').
   rewrite RES1. auto. auto. 
   intros [rs2 [EX2 [RES2 OTH2]]].
@@ -853,7 +853,7 @@ Qed.
 Lemma exec_Mcall_prop:
   forall (s : list stackframe) (fb : block) (sp : val)
          (sig : signature) (ros : mreg + ident) (c : Mach.code)
-         (ms : Mach.regset) (m : mem) (f : function) (f' : block)
+         (ms : Mach.regset) (m : mem) (f : Mach.function) (f' : block)
          (ra : int),
   find_function_ptr ge ros ms = Some f' ->
   Genv.find_funct_ptr ge fb = Some (Internal f) ->
@@ -866,17 +866,16 @@ Proof.
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
   intro WTI. inv WTI.
   inv AT. 
-  assert (NOOV: code_size (transl_function f) <= Int.max_unsigned).
+  assert (NOOV: code_size (fn_code (transl_function f)) <= Int.max_unsigned).
     eapply functions_transl_no_overflow; eauto.
-  assert (CT: code_tail (Int.unsigned (Int.add ofs Int.one)) (transl_function f) (transl_code f c)).
+  assert (CT: code_tail (Int.unsigned (Int.add ofs Int.one)) (fn_code (transl_function f)) (transl_code f c)).
     destruct ros; simpl in H5; eapply code_tail_next_int; eauto.
   set (rs2 := rs #IR14 <- (Val.add rs#PC Vone) #PC <- (Vptr f' Int.zero)).
   exploit return_address_offset_correct; eauto. constructor; eauto. 
   intro RA_EQ.
   assert (ATLR: rs2 IR14 = Vptr fb ra).
     rewrite RA_EQ.
-    change (rs2 IR14) with (Val.add (rs PC) Vone).
-    rewrite <- H2. reflexivity. 
+    unfold rs2. rewrite <- H2. reflexivity. 
   assert (AG3: agree ms sp rs2).
     unfold rs2. apply agree_set_other; auto. apply agree_set_other; auto.
   left; exists (State rs2 m'); split.
@@ -928,7 +927,7 @@ Proof.
   generalize (wt_function_instrs _ WTF _ (INCL _ (in_eq _ _))).
   intro WTI. inv WTI.
   set (call_instr :=
-         match ros with inl r => Pbreg (ireg_of r) | inr symb => Pbsymb symb end).
+         match ros with inl r => Pbreg (ireg_of r) sig | inr symb => Pbsymb symb sig end).
   assert (TR: transl_code f (Mtailcall sig ros :: c) =
               loadind_int IR13 (fn_retaddr_ofs f) IR14
                 (Pfreeframe f.(fn_stacksize) (fn_link_ofs f) :: call_instr :: transl_code f c)).
@@ -941,13 +940,13 @@ Proof.
   intros [ra' [C D]].
   exploit lessdef_parent_ra; eauto. intros. subst ra'.
   exploit Mem.free_parallel_extends; eauto. intros [m2' [P Q]].
-  destruct (loadind_int_correct tge (transl_function f) IR13 f.(fn_retaddr_ofs) IR14
+  destruct (loadind_int_correct tge (fn_code (transl_function f)) IR13 f.(fn_retaddr_ofs) IR14
                 rs m'0 (parent_ra s) 
                 (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: call_instr :: transl_code f c))
   as [rs1 [EXEC1 [RES1 OTH1]]].
   rewrite <- (sp_val ms (Vptr stk soff) rs); auto.
   set (rs2 := nextinstr (rs1#IR13 <- (parent_sp s))).
-  assert (EXEC2: exec_straight tge (transl_function f)
+  assert (EXEC2: exec_straight tge (fn_code (transl_function f))
                    (transl_code f (Mtailcall sig ros :: c)) rs m'0
                    (call_instr :: transl_code f c) rs2 m2').
   rewrite TR. eapply exec_straight_trans. eexact EXEC1.
@@ -1047,11 +1046,11 @@ Proof.
 Qed.
 
 Lemma exec_Mgoto_prop:
-  forall (s : list stackframe) (fb : block) (f : function) (sp : val)
+  forall (s : list stackframe) (fb : block) (f : Mach.function) (sp : val)
          (lbl : Mach.label) (c : list Mach.instruction) (ms : Mach.regset)
          (m : mem) (c' : Mach.code),
   Genv.find_funct_ptr ge fb = Some (Internal f) ->
-  Mach.find_label lbl (fn_code f) = Some c' ->
+  Mach.find_label lbl (Mach.fn_code f) = Some c' ->
   exec_instr_prop (Machsem.State s fb sp (Mgoto lbl :: c) ms m) E0
                   (Machsem.State s fb sp c' ms m).
 Proof.
@@ -1071,13 +1070,13 @@ Proof.
 Qed.
 
 Lemma exec_Mcond_true_prop:
-  forall (s : list stackframe) (fb : block) (f : function) (sp : val)
+  forall (s : list stackframe) (fb : block) (f : Mach.function) (sp : val)
          (cond : condition) (args : list mreg) (lbl : Mach.label)
          (c : list Mach.instruction) (ms : mreg -> val) (m : mem)
          (c' : Mach.code),
   eval_condition cond ms ## args m = Some true ->
   Genv.find_funct_ptr ge fb = Some (Internal f) ->
-  Mach.find_label lbl (fn_code f) = Some c' ->
+  Mach.find_label lbl (Mach.fn_code f) = Some c' ->
   exec_instr_prop (Machsem.State s fb sp (Mcond cond args lbl :: c) ms m) E0
                   (Machsem.State s fb sp c' (undef_temps ms) m).
 Proof.
@@ -1132,14 +1131,14 @@ Proof.
 Qed.
 
 Lemma exec_Mjumptable_prop:
-  forall (s : list stackframe) (fb : block) (f : function) (sp : val)
+  forall (s : list stackframe) (fb : block) (f : Mach.function) (sp : val)
          (arg : mreg) (tbl : list Mach.label) (c : list Mach.instruction)
          (ms : mreg -> val) (m : mem) (n : int) (lbl : Mach.label)
          (c' : Mach.code),
   ms arg = Vint n ->
   list_nth_z tbl (Int.unsigned n) = Some lbl ->
   Genv.find_funct_ptr ge fb = Some (Internal f) ->
-  Mach.find_label lbl (fn_code f) = Some c' ->
+  Mach.find_label lbl (Mach.fn_code f) = Some c' ->
   exec_instr_prop
     (Machsem.State s fb sp (Mjumptable arg tbl :: c) ms m) E0
     (Machsem.State s fb sp c' (undef_temps ms) m).
@@ -1161,7 +1160,7 @@ Proof.
   generalize (functions_transl_no_overflow _ _ H4); intro NOOV.
   assert (rs (ireg_of arg) = Vint n).
     exploit ireg_val; eauto. intros LD. inv LD. auto. congruence.
-  assert (exec_straight tge (transl_function f)
+  assert (exec_straight tge (fn_code (transl_function f))
             (Pmov IR14 (SOlslimm (ireg_of arg) (Int.repr 2)) :: k1) rs m'
             k1 rs1 m').
    apply exec_straight_one. 
@@ -1208,16 +1207,16 @@ Proof.
   exploit lessdef_parent_ra; eauto. intros. subst ra'.
   exploit Mem.free_parallel_extends; eauto. intros [m2' [E F]].
 
-  exploit (loadind_int_correct tge (transl_function f) IR13 f.(fn_retaddr_ofs) IR14
+  exploit (loadind_int_correct tge (fn_code (transl_function f)) IR13 f.(fn_retaddr_ofs) IR14
                 rs m'0 (parent_ra s)
-                (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbreg IR14 :: transl_code f c)).
+                (Pfreeframe f.(fn_stacksize) f.(fn_link_ofs) :: Pbreg IR14 f.(Mach.fn_sig) :: transl_code f c)).
   rewrite <- (sp_val ms (Vptr stk soff) rs); auto.
   intros [rs1 [EXEC1 [RES1 OTH1]]].
   set (rs2 := nextinstr (rs1#IR13 <- (parent_sp s))).
-  assert (EXEC2: exec_straight tge (transl_function f)
+  assert (EXEC2: exec_straight tge (fn_code (transl_function f))
           (loadind_int IR13 (fn_retaddr_ofs f) IR14
-             (Pfreeframe f.(fn_stacksize)  (fn_link_ofs f) :: Pbreg IR14 :: transl_code f c))
-          rs m'0 (Pbreg IR14 :: transl_code f c) rs2 m2').
+             (Pfreeframe f.(fn_stacksize)  (fn_link_ofs f) :: Pbreg IR14 f.(Mach.fn_sig) :: transl_code f c))
+          rs m'0 (Pbreg IR14 f.(Mach.fn_sig) :: transl_code f c) rs2 m2').
   eapply exec_straight_trans. eexact EXEC1. 
   apply exec_straight_one. simpl. rewrite OTH1; try congruence.
   rewrite <- (sp_val ms (Vptr stk soff) rs); auto. 
@@ -1249,14 +1248,14 @@ Hypothesis wt_prog: wt_program prog.
 
 Lemma exec_function_internal_prop:
   forall (s : list stackframe) (fb : block) (ms : Mach.regset)
-         (m : mem) (f : function) (m1 m2 m3 : mem) (stk : block),
+         (m : mem) (f : Mach.function) (m1 m2 m3 : mem) (stk : block),
   Genv.find_funct_ptr ge fb = Some (Internal f) ->
   Mem.alloc m 0 (fn_stacksize f) = (m1, stk) ->
   let sp := Vptr stk Int.zero in
   store_stack m1 sp Tint f.(fn_link_ofs) (parent_sp s) = Some m2 ->
   store_stack m2 sp Tint f.(fn_retaddr_ofs) (parent_ra s) = Some m3 ->
   exec_instr_prop (Machsem.Callstate s fb ms m) E0
-                  (Machsem.State s fb sp (fn_code f) (undef_temps ms) m3).
+                  (Machsem.State s fb sp (Mach.fn_code f) (undef_temps ms) m3).
 Proof.
   intros; red; intros; inv MS.
   assert (WTF: wt_function f).
@@ -1275,9 +1274,9 @@ Proof.
   intros [m3' [E F]].
   (* Execution of function prologue *)
   assert (EXEC_PROLOGUE:
-            exec_straight tge (transl_function f)
-              (transl_function f) rs m'
-              (transl_code f f.(fn_code)) rs3 m3').
+            exec_straight tge (fn_code (transl_function f))
+              (fn_code (transl_function f)) rs m'
+              (transl_code f f.(Mach.fn_code)) rs3 m3').
   unfold transl_function at 2.
   apply exec_straight_two with rs2 m2'.
   unfold exec_instr. rewrite A. fold sp. 
@@ -1287,7 +1286,7 @@ Proof.
   rewrite E. auto. 
   auto. auto. 
   (* Agreement at end of prologue *)
-  assert (AT3: transl_code_at_pc rs3#PC fb f f.(fn_code)).
+  assert (AT3: transl_code_at_pc rs3#PC fb f f.(Mach.fn_code)).
     change (rs3 PC) with (Val.add (Val.add (rs PC) Vone) Vone).
     rewrite ATPC. simpl. constructor. auto.
     eapply code_tail_next_int; auto.
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index d6ad203..8f6b337 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -207,8 +207,8 @@ Definition nontemp_preg (r: preg) : bool :=
   | IR IR10 => false
   | IR IR12 => false
   | IR _ => true
-  | FR FR2 => false
-  | FR FR3 => false
+  | FR FR6 => false
+  | FR FR7 => false
   | FR _ => true
   | CR _ => false
   | PC => false
@@ -1460,7 +1460,7 @@ Lemma transl_store_float_correct:
    exec_store chunk (Val.add rs1#r2 (Vint n)) r1 rs1 m1' = OK (nextinstr rs1) m2' ->
    exists rs2,
    exec_instr ge c (mk_instr r1 r2 n) rs1 m1' = OK rs2 m2'
-   /\ (forall (r: preg), r <> FR3 -> rs2 r = nextinstr rs1 r)) ->
+   /\ (forall (r: preg), r <> FR7 -> rs2 r = nextinstr rs1 r)) ->
   agree ms sp rs ->
   map mreg_type args = type_of_addressing addr ->
   mreg_type rd = Tfloat ->
diff --git a/arm/Asmgenretaddr.v b/arm/Asmgenretaddr.v
index 5238d21..48b6328 100644
--- a/arm/Asmgenretaddr.v
+++ b/arm/Asmgenretaddr.v
@@ -68,7 +68,7 @@ Qed.
 Inductive return_address_offset: Mach.function -> Mach.code -> int -> Prop :=
   | return_address_offset_intro:
       forall c f ofs,
-      code_tail ofs (transl_function f) (transl_code f c) ->
+      code_tail ofs (fn_code (transl_function f)) (transl_code f c) ->
       return_address_offset f c (Int.repr ofs).
 
 (** We now show that such an offset always exists if the Mach code [c]
@@ -209,11 +209,11 @@ Proof.
 Qed.
 
 Lemma return_address_exists:
-  forall f sg ros c, is_tail (Mcall sg ros :: c) f.(fn_code) ->
+  forall f sg ros c, is_tail (Mcall sg ros :: c) f.(Mach.fn_code) ->
   exists ra, return_address_offset f c ra.
 Proof.
-  intros. assert (is_tail (transl_code f c) (transl_function f)).
-  unfold transl_function. IsTail. apply transl_code_tail; eauto with coqlib.
+  intros. assert (is_tail (transl_code f c) (fn_code (transl_function f))).
+  unfold transl_function. simpl. IsTail. apply transl_code_tail; eauto with coqlib.
   destruct (is_tail_code_tail _ _ H0) as [ofs A].
   exists (Int.repr ofs). constructor. auto.
 Qed.
diff --git a/arm/ConstpropOp.v b/arm/ConstpropOp.v
index fa97c6c..86b6d66 100644
--- a/arm/ConstpropOp.v
+++ b/arm/ConstpropOp.v
@@ -345,6 +345,12 @@ Inductive eval_static_operation_cases: forall (op: operation) (vl: list approx),
   | eval_static_operation_case49:
       forall n1,
       eval_static_operation_cases (Ofloatofint) (I n1 :: nil)
+  | eval_static_operation_case50:
+      forall n1,
+      eval_static_operation_cases (Ointuoffloat) (F n1 :: nil)
+  | eval_static_operation_case53:
+      forall n1,
+      eval_static_operation_cases (Ofloatofintu) (I n1 :: nil)
   | eval_static_operation_case51:
       forall c vl,
       eval_static_operation_cases (Ocmp c) (vl)
@@ -455,6 +461,10 @@ Definition eval_static_operation_match (op: operation) (vl: list approx) :=
       eval_static_operation_case48 n1
   | Ofloatofint, I n1 :: nil =>
       eval_static_operation_case49 n1
+  | Ointuoffloat, F n1 :: nil =>
+      eval_static_operation_case50 n1
+  | Ofloatofintu, I n1 :: nil =>
+      eval_static_operation_case53 n1
   | Ocmp c, vl =>
       eval_static_operation_case51 c vl
   | Oshrximm n, I n1 :: nil =>
@@ -563,6 +573,10 @@ Definition eval_static_operation (op: operation) (vl: list approx) :=
       match Float.intoffloat n1 with Some x => I x | None => Unknown end
   | eval_static_operation_case49 n1 =>
       F(Float.floatofint n1)
+  | eval_static_operation_case50 n1 =>
+      match Float.intuoffloat n1 with Some x => I x | None => Unknown end
+  | eval_static_operation_case53 n1 =>
+      F(Float.floatofintu n1)
   | eval_static_operation_case51 c vl =>
       match eval_static_condition c vl with
       | None => Unknown
diff --git a/arm/ConstpropOpproof.v b/arm/ConstpropOpproof.v
index 25758cc..4d43082 100644
--- a/arm/ConstpropOpproof.v
+++ b/arm/ConstpropOpproof.v
@@ -143,6 +143,8 @@ Proof.
 
   inv H4. destruct (Float.intoffloat f); simpl in H0; inv H0. red; auto.
 
+  inv H4. destruct (Float.intuoffloat f); simpl in H0; inv H0. red; auto.
+
   caseEq (eval_static_condition c vl0).
   intros. generalize (eval_static_condition_correct _ _ _ m _ H H1).
   intro. rewrite H2 in H0. 
diff --git a/arm/Machregs.v b/arm/Machregs.v
index 2e422d2..f5b5329 100644
--- a/arm/Machregs.v
+++ b/arm/Machregs.v
@@ -34,13 +34,15 @@ Inductive mreg: Type :=
   | R0: mreg  | R1: mreg  | R2: mreg  | R3: mreg
   | R4: mreg  | R5: mreg  | R6: mreg  | R7: mreg
   | R8: mreg  | R9: mreg  | R11: mreg
-  (** Allocatable float regs *)
-  | F0: mreg  | F1: mreg  | F4: mreg  | F5: mreg
-  | F6: mreg  | F7: mreg
+  (** Allocatable double-precision float regs *)
+  | F0: mreg  | F1: mreg  | F2: mreg  | F3: mreg
+  | F4: mreg  | F5: mreg
+  | F8: mreg  | F9: mreg  | F10: mreg | F11: mreg
+  | F12: mreg | F13: mreg | F14: mreg | F15: mreg
   (** Integer temporaries *)
   | IT1: mreg (* R10 *) | IT2: mreg (* R12 *)
   (** Float temporaries *)
-  | FT1: mreg (* F2 *) | FT2: mreg (* F3 *).
+  | FT1: mreg (* F6 *) | FT2: mreg (* F7 *).
 
 Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}.
 Proof. decide equality. Qed.
@@ -50,8 +52,10 @@ Definition mreg_type (r: mreg): typ :=
   | R0 => Tint  | R1 => Tint  | R2 => Tint  | R3 => Tint
   | R4 => Tint  | R5 => Tint  | R6 => Tint  | R7 => Tint
   | R8 => Tint  | R9 => Tint  | R11 => Tint
-  | F0 => Tfloat  | F1 => Tfloat  | F4 => Tfloat  | F5 => Tfloat
-  | F6 => Tfloat  | F7 => Tfloat
+  | F0 => Tfloat | F1 => Tfloat | F2 => Tfloat | F3 => Tfloat
+  | F4 => Tfloat| F5 => Tfloat
+  | F8 => Tfloat | F9 => Tfloat | F10 => Tfloat | F11 => Tfloat
+  | F12 => Tfloat | F13 => Tfloat | F14 => Tfloat | F15 => Tfloat
   | IT1 => Tint | IT2 => Tint
   | FT1 => Tfloat | FT2 => Tfloat
   end.
@@ -66,10 +70,12 @@ Module IndexedMreg <: INDEXED_TYPE.
     | R0 => 1  | R1 => 2  | R2 => 3  | R3 => 4
     | R4 => 5  | R5 => 6  | R6 => 7  | R7 => 8
     | R8 => 9  | R9 => 10 | R11 => 11
-    | F0 => 12 | F1 => 13 | F4 => 14  | F5 => 15
-    | F6 => 16 | F7 => 17
-    | IT1 => 18 | IT2 => 19
-    | FT1 => 20 | FT2 => 21
+    | F0 => 12 | F1 => 13 | F2 => 14  | F3 => 15
+    | F4 => 16 | F5 => 17
+    | F8 => 18 | F9 => 19 | F10 => 20 | F11 => 21
+    | F12 => 22 | F13 => 23 | F14 => 24 | F15 => 25
+    | IT1 => 26 | IT2 => 27
+    | FT1 => 28 | FT2 => 29
     end.
   Lemma index_inj:
     forall r1 r2, index r1 = index r2 -> r1 = r2.
diff --git a/arm/Machregsaux.ml b/arm/Machregsaux.ml
index fa72e14..642437e 100644
--- a/arm/Machregsaux.ml
+++ b/arm/Machregsaux.ml
@@ -18,10 +18,12 @@ let register_names = [
   ("R0", R0);  ("R1", R1);  ("R2", R2);  ("R3", R3);
   ("R4", R4);  ("R5", R5);  ("R6", R6);  ("R7", R7);
   ("R8", R8);  ("R9", R9);  ("R11", R11);
-  ("F0", F0);  ("F1", F1);  
-  ("F4", F4);  ("F5", F5);  ("F6", F6);  ("F7", F7);
+  ("F0", F0);  ("F1", F1);  ("F2", F2); ("F3", F3);
+  ("F4", F4);  ("F5", F5);
+  ("F8", F8);  ("F9", F9);  ("F10", F10); ("F11", F11);
+  ("F12", F12);("F13", F13);("F14", F14); ("F15", F15);
   ("R10", IT1); ("R12", IT2);
-  ("F2", FT1); ("F3", FT2)
+  ("F6", FT1); ("F7", FT2)
 ]
 
 let name_of_register r =
diff --git a/arm/Op.v b/arm/Op.v
index bb688ce..17cd0b4 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -112,7 +112,9 @@ Inductive operation : Type :=
   | Osingleoffloat: operation           (**r [rd] is [r1] truncated to single-precision float *)
 (*c Conversions between int and float: *)
   | Ointoffloat: operation              (**r [rd = signed_int_of_float(r1)] *)
+  | Ointuoffloat: operation             (**r [rd = unsigned_int_of_float(r1)] *)
   | Ofloatofint: operation              (**r [rd = float_of_signed_int(r1)] *)
+  | Ofloatofintu: operation             (**r [rd = float_of_unsigned_int(r1)] *)
 (*c Boolean tests: *)
   | Ocmp: condition -> operation.       (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
 
@@ -287,7 +289,9 @@ Definition eval_operation
   | Odivf, Vfloat f1 :: Vfloat f2 :: nil => Some (Vfloat (Float.div f1 f2))
   | Osingleoffloat, v1 :: nil => Some (Val.singleoffloat v1)
   | Ointoffloat, Vfloat f1 :: nil => option_map Vint (Float.intoffloat f1)
+  | Ointuoffloat, Vfloat f1 :: nil => option_map Vint (Float.intuoffloat f1)
   | Ofloatofint, Vint n1 :: nil => Some (Vfloat (Float.floatofint n1))
+  | Ofloatofintu, Vint n1 :: nil => Some (Vfloat (Float.floatofintu n1))
   | Ocmp c, _ =>
       match eval_condition c vl m with
       | None => None
@@ -491,7 +495,9 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Odivf => (Tfloat :: Tfloat :: nil, Tfloat)
   | Osingleoffloat => (Tfloat :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
+  | Ointuoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
+  | Ofloatofintu => (Tint :: nil, Tfloat)
   | Ocmp c => (type_of_condition c, Tint)
   end.
 
@@ -555,6 +561,7 @@ Proof.
   injection H0; intro; subst v; exact I. discriminate.
   destruct v0; exact I.
   destruct (Float.intoffloat f); simpl in H0; inv H0. exact I.
+  destruct (Float.intuoffloat f); simpl in H0; inv H0. exact I.
   destruct (eval_condition c vl). 
   destruct b; injection H0; intro; subst v; exact I.
   discriminate.
@@ -656,7 +663,9 @@ Definition eval_operation_total (sp: val) (op: operation) (vl: list val) : val :
   | Odivf, v1::v2::nil => Val.divf v1 v2
   | Osingleoffloat, v1::nil => Val.singleoffloat v1
   | Ointoffloat, v1::nil => Val.intoffloat v1
+  | Ointuoffloat, v1::nil => Val.intuoffloat v1
   | Ofloatofint, v1::nil => Val.floatofint v1
+  | Ofloatofintu, v1::nil => Val.floatofintu v1
   | Ocmp c, _ => eval_condition_total c vl
   | _, _ => Vundef
   end.
@@ -729,6 +738,7 @@ Proof.
   assert (Int.unsigned (Int.repr 31) < Int.unsigned Int.iwordsize). vm_compute; auto.
   omega. discriminate.
   destruct (Float.intoffloat f); simpl in H; inv H. auto.
+  destruct (Float.intuoffloat f); simpl in H; inv H. auto.
   caseEq (eval_condition c vl m); intros; rewrite H0 in H.
   replace v with (Val.of_bool b).
   eapply eval_condition_weaken; eauto.
@@ -840,6 +850,7 @@ Proof.
   destruct (Int.ltu i (Int.repr 31)); inv H1; TrivialExists.
   exists (Val.singleoffloat v2); split. auto. apply Val.singleoffloat_lessdef; auto.
   destruct (Float.intoffloat f); simpl in *; inv H1. TrivialExists.
+  destruct (Float.intuoffloat f); simpl in *; inv H1. TrivialExists.
   exists v1; split; auto. 
   destruct (eval_condition c vl1 m1) as [] _eqn. 
   rewrite (eval_condition_lessdef c H H0 Heqo).
@@ -1000,6 +1011,7 @@ Proof.
   destruct (Int.ltu i (Int.repr 31)); inv H1. TrivialExists2.
   exists (Val.singleoffloat v'); split; auto. inv H4; simpl; auto.
   destruct (Float.intoffloat f0); simpl in *; inv H1. TrivialExists2.
+  destruct (Float.intuoffloat f0); simpl in *; inv H1. TrivialExists2.
   destruct (eval_condition c vl1 m1) as [] _eqn; try discriminate.
   exploit eval_condition_inject; eauto. intros EQ; rewrite EQ. 
   destruct b; inv H1; TrivialExists2.
diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml
index 4406f5b..f1beded 100644
--- a/arm/PrintAsm.ml
+++ b/arm/PrintAsm.ml
@@ -37,21 +37,6 @@ let label_for_label lbl =
     Hashtbl.add current_function_labels lbl lbl';
     lbl'
 
-(* Record identifiers of external functions that need stub code *)
-
-module IdentSet = Set.Make(struct type t = ident let compare = compare end)
-
-let extfuns = (ref IdentSet.empty)
-
-let need_stub = function
-  | EF_external(name, sg) -> List.mem Tfloat sg.sig_args
-  | _ -> false
-
-let record_extfun (Coq_pair(name, defn)) =
-  match defn with
-  | Internal f -> ()
-  | External ef -> if need_stub ef then extfuns := IdentSet.add name !extfuns
-
 (* Basic printing functions *)
 
 let strip_variadic_suffix name =
@@ -59,9 +44,7 @@ let strip_variadic_suffix name =
   with Not_found -> name
 
 let print_symb oc symb =
-  if IdentSet.mem symb !extfuns
-  then fprintf oc ".L%s$stub" (extern_atom symb)
-  else fprintf oc "%s" (strip_variadic_suffix (extern_atom symb))
+  fprintf oc "%s" (strip_variadic_suffix (extern_atom symb))
 
 let print_label oc lbl =
   fprintf oc ".L%d" (label_for_label lbl)
@@ -83,11 +66,26 @@ let int_reg_name = function
   | IR12 -> "r12" | IR13 -> "sp" | IR14 -> "lr"
 
 let float_reg_name = function
-  | FR0 -> "f0"  | FR1 -> "f1"  | FR2 -> "f2"  | FR3 -> "f3"
-  | FR4 -> "f4"  | FR5 -> "f5"  | FR6 -> "f6"  | FR7 -> "f7"
+  | FR0 -> "d0"  | FR1 -> "d1"  | FR2 -> "d2"  | FR3 -> "d3"
+  | FR4 -> "d4"  | FR5 -> "d5"  | FR6 -> "d6"  | FR7 -> "d7"
+  | FR8 -> "d8"  | FR9 -> "d9"  | FR10 -> "d10"  | FR11 -> "d11"
+  | FR12 -> "d12"  | FR13 -> "d13"  | FR14 -> "d14"  | FR15 -> "d15"
+
+let single_float_reg_index = function
+  | FR0 -> 0  | FR1 -> 2  | FR2 -> 4  | FR3 -> 6
+  | FR4 -> 8  | FR5 -> 10  | FR6 -> 12  | FR7 -> 14
+  | FR8 -> 16  | FR9 -> 18  | FR10 -> 20  | FR11 -> 22
+  | FR12 -> 24  | FR13 -> 26  | FR14 -> 28  | FR15 -> 30
+
+let single_float_reg_name = function
+  | FR0 -> "s0"  | FR1 -> "s2"  | FR2 -> "s4"  | FR3 -> "s6"
+  | FR4 -> "s8"  | FR5 -> "s10"  | FR6 -> "s12"  | FR7 -> "s14"
+  | FR8 -> "s16"  | FR9 -> "s18"  | FR10 -> "s20"  | FR11 -> "s22"
+  | FR12 -> "s24"  | FR13 -> "s26"  | FR14 -> "s28"  | FR15 -> "s30"
 
 let ireg oc r = output_string oc (int_reg_name r)
 let freg oc r = output_string oc (float_reg_name r)
+let freg_single oc r = output_string oc (single_float_reg_name r)
 
 let preg oc = function
   | IR r -> ireg oc r
@@ -171,11 +169,10 @@ let reset_constants () =
 let emit_constants oc =
   Hashtbl.iter
     (fun bf lbl ->
-      (* Floats are mixed-endian on this flavor of ARM *)
+      (* Little-endian floats *)
       let bfhi = Int64.shift_right_logical bf 32
       and bflo = Int64.logand bf 0xFFFF_FFFFL in
-      fprintf oc ".L%d:	.word	0x%Lx, 0x%Lx\n"
-          lbl bfhi bflo)
+      fprintf oc ".L%d:	.word	0x%Lx, 0x%Lx\n" lbl bflo bfhi)
     float_labels;
   Hashtbl.iter
     (fun (id, ofs) lbl ->
@@ -219,15 +216,13 @@ let call_helper oc fn dst arg1 arg2 =
   (* ... for a total of at most 7 instructions *)
   7
 
-(* Built-ins.  They come in three flavors: 
+(* Built-ins.  They come in two flavors: 
    - annotation statements: take their arguments in registers or stack
      locations; generate no code;
    - inlined by the compiler: take their arguments in arbitrary
      registers; preserve all registers the temporaries
      (IR10, IR12, IR14, FP2, FP4)
-   - inlined while printing asm code; take their arguments in
-     locations dictated by the calling conventions; preserve
-     callee-save regs only. *)
+*)
 
 (* Handling of annotations *)
 
@@ -262,7 +257,7 @@ let print_annot_val oc txt args res =
   | IR src :: _, IR dst ->
       if dst = src then 0 else (fprintf oc "	mov	%a, %a\n" ireg dst ireg src; 1)
   | FR src :: _, FR dst ->
-      if dst = src then 0 else (fprintf oc "	mvfd	%a, %a\n" freg dst freg src; 1)
+      if dst = src then 0 else (fprintf oc "	fcpy	%a, %a\n" freg dst freg src; 1)
   | _, _ -> assert false
 
 (* Handling of memcpy *)
@@ -298,7 +293,8 @@ let print_builtin_memcpy_big oc sz al src dst =
     if src <> IR0 && dst <> IR0 then IR0
     else if src <> IR1 && dst <> IR1 then IR1
     else IR2 in
-  fprintf oc "	stmfd	sp!, {%a,%a,%a}\n" ireg tmp ireg src ireg dst;
+  let tosave = List.sort compare [tmp;src;dst] in
+  fprintf oc "	stmfd	sp!, {%a}\n" print_list_ireg tosave;
   begin match Asmgen.decompose_int
                  (coqint_of_camlint (Int32.of_int (sz / chunksize))) with
   | [] -> assert false
@@ -314,7 +310,7 @@ let print_builtin_memcpy_big oc sz al src dst =
   fprintf oc "	subs	%a, %a, #1\n" ireg IR14 ireg IR14;
   fprintf oc "	%s	%a, [%a], #%d\n" store ireg tmp ireg dst chunksize;
   fprintf oc "	bne	.L%d\n" lbl;
-  fprintf oc "	ldmfd	sp!, {%a,%a,%a}\n" ireg tmp ireg src ireg dst;
+  fprintf oc "	ldmfd	sp!, {%a}\n" print_list_ireg tosave;
   9
 
 let print_builtin_memcpy oc sz al args =
@@ -345,10 +341,10 @@ let print_builtin_vload oc chunk args res =
     | Mint32, [IR addr], IR res ->
         fprintf oc "	ldr	%a, [%a, #0]\n" ireg res ireg addr; 1
     | Mfloat32, [IR addr], FR res ->
-        fprintf oc "	ldfs	%a, [%a, #0]\n" freg res ireg addr;
-        fprintf oc "	mvfd	%a, %a\n" freg res freg res; 2
+        fprintf oc "	flds	%a, [%a, #0]\n" freg_single res ireg addr;
+        fprintf oc "	fcvtds	%a, %a\n" freg res freg_single res; 2
     | Mfloat64, [IR addr], FR res ->
-        fprintf oc "	ldfd	%a, [%a, #0]\n" freg res ireg addr; 1
+        fprintf oc "	fldd	%a, [%a, #0]\n" freg res ireg addr; 1
     | _ ->
         assert false
     end in
@@ -365,10 +361,10 @@ let print_builtin_vstore oc chunk args =
     | Mint32, [IR addr; IR src] ->
         fprintf oc "	str	%a, [%a, #0]\n" ireg src ireg addr; 1
     | Mfloat32, [IR addr; FR src] ->
-        fprintf oc "	mvfs	%a, %a\n" freg FR2 freg src;
-        fprintf oc "	stfs	%a, [%a, #0]\n" freg FR2 ireg addr; 2
+        fprintf oc "	fcvtsd	%a, %a\n" freg_single FR6 freg src;
+        fprintf oc "	fsts	%a, [%a, #0]\n" freg_single FR6 ireg addr; 2
     | Mfloat64, [IR addr; FR src] ->
-        fprintf oc "	stfd	%a, [%a, #0]\n" freg src ireg addr; 1
+        fprintf oc "	fstd	%a, [%a, #0]\n" freg src ireg addr; 1
     | _ ->
         assert false
     end in
@@ -381,7 +377,7 @@ let print_builtin_inline oc name args res =
   let n = match name, args, res with
   (* Float arithmetic *)
   | "__builtin_fabs", [FR a1], FR res ->
-      fprintf oc "	absd	%a, %a\n" freg res freg a1; 1
+      fprintf oc "	fabsd	%a, %a\n" freg res freg a1; 1
   (* Catch-all *)
   | _ ->
       invalid_arg ("unrecognized builtin " ^ name)
@@ -389,6 +385,35 @@ let print_builtin_inline oc name args res =
   fprintf oc "%s end %s\n" comment name;
   n
 
+(* Fixing up calling conventions *)
+
+type direction = Incoming | Outgoing
+
+let fixup_conventions oc dir tyl =
+  let fixup f i1 i2 =
+    match dir with
+    | Incoming ->     (* f <- (i1, i2)  *)
+        fprintf oc "	fmdlr	%a, %a\n" freg f ireg i1;
+        fprintf oc "	fmdhr	%a, %a\n" freg f ireg i2
+    | Outgoing ->      (* (i1, i2) <- f *)
+        fprintf oc "	fmrdl	%a, %a\n" ireg i1 freg f;
+        fprintf oc "	fmrdh	%a, %a\n" ireg i2 freg f in
+  match tyl with
+  | Tfloat :: Tfloat :: _ ->
+      fixup FR0 IR0 IR1; fixup FR1 IR2 IR3; 4
+  | Tfloat :: _ ->
+      fixup FR0 IR0 IR1; 2
+  | Tint :: Tfloat :: _ | Tint :: Tint :: Tfloat :: _ ->
+      fixup FR1 IR2 IR3; 2
+  | _ ->
+      0
+
+let fixup_arguments oc dir sg =
+  fixup_conventions oc dir sg.sig_args
+
+let fixup_result oc dir sg =
+  fixup_conventions oc dir (proj_sig_res sg :: [])
+
 (* Printing of instructions *)
 
 let shift_op oc = function
@@ -421,15 +446,27 @@ let print_instruction oc = function
       fprintf oc "	b	%a\n" print_label lbl; 1
   | Pbc(bit, lbl) ->
       fprintf oc "	b%s	%a\n" (condition_name bit) print_label lbl; 1
-  | Pbsymb id ->
-      fprintf oc "	b	%a\n" print_symb id; 1
-  | Pbreg r ->
-      fprintf oc "	mov	pc, %a\n" ireg r; 1
-  | Pblsymb id ->
-      fprintf oc "	bl	%a\n" print_symb id; 1
-  | Pblreg r ->
-      fprintf oc "	mov	lr, pc\n";
-      fprintf oc "	mov	pc, %a\n" ireg r; 2
+  | Pbsymb(id, sg) ->
+      let n = fixup_arguments oc Outgoing sg in
+      fprintf oc "	b	%a\n" print_symb id;
+      n + 1
+  | Pbreg(r, sg) ->
+      let n = 
+        if r = IR14
+        then fixup_result oc Outgoing sg
+        else fixup_arguments oc Outgoing sg in
+      fprintf oc "	bx	%a\n" ireg r;
+      n + 1
+  | Pblsymb(id, sg) ->
+      let n1 = fixup_arguments oc Outgoing sg in
+      fprintf oc "	bl	%a\n" print_symb id;
+      let n2 = fixup_result oc Incoming sg in
+      n1 + 1 + n2
+  | Pblreg(r, sg) ->
+      let n1 = fixup_arguments oc Outgoing sg in
+      fprintf oc "	blx	%a\n" ireg r;
+      let n2 = fixup_result oc Incoming sg in
+      n1 + 1 + n2
   | Pbic(r1, r2, so) ->
       fprintf oc "	bic	%a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
   | Pcmp(r1, so) ->
@@ -465,52 +502,62 @@ let print_instruction oc = function
   | Pstrh(r1, r2, sa) ->
       fprintf oc "	strh	%a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
   | Psdiv(r1, r2, r3) ->
-      call_helper oc "__divsi3" r1 r2 r3
+      call_helper oc "__aeabi_idiv" r1 r2 r3
   | Psub(r1, r2, so) ->
       fprintf oc "	sub	%a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
   | Pudiv(r1, r2, r3) ->
-      call_helper oc "__udivsi3" r1 r2 r3
+      call_helper oc "__aeabi_uidiv" r1 r2 r3
   (* Floating-point coprocessor instructions *)
-  | Pabsd(r1, r2) ->
-      fprintf oc "	absd	%a, %a\n" freg r1 freg r2; 1
-  | Padfd(r1, r2, r3) ->
-      fprintf oc "	adfd	%a, %a, %a\n" freg r1 freg r2 freg r3; 1
-  | Pcmf(r1, r2) ->
-      fprintf oc "	cmf	%a, %a\n" freg r1 freg r2; 1
-  | Pdvfd(r1, r2, r3) ->
-      fprintf oc "	dvfd	%a, %a, %a\n" freg r1 freg r2 freg r3; 1
-  | Pfixz(r1, r2) ->
-      fprintf oc "	fixz	%a, %a\n" ireg r1 freg r2; 1
-  | Pfltd(r1, r2) ->
-      fprintf oc "	fltd	%a, %a\n" freg r1 ireg r2; 1
-  | Pldfd(r1, r2, n) ->
-      fprintf oc "	ldfd	%a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
-  | Pldfs(r1, r2, n) ->
-      fprintf oc "	ldfs	%a, [%a, #%a]\n" freg r1 ireg r2 coqint n;
-      fprintf oc "	mvfd	%a, %a\n" freg r1 freg r1; 2
-  | Plifd(r1, f) ->
+  | Pfcpyd(r1, r2) ->
+      fprintf oc "	fcpyd	%a, %a\n" freg r1 freg r2; 1
+  | Pfabsd(r1, r2) ->
+      fprintf oc "	fabsd	%a, %a\n" freg r1 freg r2; 1
+  | Pfnegd(r1, r2) ->
+      fprintf oc "	fnegd	%a, %a\n" freg r1 freg r2; 1
+  | Pfaddd(r1, r2, r3) ->
+      fprintf oc "	faddd	%a, %a, %a\n" freg r1 freg r2 freg r3; 1
+  | Pfdivd(r1, r2, r3) ->
+      fprintf oc "	fdivd	%a, %a, %a\n" freg r1 freg r2 freg r3; 1
+  | Pfmuld(r1, r2, r3) ->
+      fprintf oc "	fmuld	%a, %a, %a\n" freg r1 freg r2 freg r3; 1
+  | Pfsubd(r1, r2, r3) ->
+      fprintf oc "	fsubd	%a, %a, %a\n" freg r1 freg r2 freg r3; 1
+  | Pflid(r1, f) ->
+(*
       if Int64.bits_of_float f = 0L (* +0.0 *) then begin
         fprintf oc "	mvfd	%a, #0.0\n" freg r1; 1
       end else begin
-        let lbl = label_float f in
-        fprintf oc "	ldfd	%a, .L%d @ %.12g\n" freg r1 lbl f; 1
-      end
-  | Pmnfd(r1, r2) ->
-      fprintf oc "	mnfd	%a, %a\n" freg r1 freg r2; 1
-  | Pmvfd(r1, r2) ->
-      fprintf oc "	mvfd	%a, %a\n" freg r1 freg r2; 1
-  | Pmvfs(r1, r2) ->
-      fprintf oc "	mvfs	%a, %a\n" freg r1 freg r2;
-      fprintf oc "	mvfd	%a, %a\n" freg r2 freg r2; 2
-  | Pmufd(r1, r2, r3) ->
-      fprintf oc "	mufd	%a, %a, %a\n" freg r1 freg r2 freg r3; 1
-  | Pstfd(r1, r2, n) ->
-      fprintf oc "	stfd	%a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
-  | Pstfs(r1, r2, n) ->
-      fprintf oc "	mvfs	f3, %a\n" freg r1;
-      fprintf oc "	stfs	f3, [%a, #%a]\n" ireg r2 coqint n; 2
-  | Psufd(r1, r2, r3) ->
-      fprintf oc "	sufd	%a, %a, %a\n" freg r1 freg r2 freg r3; 1
+*)
+      let lbl = label_float f in
+      fprintf oc "	fldd	%a, .L%d @ %.12g\n" freg r1 lbl f; 1
+  | Pfcmpd(r1, r2) ->
+      fprintf oc "	fcmpd	%a, %a\n" freg r1 freg r2;
+      fprintf oc "	fmstat\n"; 2
+  | Pfsitod(r1, r2) ->
+      fprintf oc "	fmsr	%a, %a\n" freg_single r1 ireg r2;
+      fprintf oc "	fsitod	%a, %a\n" freg r1 freg_single r1; 2
+  | Pfuitod(r1, r2) ->
+      fprintf oc "	fmsr	%a, %a\n" freg_single r1 ireg r2;
+      fprintf oc "	fuitod	%a, %a\n" freg r1 freg_single r1; 2
+  | Pftosizd(r1, r2) ->
+      fprintf oc "	ftosizd	%a, %a\n" freg_single FR6 freg r2;
+      fprintf oc "	fmrs	%a, %a\n" ireg r1 freg_single FR6; 2
+  | Pftouizd(r1, r2) ->
+      fprintf oc "	ftouizd	%a, %a\n" freg_single FR6 freg r2;
+      fprintf oc "	fmrs	%a, %a\n" ireg r1 freg_single FR6; 2
+  | Pfcvtsd(r1, r2) ->
+      fprintf oc "	fcvtsd	%a, %a\n" freg_single r1 freg r2;
+      fprintf oc "	fcvtds	%a, %a\n" freg r1 freg_single r1; 2
+  | Pfldd(r1, r2, n) ->
+      fprintf oc "	fldd	%a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
+  | Pflds(r1, r2, n) ->
+      fprintf oc "	flds	%a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n;
+      fprintf oc "	fcvtds	%a, %a\n" freg r1 freg_single r1; 2
+  | Pfstd(r1, r2, n) ->
+      fprintf oc "	fstd	%a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
+  | Pfsts(r1, r2, n) ->
+      fprintf oc "	fcvtsd	%a, %a\n" freg_single FR6 freg r1;
+      fprintf oc "	fsts	%a, [%a, #%a]\n" freg_single FR6 ireg r2 coqint n; 2
   (* Pseudo-instructions *)
   | Pallocframe(sz, ofs) ->
       fprintf oc "	mov	r12, sp\n";
@@ -587,7 +634,7 @@ let rec print_instructions oc instrs =
       end;
       print_instructions oc il
 
-let print_function oc name code =
+let print_function oc name fn =
   Hashtbl.clear current_function_labels;
   reset_constants();
   currpos := 0;
@@ -597,57 +644,18 @@ let print_function oc name code =
   if not (C2C.atom_is_static name) then
     fprintf oc "	.global	%a\n" print_symb name;
   fprintf oc "%a:\n" print_symb name;
-  print_instructions oc code;
+  ignore (fixup_arguments oc Incoming fn.fn_sig);
+  print_instructions oc fn.fn_code;
   emit_constants oc;
   fprintf oc "	.type	%a, %%function\n" print_symb name;
   fprintf oc "	.size	%a, . - %a\n" print_symb name print_symb name
 
-(* Generation of stub code for external functions that take floats.
-   Compcert passes the first two float arguments in F0 and F1,
-   while gcc passes them in pairs of integer registers. *)
-
-let print_external_function oc name sg =
-  let name = extern_atom name in
-  let rec move_args ty_args int_regs float_regs =
-    match ty_args with
-    | [] -> ()
-    | Tint :: tys ->
-        begin match int_regs with
-        | [] -> move_args tys [] float_regs
-        | ir :: irs -> move_args tys irs float_regs
-        end
-    | Tfloat :: tys ->
-        begin match float_regs, int_regs with
-        | fr :: frs, ir1 :: ir2 :: irs ->
-            (* transfer fr to the pair (ir1, ir2) *)
-            fprintf oc "	stfd	%a, [sp, #-8]!\n" freg fr;
-            fprintf oc "	ldmfd	sp!, {%a, %a}\n" ireg ir1 ireg ir2;
-            move_args tys irs frs
-        | fr :: frs, ir1 :: [] ->
-            (* transfer fr to ir1 and the bottom stack word *)
-            fprintf oc "	stfd	%a, [sp, #-4]!\n" freg fr;
-            fprintf oc "	ldmfd	sp!, {%a}\n" ireg ir1;
-            move_args tys [] frs
-        | _, _ ->
-            (* no more float regs, or no more int regs:
-               float arg is already on stack, where it should be *)
-            move_args tys int_regs float_regs
-        end
-  in
-  section oc Section_text;
-  fprintf oc "	.align 2\n";
-  fprintf oc ".L%s$stub:\n" name;
-  move_args sg.sig_args [IR0;IR1;IR2;IR3] [FR0;FR1];
-  (* Remove variadic prefix $iiff if any *)
-  let real_name = strip_variadic_suffix name in
-  fprintf oc "	b	%s\n" real_name
-
 let print_fundef oc (Coq_pair(name, defn)) =
   match defn with
   | Internal code ->
       print_function oc name code
   | External ef ->
-      if need_stub ef then print_external_function oc name (ef_sig ef)
+      ()
 
 (* Data *)
 
@@ -659,13 +667,9 @@ let print_init oc = function
   | Init_int32 n ->
       fprintf oc "	.word	%ld\n" (camlint_of_coqint n)
   | Init_float32 n ->
-      fprintf oc "	.word	0x%lx @ %.15g \n" (Int32.bits_of_float n) n
+      fprintf oc "	.word	0x%lx %s %.15g \n" (Int32.bits_of_float n) comment n
   | Init_float64 n ->
-      (* Floats are mixed-endian on this flavor of ARM *)
-      let bf = Int64.bits_of_float n in
-      let bfhi = Int64.shift_right_logical bf 32
-      and bflo = Int64.logand bf 0xFFFF_FFFFL in
-      fprintf oc "	.word	0x%Lx, 0x%Lx @ %.15g\n" bfhi bflo n
+      fprintf oc "	.quad	%Ld %s %.18g\n" (Int64.bits_of_float n) comment n
   | Init_space n ->
       let n = camlint_of_z n in
       if n > 0l then fprintf oc "	.space	%ld\n" n
@@ -709,8 +713,7 @@ let print_var oc (Coq_pair(name, v)) =
       fprintf oc "	.size	%a, . - %a\n" print_symb name print_symb name
 
 let print_program oc p =
-  extfuns := IdentSet.empty;
-  List.iter record_extfun p.prog_funct;
+  fprintf oc "	.fpu	vfp\n";
   List.iter (print_var oc) p.prog_vars;
   List.iter (print_fundef oc) p.prog_funct
 
diff --git a/arm/SelectOp.v b/arm/SelectOp.v
index 44528c6..6580901 100644
--- a/arm/SelectOp.v
+++ b/arm/SelectOp.v
@@ -1022,28 +1022,14 @@ Definition negint (e: expr) := Eop (Orsubimm Int.zero) (e ::: Enil).
 Definition negf (e: expr) :=  Eop Onegf (e ::: Enil).
 Definition absf (e: expr) :=  Eop Oabsf (e ::: Enil).
 Definition intoffloat (e: expr) := Eop Ointoffloat (e ::: Enil).
+Definition intuoffloat (e: expr) := Eop Ointuoffloat (e ::: Enil).
 Definition floatofint (e: expr) := Eop Ofloatofint (e ::: Enil).
+Definition floatofintu (e: expr) := Eop Ofloatofintu (e ::: Enil).
 Definition addf (e1 e2: expr) :=  Eop Oaddf (e1 ::: e2 ::: Enil).
 Definition subf (e1 e2: expr) :=  Eop Osubf (e1 ::: e2 ::: Enil).
 Definition mulf (e1 e2: expr) :=  Eop Omulf (e1 ::: e2 ::: Enil).
 Definition divf (e1 e2: expr) :=  Eop Odivf (e1 ::: e2 ::: Enil).
 
-(** ** Conversions between unsigned ints and floats *)
-
-Definition intuoffloat (e: expr) :=
-  let f := Eop (Ofloatconst (Float.floatofintu Float.ox8000_0000)) Enil in
-  Elet e
-    (Econdition (CEcond (Ccompf Clt) (Eletvar O ::: f ::: Enil))
-      (intoffloat (Eletvar O))
-      (addimm Float.ox8000_0000 (intoffloat (subf (Eletvar O) f)))).
-
-Definition floatofintu (e: expr) :=
-  let f := Eop (Ofloatconst (Float.floatofintu Float.ox8000_0000)) Enil in
-  Elet e
-    (Econdition (CEcond (Ccompuimm Clt Float.ox8000_0000) (Eletvar O ::: Enil))
-      (floatofint (Eletvar O))
-      (addf (floatofint (addimm (Int.neg Float.ox8000_0000) (Eletvar O))) f)).
-
 (** ** Recognition of addressing modes for load and store operations *)
 
 (*
@@ -1090,13 +1076,30 @@ Definition addressing_match (e: expr) :=
 
 (** We do not recognize the [Aindexed2] and [Aindexed2shift] modes
     for floating-point accesses, since these are not supported
-    by the hardware and emulated inefficiently in [ARMgen]. *)
+    by the hardware and emulated inefficiently in [Asmgen].
+    Likewise, [Aindexed2shift] are not supported for halfword
+    and signed byte accesses. *)
+
+Definition can_use_Aindexed (chunk: memory_chunk): bool :=
+  match chunk with
+  | Mint8signed => true
+  | Mint8unsigned => true
+  | Mint16signed => true
+  | Mint16unsigned => true
+  | Mint32 => true
+  | Mfloat32 => false
+  | Mfloat64 => false
+  end.
 
-Definition is_float_addressing (chunk: memory_chunk): bool :=
+Definition can_use_Aindexed2 (chunk: memory_chunk): bool :=
   match chunk with
-  | Mfloat32 => true
-  | Mfloat64 => true
-  | _ => false
+  | Mint8signed => false
+  | Mint8unsigned => true
+  | Mint16signed => false
+  | Mint16unsigned => false
+  | Mint32 => true
+  | Mfloat32 => false
+  | Mfloat64 => false
   end.
 
 Definition addressing (chunk: memory_chunk) (e: expr) :=
@@ -1106,13 +1109,13 @@ Definition addressing (chunk: memory_chunk) (e: expr) :=
   | addressing_case3 n e1 =>
       (Aindexed n, e1:::Enil)
   | addressing_case4 s e1 e2 =>
-      if is_float_addressing chunk
-      then (Aindexed Int.zero, Eop (Oaddshift s) (e1:::e2:::Enil) ::: Enil)
-      else (Aindexed2shift s, e1:::e2:::Enil)
+      if can_use_Aindexed2 chunk
+      then (Aindexed2shift s, e1:::e2:::Enil)
+      else (Aindexed Int.zero, Eop (Oaddshift s) (e1:::e2:::Enil) ::: Enil)
   | addressing_case5 e1 e2 =>
-      if is_float_addressing chunk
-      then (Aindexed Int.zero, Eop Oadd (e1:::e2:::Enil) ::: Enil)
-      else (Aindexed2, e1:::e2:::Enil)
+      if can_use_Aindexed chunk
+      then (Aindexed2, e1:::e2:::Enil)
+      else (Aindexed Int.zero, Eop Oadd (e1:::e2:::Enil) ::: Enil)
   | addressing_default e =>
       (Aindexed Int.zero, e:::Enil)
   end.
diff --git a/arm/SelectOpproof.v b/arm/SelectOpproof.v
index 7602b11..9ecf1de 100644
--- a/arm/SelectOpproof.v
+++ b/arm/SelectOpproof.v
@@ -946,23 +946,8 @@ Theorem eval_intuoffloat:
   Float.intuoffloat x = Some n ->
   eval_expr ge sp e m le (intuoffloat a) (Vint n).
 Proof.
-  intros. unfold intuoffloat. 
-  econstructor. eauto. 
-  set (im := Int.repr Int.half_modulus).
-  set (fm := Float.floatofintu im).
-  assert (eval_expr ge sp e m (Vfloat x :: le) (Eletvar O) (Vfloat x)).
-    constructor. auto. 
-  apply eval_Econdition with (v1 := Float.cmp Clt x fm).
-  econstructor. constructor. eauto. constructor. EvalOp. simpl; eauto. constructor.
-  simpl. auto.
-  caseEq (Float.cmp Clt x fm); intros.
-  exploit Float.intuoffloat_intoffloat_1; eauto. intro EQ.
-  EvalOp. simpl. rewrite EQ; auto.
-  exploit Float.intuoffloat_intoffloat_2; eauto. intro EQ.
-  replace n with (Int.add (Int.sub n Float.ox8000_0000) Float.ox8000_0000).
-  apply eval_addimm. eapply eval_intoffloat; eauto.
-  apply eval_subf; auto. EvalOp.
-  rewrite Int.sub_add_opp. rewrite Int.add_assoc. apply Int.add_zero. 
+  intros; unfold intuoffloat; EvalOp.
+  simpl. rewrite H0. auto.
 Qed.
 
 Theorem eval_floatofint:
@@ -975,23 +960,7 @@ Theorem eval_floatofintu:
   forall le a x,
   eval_expr ge sp e m le a (Vint x) ->
   eval_expr ge sp e m le (floatofintu a) (Vfloat (Float.floatofintu x)).
-Proof.
-  intros. unfold floatofintu. 
-  econstructor. eauto. 
-  set (f := Float.floatofintu Float.ox8000_0000).
-  assert (eval_expr ge sp e m (Vint x :: le) (Eletvar O) (Vint x)).
-    constructor. auto. 
-  apply eval_Econdition with (v1 := Int.ltu x Float.ox8000_0000).
-  econstructor. constructor. eauto. constructor.
-  simpl. auto.
-  caseEq (Int.ltu x Float.ox8000_0000); intros.
-  rewrite Float.floatofintu_floatofint_1; auto.
-  apply eval_floatofint; auto.
-  rewrite Float.floatofintu_floatofint_2; auto.
-  fold f. apply eval_addf. apply eval_floatofint. 
-  rewrite Int.sub_add_opp. apply eval_addimm; auto.
-  EvalOp.
-Qed.
+Proof. intros; unfold floatofintu; EvalOp. Qed.
 
 Lemma eval_addressing:
   forall le chunk a v b ofs,
@@ -1007,25 +976,25 @@ Proof.
   exists (@nil val). split. eauto with evalexpr. simpl. auto.
   exists (Vptr b0 i :: nil). split. eauto with evalexpr. 
     simpl. congruence.
-  destruct (is_float_addressing chunk).
-  exists (Vptr b0 ofs :: nil).
-    split. constructor. econstructor. eauto with evalexpr. simpl. congruence. constructor. 
-    simpl. rewrite Int.add_zero. congruence.
+  destruct (can_use_Aindexed2 chunk).
   exists (Vptr b0 i :: Vint i0 :: nil).
     split. eauto with evalexpr. simpl. congruence.
-  destruct (is_float_addressing chunk).
   exists (Vptr b0 ofs :: nil).
     split. constructor. econstructor. eauto with evalexpr. simpl. congruence. constructor. 
     simpl. rewrite Int.add_zero. congruence.
+  destruct (can_use_Aindexed chunk).
   exists (Vint i :: Vptr b0 i0 :: nil).
     split. eauto with evalexpr. simpl. 
     rewrite Int.add_commut. congruence.
-  destruct (is_float_addressing chunk).
   exists (Vptr b0 ofs :: nil).
     split. constructor. econstructor. eauto with evalexpr. simpl. congruence. constructor. 
     simpl. rewrite Int.add_zero. congruence.
+  destruct (can_use_Aindexed chunk).
   exists (Vptr b0 i :: Vint i0 :: nil).
     split. eauto with evalexpr. simpl. congruence.
+  exists (Vptr b0 ofs :: nil).
+    split. constructor. econstructor. eauto with evalexpr. simpl. congruence. constructor. 
+    simpl. rewrite Int.add_zero. congruence.
   exists (v :: nil). split. eauto with evalexpr. 
     subst v. simpl. rewrite Int.add_zero. auto.
 Qed.
diff --git a/arm/linux/Conventions1.v b/arm/linux/Conventions1.v
index fdccf75..842ccbf 100644
--- a/arm/linux/Conventions1.v
+++ b/arm/linux/Conventions1.v
@@ -35,13 +35,13 @@ Definition int_caller_save_regs :=
   R0 :: R1 :: R2 :: R3 :: nil.
 
 Definition float_caller_save_regs :=
-  F0 :: F1 :: nil.
+  F0 :: F1 :: F2 :: F3 :: F4 :: F5 :: nil.
 
 Definition int_callee_save_regs :=
   R4 :: R5 :: R6 :: R7 :: R8 :: R9 :: R11 :: nil.
 
 Definition float_callee_save_regs :=
-  F4 :: F5 :: F6 :: F7 :: nil.
+  F8 :: F9 :: F10 :: F11 :: F12 :: F13 :: F14 :: F15 :: nil.
 
 Definition destroyed_at_call_regs :=
   int_caller_save_regs ++ float_caller_save_regs.
@@ -73,7 +73,8 @@ Definition index_int_callee_save (r: mreg) :=
 
 Definition index_float_callee_save (r: mreg) :=
   match r with
-  | F4 => 0  | F5 => 1  | F6 => 2  | F7 => 3
+  | F8 => 0  | F9 => 1  | F10 => 2  | F11 => 3
+  | F12 => 4  | F13 => 5  | F14 => 6  | F15 => 7
   | _ => -1
   end.
 
@@ -240,9 +241,13 @@ Qed.
   proved in this section (such as no overlapping between the locations
   of function arguments), but this leaves much liberty in choosing actual
   locations.  To ensure binary interoperability of code generated by our
-  compiler with libraries compiled by another PowerPC compiler, we
-  implement the standard conventions defined in the PowerPC application
-  binary interface. *)
+  compiler with libraries compiled by another ARM EABI compiler, we
+  implement *almost* the standard conventions defined in the ARM EABI application
+  binary interface, with two exceptions:
+- Double-precision arguments and results are passed in VFP double registers
+  instead of pairs of integer registers.
+- Single-precision arguments and results are passed as double-precision floats.
+*)
 
 (** ** Location of function result *)
 
@@ -284,92 +289,59 @@ Qed.
 
 (** ** Location of function arguments *)
 
-(** We use the following calling conventions, adapted from the ARM ABI:
+(** We use the following calling conventions, adapted from the ARM EABI:
 - The first 4 integer arguments are passed in registers [R0] to [R3].
 - The first 2 float arguments are passed in registers [F0] and [F1].
-- Each float argument passed in a float register ``consumes'' two
-  integer arguments.
+- Each float argument passed in a float register ``consumes'' an aligned pair
+  of two integer registers.
 - Extra arguments are passed on the stack, in [Outgoing] slots, consecutively
   assigned (1 word for an integer argument, 2 words for a float),
   starting at word offset 0.
-
-These conventions are somewhat baroque, but they are mandated by the ABI.
 *)
 
-Fixpoint loc_arguments_rec
-    (tyl: list typ) (iregl: list mreg) (fregl: list mreg)
-    (ofs: Z) {struct tyl} : list loc :=
+Function ireg_param (n: Z) : mreg :=
+  if zeq n (-4) then R0
+  else if zeq n (-3) then R1
+  else if zeq n (-2) then R2
+  else if zeq n (-1) then R3
+  else R4. (**r should not happen *)
+
+Function freg_param (n: Z) : mreg :=
+  if zeq n (-4) then F0
+  else if zeq n (-3) then F1
+  else if zeq n (-2) then F1
+  else F2.  (**r should not happen *)
+
+
+Fixpoint loc_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
   | Tint :: tys =>
-      match iregl with
-      | nil =>
-          S (Outgoing ofs Tint) :: loc_arguments_rec tys nil fregl (ofs + 1)
-      | ireg :: iregs =>
-          R ireg :: loc_arguments_rec tys iregs fregl ofs
-      end
+      (if zle 0 ofs then S (Outgoing ofs Tint) else R (ireg_param ofs))
+      :: loc_arguments_rec tys (ofs + 1)
   | Tfloat :: tys =>
-      match fregl with
-      | nil =>
-          S (Outgoing ofs Tfloat) ::
-          loc_arguments_rec tys iregl nil (ofs + 2)
-      | freg :: fregs =>
-          match iregl with
-          | nil =>
-              S (Outgoing ofs Tfloat) ::
-              loc_arguments_rec tys nil fregl (ofs + 2)
-          | ireg :: nil =>
-              R freg ::
-              loc_arguments_rec tys nil fregs (ofs + 1)              
-          | ireg1 :: ireg2 :: iregs =>
-              R freg ::
-              loc_arguments_rec tys iregs fregs ofs
-          end
-      end
+      (if zle (-1) ofs then S (Outgoing (align ofs 2) Tfloat) else R (freg_param ofs))
+      :: loc_arguments_rec tys (align ofs 2 + 2)
   end.
 
-Definition int_param_regs :=
-  R0 :: R1 :: R2 :: R3 :: nil.
-Definition float_param_regs :=
-  F0 :: F1 :: nil.
-
 (** [loc_arguments s] returns the list of locations where to store arguments
   when calling a function with signature [s].  *)
 
 Definition loc_arguments (s: signature) : list loc :=
-  loc_arguments_rec s.(sig_args) int_param_regs float_param_regs 0.
+  loc_arguments_rec s.(sig_args) (-4).
 
 (** [size_arguments s] returns the number of [Outgoing] slots used
   to call a function with signature [s]. *)
 
-Fixpoint size_arguments_rec
-    (tyl: list typ) (iregl: list mreg) (fregl: list mreg)
-    (ofs: Z) {struct tyl} : Z :=
+Fixpoint size_arguments_rec (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
-  | Tint :: tys =>
-      match iregl with
-      | nil => size_arguments_rec tys nil fregl (ofs + 1)
-      | ireg :: iregs => size_arguments_rec tys iregs fregl ofs
-      end
-  | Tfloat :: tys =>
-      match fregl with
-      | nil =>
-          size_arguments_rec tys iregl nil (ofs + 2)
-      | freg :: fregs =>
-          match iregl with
-          | nil =>
-              size_arguments_rec tys nil fregl (ofs + 2)
-          | ireg :: nil =>
-              size_arguments_rec tys nil fregs (ofs + 1)              
-          | ireg1 :: ireg2 :: iregs =>
-              size_arguments_rec tys iregs fregs ofs
-          end
-      end
+  | Tint :: tys => size_arguments_rec tys (ofs + 1)
+  | Tfloat :: tys => size_arguments_rec tys (align ofs 2 + 2)
   end.
 
 Definition size_arguments (s: signature) : Z :=
-  size_arguments_rec s.(sig_args) int_param_regs float_param_regs 0.
+  Zmax 0 (size_arguments_rec s.(sig_args) (-4)).
 
 (** Argument locations are either non-temporary registers or [Outgoing] 
   stack slots at nonnegative offsets. *)
@@ -381,172 +353,122 @@ Definition loc_argument_acceptable (l: loc) : Prop :=
   | _ => False
   end.
 
+Lemma align_monotone:
+  forall x1 x2 y, y > 0 -> x1 <= x2 -> align x1 y <= align x2 y.
+Proof.
+  intros. unfold align. apply Zmult_le_compat_r. apply Z_div_le. 
+  omega. omega. omega.
+Qed.
+
 Remark loc_arguments_rec_charact:
-  forall tyl iregl fregl ofs l,
-  In l (loc_arguments_rec tyl iregl fregl ofs) ->
+  forall tyl ofs l,
+  In l (loc_arguments_rec tyl ofs) ->
   match l with
-  | R r => In r iregl \/ In r fregl
-  | S (Outgoing ofs' ty) => ofs' >= ofs
+  | R r => 
+       (exists n, ofs <= n < 0 /\ r = ireg_param n)
+    \/ (exists n, ofs <= n < -1 /\ r = freg_param n)
+  | S (Outgoing ofs' ty) => ofs' >= 0 /\ ofs' >= ofs
   | S _ => False
   end.
 Proof.
   induction tyl; simpl loc_arguments_rec; intros.
   elim H.
-  destruct a. 
-  destruct iregl; elim H; intro. 
-  subst l. omega.
-  generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega.
-  subst l. auto with coqlib.
-  generalize (IHtyl _ _ _ _ H0). destruct l; auto. simpl; intuition.
-  destruct fregl. 
-  elim H; intro.
-  subst l. omega.
-  generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega.
-  destruct iregl.
-  elim H; intro.
-  subst l. omega.
-  generalize (IHtyl _ _ _ _ H0). destruct l; auto. destruct s; auto. omega.
-  destruct iregl.
-  elim H; intro.
-  subst l. auto with coqlib.
-  generalize (IHtyl _ _ _ _ H0). destruct l; auto.
-  intros [A|B]. elim A. auto with coqlib.
-  destruct s; auto. omega.
-  elim H; intro.
-  subst l. auto with coqlib.
-  generalize (IHtyl _ _ _ _ H0). destruct l; auto.
-  intros [A|B]; auto with coqlib.
+  destruct a; elim H; intro.
+  subst l. destruct (zle 0 ofs). omega.
+  left. exists ofs; split; auto; omega.
+  generalize (IHtyl _ _ H0). 
+  destruct l. intros [[n A] | [n A]]; [left|right]; exists n; intuition omega.
+  destruct s; auto; omega.
+  subst l. destruct (zle (-1) ofs).
+  split. apply Zle_ge. change 0 with (align (-1) 2). apply align_monotone; omega.
+  apply Zle_ge. apply align_le. omega.
+  right. exists ofs. intuition.
+  assert (ofs <= align ofs 2) by (apply align_le; omega).
+  generalize (IHtyl _ _ H0). 
+  destruct l. intros [[n A] | [n A]]; [left|right]; exists n; intuition omega.
+  destruct s; auto; omega.
 Qed.
 
-Lemma loc_arguments_acceptable:
-  forall (s: signature) (r: loc),
-  In r (loc_arguments s) -> loc_argument_acceptable r.
-Proof.
-  unfold loc_arguments; intros.
-  generalize (loc_arguments_rec_charact _ _ _ _ _ H).
-  destruct r.
-  intro H0; elim H0. simpl. unfold not. ElimOrEq; NotOrEq.
-  simpl. unfold not. ElimOrEq; NotOrEq.
-  destruct s0; try contradiction.
-  simpl. omega. 
-Qed. 
-Hint Resolve loc_arguments_acceptable: locs.
-
-(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *)
-
-Remark loc_arguments_rec_notin_reg:
-  forall tyl iregl fregl ofs r,
-  ~(In r iregl) -> ~(In r fregl) ->
-  Loc.notin (R r) (loc_arguments_rec tyl iregl fregl ofs).
+Lemma loc_notin_in_diff:
+  forall l ll,
+  Loc.notin l ll <-> (forall l', In l' ll -> Loc.diff l l').
 Proof.
-  induction tyl; simpl; intros.
-  auto.
-  destruct a. 
-  destruct iregl; simpl. auto.
-  simpl in H. split. apply sym_not_equal. tauto.
-  apply IHtyl. tauto. tauto.
-  destruct fregl; simpl. auto. simpl in H0. 
-  destruct iregl; simpl. auto.
-  destruct iregl; simpl. 
-  split. apply sym_not_equal. tauto. apply IHtyl. hnf. tauto. tauto. 
-  split. apply sym_not_equal. tauto. apply IHtyl.  
-  red; intro. apply H. auto with coqlib. tauto.
+  induction ll; simpl; intuition. subst l'. auto. 
 Qed.
-
+  
 Remark loc_arguments_rec_notin_local:
-  forall tyl iregl fregl ofs ofs0 ty0,
-  Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs).
+  forall tyl ofs ofs0 ty0,
+  Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl ofs).
 Proof.
-  induction tyl; simpl; intros.
-  auto.
-  destruct a.
-  destruct iregl; simpl; auto.
-  destruct fregl; simpl; auto.
-  destruct iregl; simpl; auto.
-  destruct iregl; simpl; auto.
+  intros. rewrite loc_notin_in_diff. intros. 
+  exploit loc_arguments_rec_charact; eauto. 
+  destruct l'; intros; simpl; auto. destruct s; auto; contradiction.
 Qed.
 
-Remark loc_arguments_rec_notin_outgoing:
-  forall tyl iregl fregl ofs ofs0 ty0,
-  ofs0 + typesize ty0 <= ofs ->
-  Loc.notin (S (Outgoing ofs0 ty0)) (loc_arguments_rec tyl iregl fregl ofs).
+Lemma loc_arguments_acceptable:
+  forall (s: signature) (r: loc),
+  In r (loc_arguments s) -> loc_argument_acceptable r.
 Proof.
-  induction tyl; simpl; intros.
-  auto.
-  destruct a.
-  destruct iregl; simpl. 
-  split. omega. eapply IHtyl. omega.
-  auto.
-  destruct fregl; simpl. 
-  split. omega. eapply IHtyl. omega.
-  destruct iregl; simpl.
-  split. omega. eapply IHtyl. omega.
-  destruct iregl; simpl.
-  split; auto. eapply IHtyl. omega.
-  split; auto.
+  unfold loc_arguments; intros.
+  generalize (loc_arguments_rec_charact _ _ _ H).
+  destruct r; simpl.
+  intros [[n [A B]] | [n [A B]]]; subst m.
+  functional induction (ireg_param n); intuition congruence.
+  functional induction (freg_param n); intuition congruence.
+  destruct s0; tauto.
 Qed.
+Hint Resolve loc_arguments_acceptable: locs.
+
+(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *)
 
 Lemma loc_arguments_norepet:
   forall (s: signature), Loc.norepet (loc_arguments s).
 Proof.
-  assert (forall tyl iregl fregl ofs,
-    list_norepet iregl ->
-    list_norepet fregl ->
-    list_disjoint iregl fregl ->
-    Loc.norepet (loc_arguments_rec tyl iregl fregl ofs)).
-  induction tyl; simpl; intros.
-  constructor.
-  destruct a. 
-  destruct iregl; constructor.
-  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
-  apply loc_arguments_rec_notin_reg. inversion H. auto.
-  apply list_disjoint_notin with (m :: iregl); auto with coqlib.
-  apply IHtyl. inv H; auto. auto.
-  eapply list_disjoint_cons_left; eauto.
-
-  destruct fregl. constructor.
-  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
-  destruct iregl. constructor.
-  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
-  destruct iregl; constructor.
-  apply loc_arguments_rec_notin_reg.
-  red; intro. apply (H1 m m). auto with coqlib. auto with coqlib. auto. 
-  inv H0; auto.
-  apply IHtyl. constructor. inv H0; auto. 
-  red; intros. elim H2.
-  apply loc_arguments_rec_notin_reg.
-  red; intros. elim (H1 m m); auto with coqlib.
-  inv H0; auto.
-  apply IHtyl. inv H. inv H5. auto. inv H0; auto. 
-  red; intros. apply H1; auto with coqlib.
-
-  intro. unfold loc_arguments. apply H.
-  unfold int_param_regs. NoRepet.
-  unfold float_param_regs. NoRepet.
-  red; intros x y; simpl. ElimOrEq; ElimOrEq; discriminate.
+  unfold loc_arguments; intros. 
+  assert (forall tyl ofs, -4 <= ofs -> Loc.norepet (loc_arguments_rec tyl ofs)).
+    induction tyl; simpl; intros.
+    constructor.
+    destruct a; constructor.
+    rewrite loc_notin_in_diff. intros. exploit loc_arguments_rec_charact; eauto. 
+    destruct (zle 0 ofs); destruct l'; simpl; auto.
+    destruct s0; intuition.
+    intros [[n [A B]] | [n [A B]]]; subst m.
+    functional induction (ireg_param ofs); functional induction (ireg_param n); congruence || omegaContradiction.
+    functional induction (ireg_param ofs); functional induction (freg_param n); congruence || omegaContradiction.
+    apply IHtyl. omega.
+    rewrite loc_notin_in_diff. intros. exploit loc_arguments_rec_charact; eauto. 
+    destruct (zle (-1) ofs); destruct l'; simpl; auto.
+    destruct s0; intuition.
+    intros [[n [A B]] | [n [A B]]]; subst m.
+    functional induction (freg_param ofs); functional induction (ireg_param n); congruence || omegaContradiction.
+    functional induction (freg_param ofs); functional induction (freg_param n); try (congruence || omegaContradiction).
+    compute in A. intuition.
+    compute in A. intuition.
+    compute in A. intuition.
+    compute in A. intuition.
+    compute in A. intuition.
+    apply IHtyl. assert (ofs <= align ofs 2) by (apply align_le; omega). omega.
+  apply H. omega.
 Qed.
 
 (** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
 
 Remark size_arguments_rec_above:
-  forall tyl iregl fregl ofs0,
-  ofs0 <= size_arguments_rec tyl iregl fregl ofs0.
+  forall tyl ofs,
+  ofs <= size_arguments_rec tyl ofs.
 Proof.
   induction tyl; simpl; intros.
   omega.
   destruct a.
-  destruct iregl. apply Zle_trans with (ofs0 + 1); auto; omega. auto.
-  destruct fregl. apply Zle_trans with (ofs0 + 2); auto; omega.
-  destruct iregl. apply Zle_trans with (ofs0 + 2); auto; omega.
-  destruct iregl. apply Zle_trans with (ofs0 + 1); auto; omega.
-  auto.
+  apply Zle_trans with (ofs + 1); auto; omega.
+  assert (ofs <= align ofs 2) by (apply align_le; omega).
+  apply Zle_trans with (align ofs 2 + 2); auto; omega.
 Qed.
 
 Lemma size_arguments_above:
   forall s, size_arguments s >= 0.
 Proof.
-  intros; unfold size_arguments. apply Zle_ge. 
-  apply size_arguments_rec_above.
+  intros; unfold size_arguments. apply Zle_ge. apply Zmax1.
 Qed.
 
 Lemma loc_arguments_bounded:
@@ -555,23 +477,24 @@ Lemma loc_arguments_bounded:
   ofs + typesize ty <= size_arguments s.
 Proof.
   intros.
-  assert (forall tyl iregl fregl ofs0,
-    In (S (Outgoing ofs ty)) (loc_arguments_rec tyl iregl fregl ofs0) ->
-    ofs + typesize ty <= size_arguments_rec tyl iregl fregl ofs0).
+  assert (forall tyl ofs0,
+          0 <= ofs0 ->
+          ofs0 <= Zmax 0 (size_arguments_rec tyl ofs0)).
+    intros. generalize (size_arguments_rec_above tyl ofs0). intros.
+    rewrite Zmax_spec. rewrite zlt_false. auto. omega. 
+  assert (forall tyl ofs0,
+    In (S (Outgoing ofs ty)) (loc_arguments_rec tyl ofs0) ->
+    ofs + typesize ty <= Zmax 0 (size_arguments_rec tyl ofs0)).
   induction tyl; simpl; intros.
-  elim H0.
-  destruct a. destruct iregl; elim H0; intro.
-  inv H1. simpl. apply size_arguments_rec_above. auto.
-  discriminate. auto. 
-  destruct fregl. elim H0; intro.
-  inv H1. simpl. apply size_arguments_rec_above. auto.
-  destruct iregl. elim H0; intro.
-  inv H1. simpl. apply size_arguments_rec_above. auto.
-  destruct iregl.
-  elim H0; intro. inv H1. auto.
-  elim H0; intro. inv H1. auto.
+  elim H1.
+  destruct a; elim H1; intros.
+  destruct (zle 0 ofs0); inv H2. apply H0. omega. auto.
+  destruct (zle (-1) ofs0); inv H2. apply H0. 
+  assert (align (-1) 2 <= align ofs0 2). apply align_monotone. omega. auto.
+  change (align (-1) 2) with 0 in H2. omega. 
+  auto.
  
-  unfold size_arguments. eapply H0. unfold loc_arguments in H. eauto.
+  unfold size_arguments. apply H1. auto.  
 Qed.
 
 (** Temporary registers do not overlap with argument locations. *)
@@ -579,12 +502,10 @@ Qed.
 Lemma loc_arguments_not_temporaries:
   forall sig, Loc.disjoint (loc_arguments sig) temporaries.
 Proof.
-  intros; red; intros x1 x2 H.
-  generalize (loc_arguments_rec_charact _ _ _ _ _ H).
-  destruct x1. 
-  intro H0; elim H0; simpl; (ElimOrEq; ElimOrEq; congruence).
-  destruct s; try contradiction. intro.
-  simpl; ElimOrEq; auto.
+  intros; red; intros x1 x2 A B.
+  exploit loc_arguments_acceptable; eauto. unfold loc_argument_acceptable. 
+  destruct x1; intros. simpl. destruct x2; auto. intuition congruence.
+  destruct s; try contradiction. revert B. simpl. ElimOrEq; auto.
 Qed.
 Hint Resolve loc_arguments_not_temporaries: locs.
 
@@ -595,9 +516,9 @@ Lemma arguments_caller_save:
   In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call.
 Proof.
   unfold loc_arguments; intros.
-  elim (loc_arguments_rec_charact _ _ _ _ _ H); simpl.
-  ElimOrEq; intuition.
-  ElimOrEq; intuition.
+  destruct (loc_arguments_rec_charact _ _ _ H) as [[n [A B]] | [n [A B]]]; subst r.
+  functional induction (ireg_param n); simpl; auto. omegaContradiction.
+  functional induction (freg_param n); simpl; auto 10.
 Qed.
 
 (** Argument locations agree in number with the function signature. *)
@@ -606,15 +527,10 @@ Lemma loc_arguments_length:
   forall sig,
   List.length (loc_arguments sig) = List.length sig.(sig_args).
 Proof.
-  assert (forall tyl iregl fregl ofs,
-    List.length (loc_arguments_rec tyl iregl fregl ofs) = List.length tyl).
+  assert (forall tyl ofs, List.length (loc_arguments_rec tyl ofs) = List.length tyl).
   induction tyl; simpl; intros.
   auto.
-  destruct a. 
-  destruct iregl; simpl; decEq; auto.
-  destruct fregl; simpl; decEq; auto.
-  destruct iregl; simpl. decEq; auto.
-  destruct iregl; simpl; decEq; auto.
+  destruct a; simpl; decEq; auto.
 
   intros. unfold loc_arguments. auto.
 Qed.
@@ -624,23 +540,12 @@ Qed.
 Lemma loc_arguments_type:
   forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args).
 Proof.
-  assert (forall tyl iregl fregl ofs,
-    (forall r, In r iregl -> mreg_type r = Tint) ->
-    (forall r, In r fregl -> mreg_type r = Tfloat) ->
-    List.map Loc.type (loc_arguments_rec tyl iregl fregl ofs) = tyl).
+  assert (forall tyl ofs, List.map Loc.type (loc_arguments_rec tyl ofs) = tyl).
   induction tyl; simpl; intros.
   auto.
-  destruct a. 
-  destruct iregl; simpl; f_equal; eauto with coqlib. 
-  destruct fregl; simpl.
-  f_equal; eauto with coqlib.
-  destruct iregl; simpl.
-  f_equal; eauto with coqlib.
-  destruct iregl; simpl; f_equal; eauto with coqlib.
-  apply IHtyl. simpl; tauto. auto with coqlib. 
-  apply IHtyl. auto with coqlib. auto with coqlib.
+  destruct a; simpl; decEq; auto.
+  destruct (zle 0 ofs). auto. functional induction (ireg_param ofs); auto.
+  destruct (zle (-1) ofs). auto. functional induction (freg_param ofs); auto.
 
   intros. unfold loc_arguments. apply H. 
-  intro; simpl. ElimOrEq; reflexivity.
-  intro; simpl. ElimOrEq; reflexivity.
 Qed.
diff --git a/configure b/configure
index 5744c54..da2e59f 100755
--- a/configure
+++ b/configure
@@ -32,7 +32,7 @@ usage='Usage: ./configure [options] target
 Supported targets:
   ppc-macosx       (PowerPC, MacOS X)
   ppc-linux        (PowerPC, Linux)
-  arm-linux        (ARM, Linux)
+  arm-linux        (ARM, Linux EABI)
   ia32-linux       (x86 32 bits, Linux)
   ia32-bsd         (x86 32 bits, BSD)
   ia32-macosx      (x86 32 bits, MacOS X)
@@ -91,7 +91,7 @@ case "$target" in
     variant="linux"
     system="linux"
     cc="gcc"
-    cprepro="gcc -U__GNUC__ -E"
+    cprepro="gcc -U__GNUC__ '-D__REDIRECT(name,proto,alias)=name proto' '-D__REDIRECT_NTH(name,proto,alias)=name proto' -E"
     casm="gcc -c"
     clinker="gcc"
     libmath="-lm"
author	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2011-07-30 09:54:35 +0000
committer	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2011-07-30 09:54:35 +0000
commit	1fe68ad575178f7d8a775906947d2fed94d40976 (patch)
tree	3bb4b2d1b101f66bcb6f84bd36ce8e334082f7ea
parent	9b45e1d24a337e3f0047bf5056315169d4203b49 (diff)