From 4297fcb821c3188449b64184af73e41491a6118f Mon Sep 17 00:00:00 2001
From: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>
Date: Mon, 23 Jul 2012 15:01:54 +0000
Subject: - Revised non-overflow constraints on memory injections so that  
 injections compose (Values, Memdata, Memory) - Memory chunks: Mfloat64 now
 has alignment 8; introduced Mfloat64al32   that works like old Mfloat64 (i.e.
 has alignment 4); simplified   handling of memcpy builtin accordingly.

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@1983 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
---
 arm/Asm.v                   |   6 +-
 arm/Asmgen.v                |   4 +-
 arm/Asmgenproof.v           |   2 +
 arm/Asmgenproof1.v          |   4 +-
 arm/Op.v                    |  11 --
 arm/PrintAsm.ml             |   4 +-
 arm/SelectOp.vp             |   2 +
 arm/linux/Stacklayout.v     |  10 +-
 backend/CMtypecheck.ml      |   2 +
 backend/Inliningproof.v     |   5 +
 backend/Mach.v              |   2 +-
 backend/PrintCminor.ml      |   1 +
 backend/Stackingproof.v     |   6 +-
 cfrontend/Cexec.v           |   7 +-
 cfrontend/Cminorgen.v       |   3 +-
 cfrontend/Cminorgenproof.v  |   7 +-
 cfrontend/Csharpminor.v     |   4 +-
 cfrontend/Cshmgen.v         |   2 +-
 cfrontend/Cshmgenproof.v    |  14 +--
 checklink/Check.ml          |   4 +-
 common/AST.v                |   4 +-
 common/Events.v             |   2 +-
 common/Memdata.v            |  44 ++++++--
 common/Memory.v             | 270 ++++++++++++++++++++++++++++----------------
 common/Memtype.v            |   1 +
 common/PrintAST.ml          |   1 +
 common/Values.v             |  25 +++-
 driver/Interp.ml            |   2 +-
 ia32/Asm.v                  |  10 +-
 ia32/Asmgen.v               |   4 +-
 ia32/Asmgenproof1.v         |  10 +-
 ia32/PrintAsm.ml            |   4 +-
 ia32/standard/Stacklayout.v |   5 +-
 powerpc/Asm.v               |  10 +-
 powerpc/Asmgen.v            |   4 +-
 powerpc/Asmgenproof.v       |   9 +-
 powerpc/PrintAsm.ml         |   4 +-
 powerpc/eabi/Stacklayout.v  |   5 +-
 38 files changed, 331 insertions(+), 183 deletions(-)

diff --git a/arm/Asm.v b/arm/Asm.v
index 58b2634..1e4bfa0 100644
--- a/arm/Asm.v
+++ b/arm/Asm.v
@@ -515,11 +515,11 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pfcvtsd r1 r2 =>
       OK (nextinstr (rs#r1 <- (Val.singleoffloat rs#r2))) m
   | Pfldd r1 r2 n =>
-      exec_load Mfloat64 (Val.add rs#r2 (Vint n)) r1 rs m
+      exec_load Mfloat64al32 (Val.add rs#r2 (Vint n)) r1 rs m
   | Pflds r1 r2 n =>
       exec_load Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m
   | Pfstd r1 r2 n =>      
-      exec_store Mfloat64 (Val.add rs#r2 (Vint n)) r1 rs m
+      exec_store Mfloat64al32 (Val.add rs#r2 (Vint n)) r1 rs m
   | Pfsts r1 r2 n =>
       match exec_store Mfloat32 (Val.add rs#r2 (Vint n)) r1 rs m with
       | OK rs' m' => OK (rs'#FR7 <- Vundef) m'
@@ -616,7 +616,7 @@ Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
       extcall_arg rs m (S (Outgoing ofs Tint)) v
   | extcall_arg_float_stack: forall ofs bofs v,
       bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mfloat64 m (Val.add (rs (IR IR13)) (Vint (Int.repr bofs))) = Some v ->
+      Mem.loadv Mfloat64al32 m (Val.add (rs (IR IR13)) (Vint (Int.repr bofs))) = Some v ->
       extcall_arg rs m (S (Outgoing ofs Tfloat)) v.
 
 Definition extcall_arguments
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index 7cf25f5..4200c11 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -469,7 +469,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) (k: code) :=
           transl_load_store_int Pldr is_immed_mem_word dst addr args k
       | Mfloat32 =>
           transl_load_store_float Pflds is_immed_mem_float dst addr args k
-      | Mfloat64 =>
+      | Mfloat64 | Mfloat64al32 =>
           transl_load_store_float Pfldd is_immed_mem_float dst addr args k
       end
   | Mstore chunk addr args src =>
@@ -486,7 +486,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) (k: code) :=
           transl_load_store_int Pstr is_immed_mem_word src addr args k
       | Mfloat32 =>
           transl_load_store_float Pfsts is_immed_mem_float src addr args k
-      | Mfloat64 =>
+      | Mfloat64 | Mfloat64al32 =>
           transl_load_store_float Pfstd is_immed_mem_float src addr args k
       end
   | Mcall sig (inl r) =>
diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v
index a888aae..8ee9c2e 100644
--- a/arm/Asmgenproof.v
+++ b/arm/Asmgenproof.v
@@ -816,6 +816,7 @@ Proof.
   left; eapply exec_straight_steps; eauto with coqlib.
   exists m'; split; auto.
   destruct chunk; simpl; simpl in H6;
+  try (generalize (Mem.loadv_float64al32 _ _ _ H0); intros);
   (eapply transl_load_int_correct || eapply transl_load_float_correct);
   eauto; intros; reflexivity.
 Qed.
@@ -843,6 +844,7 @@ Proof.
   destruct chunk; simpl; simpl in H6;
   try (rewrite storev_8_signed_unsigned in H0);
   try (rewrite storev_16_signed_unsigned in H0);
+  try (generalize (Mem.storev_float64al32 _ _ _ _ H0); intros);
   simpl;
   (eapply transl_store_int_correct || eapply transl_store_float_correct);
   eauto; intros; simpl; auto.
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index 29197e9..e7d2509 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -802,7 +802,7 @@ Qed.
 
 Lemma loadind_float_correct:
   forall (base: ireg) ofs dst (rs: regset) m v k,
-  Mem.loadv Mfloat64 m (Val.add rs#base (Vint ofs)) = Some v ->
+  Mem.loadv Mfloat64al32 m (Val.add rs#base (Vint ofs)) = Some v ->
   exists rs',
      exec_straight (loadind_float base ofs dst k) rs m k rs' m
   /\ rs'#dst = v
@@ -868,7 +868,7 @@ Qed.
 
 Lemma storeind_float_correct:
   forall (base: ireg) ofs (src: freg) (rs: regset) m m' k,
-  Mem.storev Mfloat64 m (Val.add rs#base (Vint ofs)) (rs#src) = Some m' ->
+  Mem.storev Mfloat64al32 m (Val.add rs#base (Vint ofs)) (rs#src) = Some m' ->
   base <> IR14 ->
   exists rs',
      exec_straight (storeind_float src base ofs k) rs m k rs' m'
diff --git a/arm/Op.v b/arm/Op.v
index fa05288..cfe8b83 100644
--- a/arm/Op.v
+++ b/arm/Op.v
@@ -342,17 +342,6 @@ Definition type_of_addressing (addr: addressing) : list typ :=
   | Ainstack _ => nil
   end.
 
-Definition type_of_chunk (c: memory_chunk) : typ :=
-  match c with
-  | Mint8signed => Tint
-  | Mint8unsigned => Tint
-  | Mint16signed => Tint
-  | Mint16unsigned => Tint
-  | Mint32 => Tint
-  | Mfloat32 => Tfloat
-  | Mfloat64 => Tfloat
-  end.
-
 (** Weak type soundness results for [eval_operation]:
   the result values, when defined, are always of the type predicted
   by [type_of_operation]. *)
diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml
index 6d6296b..ec1e836 100644
--- a/arm/PrintAsm.ml
+++ b/arm/PrintAsm.ml
@@ -343,7 +343,7 @@ let print_builtin_vload oc chunk args res =
     | Mfloat32, [IR addr], FR res ->
         fprintf oc "	flds	%a, [%a, #0]\n" freg_single res ireg addr;
         fprintf oc "	fcvtds	%a, %a\n" freg res freg_single res; 2
-    | Mfloat64, [IR addr], FR res ->
+    | (Mfloat64 | Mfloat64al32), [IR addr], FR res ->
         fprintf oc "	fldd	%a, [%a, #0]\n" freg res ireg addr; 1
     | _ ->
         assert false
@@ -363,7 +363,7 @@ let print_builtin_vstore oc chunk args =
     | Mfloat32, [IR addr; FR src] ->
         fprintf oc "	fcvtsd	%a, %a\n" freg_single FR6 freg src;
         fprintf oc "	fsts	%a, [%a, #0]\n" freg_single FR6 ireg addr; 2
-    | Mfloat64, [IR addr; FR src] ->
+    | (Mfloat64 | Mfloat64al32), [IR addr; FR src] ->
         fprintf oc "	fstd	%a, [%a, #0]\n" freg src ireg addr; 1
     | _ ->
         assert false
diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp
index 6049017..9296ce6 100644
--- a/arm/SelectOp.vp
+++ b/arm/SelectOp.vp
@@ -426,6 +426,7 @@ Definition can_use_Aindexed2 (chunk: memory_chunk): bool :=
   | Mint32 => true
   | Mfloat32 => false
   | Mfloat64 => false
+  | Mfloat64al32 => false
   end.
 
 Definition can_use_Aindexed2shift (chunk: memory_chunk): bool :=
@@ -437,6 +438,7 @@ Definition can_use_Aindexed2shift (chunk: memory_chunk): bool :=
   | Mint32 => true
   | Mfloat32 => false
   | Mfloat64 => false
+  | Mfloat64al32 => false
   end.
 
 Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
diff --git a/arm/linux/Stacklayout.v b/arm/linux/Stacklayout.v
index 4521114..d84da6b 100644
--- a/arm/linux/Stacklayout.v
+++ b/arm/linux/Stacklayout.v
@@ -109,7 +109,7 @@ Remark frame_env_aligned:
   /\ (8 | fe.(fe_ofs_float_local))
   /\ (8 | fe.(fe_ofs_float_callee_save))
   /\ (4 | fe.(fe_ofs_retaddr))
-  /\ (4 | fe.(fe_stack_data))
+  /\ (8 | fe.(fe_stack_data))
   /\ (8 | fe.(fe_size)).
 Proof.
   intros.
@@ -128,13 +128,15 @@ Proof.
   set (x5 := x4 + 8 * bound_float_local b).
   assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring.
   set (x6 := x5 + 8 * bound_float_callee_save b).
-  assert (4 | x6).
-    apply Zdivides_trans with 8. exists 2; auto.
+  assert (8 | x6).
     unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
+  assert (4 | x6).
+    apply Zdivides_trans with 8. exists 2; auto. auto.
   set (x7 := x6 + 4).
   assert (4 | x7). unfold x7; apply Zdivide_plus_r; auto. exists 1; auto.
   set (x8 := x7 + 4).
-  assert (4 | x8). unfold x8; apply Zdivide_plus_r; auto. exists 1; auto.
+  assert (8 | x8). unfold x8, x7. replace (x6 + 4 + 4) with (x6 + 8) by omega.
+    apply Zdivide_plus_r; auto. exists 1; auto.
   set (x9 := align (x8 + bound_stack_data b) 8).
   assert (8 | x9). unfold x9; apply align_divides. omega.
   tauto.
diff --git a/backend/CMtypecheck.ml b/backend/CMtypecheck.ml
index 8db488a..6e91f5b 100644
--- a/backend/CMtypecheck.ml
+++ b/backend/CMtypecheck.ml
@@ -176,6 +176,7 @@ let type_chunk = function
   | Mint32 -> tint
   | Mfloat32 -> tfloat
   | Mfloat64 -> tfloat
+  | Mfloat64al32 -> tfloat
 
 let name_of_chunk = function
   | Mint8signed -> "int8signed"
@@ -185,6 +186,7 @@ let name_of_chunk = function
   | Mint32 -> "int32"
   | Mfloat32 -> "float32"
   | Mfloat64 -> "float64"
+  | Mfloat64al32 -> "float64al32"
 
 let rec type_expr env lenv e =
   match e with
diff --git a/backend/Inliningproof.v b/backend/Inliningproof.v
index f88ca81..9ca351a 100644
--- a/backend/Inliningproof.v
+++ b/backend/Inliningproof.v
@@ -671,6 +671,11 @@ Proof.
     destruct (zle sz 2). omegaContradiction.
     destruct (zle sz 4). auto.
     apply Zdivides_trans with 8; auto. exists 2; auto.
+  assert (8 <= sz -> (8 | n)). intros.
+    destruct (zle sz 1). omegaContradiction.
+    destruct (zle sz 2). omegaContradiction.
+    destruct (zle sz 4). omegaContradiction.
+    auto.
   destruct chunk; simpl in *; auto.
   apply Zone_divide.
   apply Zone_divide.
diff --git a/backend/Mach.v b/backend/Mach.v
index 5c9cff5..669d35e 100644
--- a/backend/Mach.v
+++ b/backend/Mach.v
@@ -97,7 +97,7 @@ Definition genv := Genv.t fundef unit.
 (** The operational semantics is in module [Machsem]. *)
 
 Definition chunk_of_type (ty: typ) :=
-  match ty with Tint => Mint32 | Tfloat => Mfloat64 end.
+  match ty with Tint => Mint32 | Tfloat => Mfloat64al32 end.
 
 Module RegEq.
   Definition t := mreg.
diff --git a/backend/PrintCminor.ml b/backend/PrintCminor.ml
index cdbd870..66b579e 100644
--- a/backend/PrintCminor.ml
+++ b/backend/PrintCminor.ml
@@ -107,6 +107,7 @@ let name_of_chunk = function
   | Mint32 -> "int32"
   | Mfloat32 -> "float32"
   | Mfloat64 -> "float64"
+  | Mfloat64al32 -> "float64al32"
 
 (* Expressions *)
 
diff --git a/backend/Stackingproof.v b/backend/Stackingproof.v
index be5e4b9..1cfb738 100644
--- a/backend/Stackingproof.v
+++ b/backend/Stackingproof.v
@@ -232,7 +232,7 @@ Proof.
 Qed.
 
 Lemma fe_stack_data_aligned:
-  (4 | fe_stack_data fe).
+  (8 | fe_stack_data fe).
 Proof.
   intros.
   generalize (frame_env_aligned b). intuition. fold fe in H. intuition.
@@ -371,7 +371,7 @@ Lemma gss_index_contains_base:
   /\ decode_encode_val v (chunk_of_type (type_of_index idx)) (chunk_of_type (type_of_index idx)) v'.
 Proof.
   intros. 
-  exploit Mem.load_store_similar. eauto. reflexivity. 
+  exploit Mem.load_store_similar. eauto. reflexivity. omega. 
   intros [v' [A B]].
   exists v'; split; auto. constructor; auto.
 Qed.
@@ -1408,7 +1408,7 @@ Proof.
     intros. apply Mem.perm_implies with Freeable; auto with mem. 
     eapply Mem.perm_alloc_2; eauto.
     generalize stack_data_offset_valid bound_stack_data_stacksize; omega.
-    red. intros. apply Zdivides_trans with 4. 
+    red. intros. apply Zdivides_trans with 8. 
     destruct chunk; simpl; auto with align_4.
     apply fe_stack_data_aligned.
     intros.
diff --git a/cfrontend/Cexec.v b/cfrontend/Cexec.v
index f589fab..5427ac6 100644
--- a/cfrontend/Cexec.v
+++ b/cfrontend/Cexec.v
@@ -435,7 +435,7 @@ Definition do_ef_free
 
 Definition memcpy_args_ok
   (sz al: Z) (bdst: block) (odst: Z) (bsrc: block) (osrc: Z) : Prop :=
-      (al = 1 \/ al = 2 \/ al = 4)
+      (al = 1 \/ al = 2 \/ al = 4 \/ al = 8)
    /\ sz > 0
    /\ (al | sz) /\ (al | osrc) /\ (al | odst)
    /\ (bsrc <> bdst \/ osrc = odst \/ osrc + sz <= odst \/ odst + sz <= osrc).
@@ -445,8 +445,9 @@ Remark memcpy_check_args:
   {memcpy_args_ok sz al bdst odst bsrc osrc} + {~memcpy_args_ok sz al bdst odst bsrc osrc}.
 Proof with try (right; intuition omega).
   intros. 
-  assert (X: {al = 1 \/ al = 2 \/ al = 4} + {~(al = 1 \/ al = 2 \/ al = 4)}).
-    destruct (zeq al 1); auto. destruct (zeq al 2); auto. destruct (zeq al 4); auto...
+  assert (X: {al = 1 \/ al = 2 \/ al = 4 \/ al = 8} + {~(al = 1 \/ al = 2 \/ al = 4 \/ al = 8)}).
+    destruct (zeq al 1); auto. destruct (zeq al 2); auto.
+    destruct (zeq al 4); auto. destruct (zeq al 8); auto...
   unfold memcpy_args_ok. destruct X...
   assert (al > 0) by (intuition omega).
   destruct (zlt 0 sz)...
diff --git a/cfrontend/Cminorgen.v b/cfrontend/Cminorgen.v
index 20e7bdb..c6d6fd5 100644
--- a/cfrontend/Cminorgen.v
+++ b/cfrontend/Cminorgen.v
@@ -219,6 +219,7 @@ Definition of_chunk (chunk: memory_chunk) :=
   | Mint32 => Any
   | Mfloat32 => Float32
   | Mfloat64 => Any
+  | Mfloat64al32 => Any
   end.
 
 Definition unop (op: unary_operation) (a: approx) :=
@@ -322,7 +323,7 @@ Definition var_set_self (cenv: compilenv) (id: ident) (k: stmt): res stmt :=
   | Var_stack_scalar chunk ofs =>
       OK (Sseq (make_store chunk (make_stackaddr ofs) (Evar (for_var id))) k)
   | Var_stack_array ofs sz al =>
-      OK (Sseq (Sbuiltin None (EF_memcpy sz (Zmin al 4))
+      OK (Sseq (Sbuiltin None (EF_memcpy sz al)
                          (make_stackaddr ofs :: Evar (for_var id) :: nil)) k)
   | _ =>
       Error(msg "Cminorgen.var_set_self")
diff --git a/cfrontend/Cminorgenproof.v b/cfrontend/Cminorgenproof.v
index 7b18d8f..ea5d68e 100644
--- a/cfrontend/Cminorgenproof.v
+++ b/cfrontend/Cminorgenproof.v
@@ -1753,6 +1753,8 @@ Proof.
   inv B; auto. inv H0; auto. constructor. auto.
   (* float64 *)
   exists va; auto.
+  (* float64al32 *)
+  exists va; auto.
 Qed.
 
 Lemma storev_mapped_content_inject:
@@ -2069,8 +2071,7 @@ Lemma var_set_self_correct_array:
   val_inject f v tv ->
   Mem.inject f m tm ->
   PTree.get id e = Some(b, Varray sz al) ->
-  extcall_memcpy_sem sz (Zmin al 4) ge
-                             (Vptr b Int.zero :: v :: nil) m E0 Vundef m' ->
+  extcall_memcpy_sem sz al ge (Vptr b Int.zero :: v :: nil) m E0 Vundef m' ->
   te!(for_var id) = Some tv ->
   exists f', exists tm',
     star step tge (State fn a k (Vptr sp Int.zero) te tm)
@@ -2087,7 +2088,7 @@ Proof.
   (* var_stack_array *)
   unfold var_set_self in VS. rewrite <- H in VS. inv VS.
   exploit match_callstack_match_globalenvs; eauto. intros [hi' MG].
-  assert (external_call (EF_memcpy sz0 (Zmin al0 4)) ge (Vptr b0 Int.zero :: v :: nil) m E0 Vundef m').
+  assert (external_call (EF_memcpy sz0 al0) ge (Vptr b0 Int.zero :: v :: nil) m E0 Vundef m').
     assumption.
   exploit external_call_mem_inject; eauto. 
   eapply inj_preserves_globals; eauto.
diff --git a/cfrontend/Csharpminor.v b/cfrontend/Csharpminor.v
index 88eb3c7..b4993e7 100644
--- a/cfrontend/Csharpminor.v
+++ b/cfrontend/Csharpminor.v
@@ -320,8 +320,8 @@ Inductive bind_parameters: env ->
   | bind_parameters_array:
       forall e m id sz al params v1 vl b m1 m2,
       PTree.get id e = Some (b, Varray sz al) ->
-      extcall_memcpy_sem sz (Zmin al 4) 
-                   ge (Vptr b Int.zero :: v1 :: nil) m E0 Vundef m1 ->
+      extcall_memcpy_sem sz al
+                         ge (Vptr b Int.zero :: v1 :: nil) m E0 Vundef m1 ->
       bind_parameters e m1 params vl m2 ->
       bind_parameters e m ((id, Varray sz al) :: params) (v1 :: vl) m2.
 
diff --git a/cfrontend/Cshmgen.v b/cfrontend/Cshmgen.v
index 3145681..b635b7d 100644
--- a/cfrontend/Cshmgen.v
+++ b/cfrontend/Cshmgen.v
@@ -273,7 +273,7 @@ Definition make_vol_load (dst: ident) (addr: expr) (ty: type) :=
   by-copy assignment of a value of Clight type [ty]. *)
 
 Definition make_memcpy (dst src: expr) (ty: type) :=
-  Sbuiltin None (EF_memcpy (Csyntax.sizeof ty) (Zmin (Csyntax.alignof ty) 4))
+  Sbuiltin None (EF_memcpy (Csyntax.sizeof ty) (Csyntax.alignof ty))
                 (dst :: src :: nil).
 
 (** [make_store addr ty rhs] stores the value of the
diff --git a/cfrontend/Cshmgenproof.v b/cfrontend/Cshmgenproof.v
index 0b8b9a0..47bc1c6 100644
--- a/cfrontend/Cshmgenproof.v
+++ b/cfrontend/Cshmgenproof.v
@@ -727,6 +727,7 @@ Proof.
   rewrite H in MKLOAD. inv MKLOAD. constructor; auto. 
 Qed.
 
+(*
 Remark capped_alignof_divides:
   forall ty n, 
   (alignof ty | n) -> (Zmin (alignof ty) 4 | n).
@@ -743,6 +744,7 @@ Proof.
   intros. generalize (alignof_1248 ty). 
   intros [A|[A|[A|A]]]; rewrite A; auto.
 Qed.
+*)
 
 Lemma make_memcpy_correct:
   forall f dst src ty k e le m b ofs v t m',
@@ -757,11 +759,9 @@ Proof.
   econstructor.
   econstructor. eauto. econstructor. eauto. constructor. 
   econstructor; eauto. 
-  apply capped_alignof_124.
+  apply alignof_1248.
   apply sizeof_pos. 
-  apply capped_alignof_divides. apply sizeof_alignof_compat.
-  apply capped_alignof_divides; auto.
-  apply capped_alignof_divides; auto.
+  apply sizeof_alignof_compat.
 Qed.
  
 Lemma make_store_correct:
@@ -1046,11 +1046,9 @@ Proof.
   apply bind_parameters_array with b m1. 
   exploit me_local; eauto. intros [vk [A B]]. congruence. 
   econstructor; eauto. 
-  apply capped_alignof_124.
+  apply alignof_1248.
   apply sizeof_pos. 
-  apply capped_alignof_divides. apply sizeof_alignof_compat. 
-  apply capped_alignof_divides; auto.
-  apply capped_alignof_divides; auto.
+  apply sizeof_alignof_compat. 
   apply IHbind_parameters; auto.
 Qed.
 
diff --git a/checklink/Check.ml b/checklink/Check.ml
index c60a4a7..d1b6007 100644
--- a/checklink/Check.ml
+++ b/checklink/Check.ml
@@ -2420,7 +2420,7 @@ and check_builtin_vload_common ccode ecode pc chunk addr offset res fw =
           >>= recur_simpl
       | _ -> error
       end
-  | Mfloat64, FR res ->
+  | (Mfloat64 | Mfloat64al32), FR res ->
       begin match ecode with
       | LFD(frD, rA, d) :: es ->
           OK(fw)
@@ -2480,7 +2480,7 @@ and check_builtin_vstore_common ccode ecode pc chunk addr offset src fw =
           >>= compare_code ccode es (Int32.add pc 8l)
       | _ -> error
       end
-  | Mfloat64, FR src ->
+  | (Mfloat64 | Mfloat64al32), FR src ->
       begin match ecode with
       | STFD(frS, rA, d) :: es ->
           OK(fw)
diff --git a/common/AST.v b/common/AST.v
index bcd63a2..6425cb0 100644
--- a/common/AST.v
+++ b/common/AST.v
@@ -80,7 +80,8 @@ Inductive memory_chunk : Type :=
   | Mint16unsigned : memory_chunk  (**r 16-bit unsigned integer *)
   | Mint32 : memory_chunk          (**r 32-bit integer, or pointer *)
   | Mfloat32 : memory_chunk        (**r 32-bit single-precision float *)
-  | Mfloat64 : memory_chunk.       (**r 64-bit double-precision float *)
+  | Mfloat64 : memory_chunk        (**r 64-bit double-precision float *)
+  | Mfloat64al32 : memory_chunk.   (**r 64-bit double-precision float, 4-aligned *)
 
 (** The type (integer/pointer or float) of a chunk. *)
 
@@ -93,6 +94,7 @@ Definition type_of_chunk (c: memory_chunk) : typ :=
   | Mint32 => Tint
   | Mfloat32 => Tfloat
   | Mfloat64 => Tfloat
+  | Mfloat64al32 => Tfloat
   end.
 
 (** Initialization data for global variables. *)
diff --git a/common/Events.v b/common/Events.v
index 93e1827..b36a86f 100644
--- a/common/Events.v
+++ b/common/Events.v
@@ -1283,7 +1283,7 @@ Qed.
 
 Inductive extcall_memcpy_sem (sz al: Z) (F V: Type) (ge: Genv.t F V): list val -> mem -> trace -> val -> mem -> Prop :=
   | extcall_memcpy_sem_intro: forall bdst odst bsrc osrc m bytes m',
-      al = 1 \/ al = 2 \/ al = 4 -> sz > 0 ->
+      al = 1 \/ al = 2 \/ al = 4 \/ al = 8 -> sz > 0 ->
       (al | sz) -> (al | Int.unsigned osrc) -> (al | Int.unsigned odst) ->
       bsrc <> bdst \/ Int.unsigned osrc = Int.unsigned odst
                    \/ Int.unsigned osrc + sz <= Int.unsigned odst
diff --git a/common/Memdata.v b/common/Memdata.v
index 6f1ad67..611a32b 100644
--- a/common/Memdata.v
+++ b/common/Memdata.v
@@ -37,6 +37,7 @@ Definition size_chunk (chunk: memory_chunk) : Z :=
   | Mint32 => 4
   | Mfloat32 => 4
   | Mfloat64 => 8
+  | Mfloat64al32 => 8
   end.
 
 Lemma size_chunk_pos:
@@ -80,7 +81,10 @@ Definition align_chunk (chunk: memory_chunk) : Z :=
   | Mint8unsigned => 1
   | Mint16signed => 2
   | Mint16unsigned => 2
-  | _ => 4
+  | Mint32 => 4
+  | Mfloat32 => 4
+  | Mfloat64 => 8
+  | Mfloat64al32 => 4
   end.
 
 Lemma align_chunk_pos:
@@ -95,12 +99,16 @@ Proof.
   intros. destruct chunk; simpl; try apply Zdivide_refl. exists 2; auto. 
 Qed.
 
-Lemma align_chunk_compat:
+Lemma align_le_divides:
   forall chunk1 chunk2,
-  size_chunk chunk1 = size_chunk chunk2 -> align_chunk chunk1 = align_chunk chunk2.
+  align_chunk chunk1 <= align_chunk chunk2 -> (align_chunk chunk1 | align_chunk chunk2).
 Proof.
-  intros chunk1 chunk2. 
-  destruct chunk1; destruct chunk2; simpl; congruence.
+  intros. destruct chunk1; destruct chunk2; simpl in *;
+  solve [ omegaContradiction
+        | apply Zdivide_refl
+        | exists 2; reflexivity
+        | exists 4; reflexivity
+        | exists 8; reflexivity ].
 Qed.
 
 (** * Memory values *)
@@ -331,7 +339,7 @@ Definition encode_val (chunk: memory_chunk) (v: val) : list memval :=
   | Vint n, Mint32 => inj_bytes (encode_int 4%nat (Int.unsigned n))
   | Vptr b ofs, Mint32 => inj_pointer 4%nat b ofs
   | Vfloat n, Mfloat32 => inj_bytes (encode_int 4%nat (Int.unsigned (Float.bits_of_single n)))
-  | Vfloat n, Mfloat64 => inj_bytes (encode_int 8%nat (Int64.unsigned (Float.bits_of_double n)))
+  | Vfloat n, (Mfloat64 | Mfloat64al32) => inj_bytes (encode_int 8%nat (Int64.unsigned (Float.bits_of_double n)))
   | _, _ => list_repeat (size_chunk_nat chunk) Undef
   end.
 
@@ -345,7 +353,7 @@ Definition decode_val (chunk: memory_chunk) (vl: list memval) : val :=
       | Mint16unsigned => Vint(Int.zero_ext 16 (Int.repr (decode_int bl)))
       | Mint32 => Vint(Int.repr(decode_int bl))
       | Mfloat32 => Vfloat(Float.single_of_bits (Int.repr (decode_int bl)))
-      | Mfloat64 => Vfloat(Float.double_of_bits (Int64.repr (decode_int bl)))
+      | Mfloat64 | Mfloat64al32 => Vfloat(Float.double_of_bits (Int64.repr (decode_int bl)))
       end
   | None =>
       match chunk with
@@ -384,13 +392,13 @@ Definition decode_encode_val (v1: val) (chunk1 chunk2: memory_chunk) (v2: val) :
   | Vint n, Mint16unsigned, Mint16unsigned => v2 = Vint(Int.zero_ext 16 n)
   | Vint n, Mint32, Mint32 => v2 = Vint n
   | Vint n, Mint32, Mfloat32 => v2 = Vfloat(Float.single_of_bits n)
-  | Vint n, (Mfloat32 | Mfloat64), _ => v2 = Vundef
+  | Vint n, (Mfloat32 | Mfloat64 | Mfloat64al32), _ => v2 = Vundef
   | Vint n, _, _ => True (**r nothing meaningful to say about v2 *)
   | Vptr b ofs, Mint32, Mint32 => v2 = Vptr b ofs
   | Vptr b ofs, _, _ => v2 = Vundef
   | Vfloat f, Mfloat32, Mfloat32 => v2 = Vfloat(Float.singleoffloat f)
   | Vfloat f, Mfloat32, Mint32 => v2 = Vint(Float.bits_of_single f)
-  | Vfloat f, Mfloat64, Mfloat64 => v2 = Vfloat f
+  | Vfloat f, (Mfloat64 | Mfloat64al32), (Mfloat64 | Mfloat64al32) => v2 = Vfloat f
   | Vfloat f, (Mint8signed|Mint8unsigned|Mint16signed|Mint16unsigned|Mint32), _ => v2 = Vundef
   | Vfloat f, _, _ => True   (* nothing interesting to say about v2 *)
   end.
@@ -423,6 +431,9 @@ Opaque inj_pointer.
   rewrite decode_encode_int_4. auto.
   rewrite decode_encode_int_4. decEq. apply Float.single_of_bits_of_single. 
   rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double.
+  rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double.
+  rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double.
+  rewrite decode_encode_int_8. decEq. apply Float.double_of_bits_of_double.
   change (proj_bytes (inj_pointer 4 b i)) with (@None (list byte)). simpl. 
   unfold proj_pointer. generalize (check_inj_pointer b i 4%nat).
 Transparent inj_pointer. 
@@ -816,3 +827,18 @@ Proof.
   unfold inject_id; reflexivity. rewrite Int.add_zero; auto. 
 Qed.
 
+(** [memval_inject] and compositions *)
+
+Lemma memval_inject_compose:
+  forall f f' v1 v2 v3,
+  memval_inject f v1 v2 -> memval_inject f' v2 v3 ->
+  memval_inject (compose_meminj f f') v1 v3.
+Proof.
+  intros. inv H.
+  inv H0. constructor.
+  inv H0. econstructor. 
+  unfold compose_meminj; rewrite H1; rewrite H5; eauto. 
+  rewrite Int.add_assoc. decEq. unfold Int.add. apply Int.eqm_samerepr. auto with ints.
+  constructor.
+Qed. 
+
diff --git a/common/Memory.v b/common/Memory.v
index f9b322e..0b99445 100644
--- a/common/Memory.v
+++ b/common/Memory.v
@@ -62,7 +62,7 @@ Record mem' : Type := mkmem {
   access_max: 
     forall b ofs, perm_order'' (mem_access#b ofs Max) (mem_access#b ofs Cur);
   nextblock_noaccess:
-    forall b ofs k, b <= 0 \/ b >= nextblock -> mem_access#b ofs k = None
+    forall b ofs k, b >= nextblock -> mem_access#b ofs k = None
 }.
 
 Definition mem := mem'.
@@ -267,11 +267,12 @@ Qed.
 Lemma valid_access_compat:
   forall m chunk1 chunk2 b ofs p,
   size_chunk chunk1 = size_chunk chunk2 ->
+  align_chunk chunk2 <= align_chunk chunk1 ->
   valid_access m chunk1 b ofs p->
   valid_access m chunk2 b ofs p.
 Proof.
-  intros. inv H0. rewrite H in H1. constructor; auto.
-  rewrite <- (align_chunk_compat _ _ H). auto.
+  intros. inv H1. rewrite H in H2. constructor; auto.
+  eapply Zdivide_trans; eauto. eapply align_le_divides; eauto.
 Qed.
 
 Lemma valid_access_dec:
@@ -678,6 +679,22 @@ Proof.
   rewrite pred_dec_false; auto.
 Qed.
 
+Theorem load_float64al32:
+  forall m b ofs v,
+  load Mfloat64 m b ofs = Some v -> load Mfloat64al32 m b ofs = Some v.
+Proof.
+  unfold load; intros. destruct (valid_access_dec m Mfloat64 b ofs Readable); try discriminate.
+  rewrite pred_dec_true. assumption. 
+  apply valid_access_compat with Mfloat64; auto. simpl; omega. 
+Qed.
+
+Theorem loadv_float64al32:
+  forall m a v,
+  loadv Mfloat64 m a = Some v -> loadv Mfloat64al32 m a = Some v.
+Proof.
+  unfold loadv; intros. destruct a; auto. apply load_float64al32; auto.
+Qed.
+
 (** ** Properties related to [loadbytes] *)
 
 Theorem range_perm_loadbytes:
@@ -925,11 +942,12 @@ Local Hint Resolve store_valid_access_1 store_valid_access_2 store_valid_access_
 Theorem load_store_similar:
   forall chunk',
   size_chunk chunk' = size_chunk chunk ->
+  align_chunk chunk' <= align_chunk chunk ->
   exists v', load chunk' m2 b ofs = Some v' /\ decode_encode_val v chunk chunk' v'.
 Proof.
   intros.
   exploit (valid_access_load m2 chunk').
-    eapply valid_access_compat. symmetry; eauto. eauto with mem. 
+    eapply valid_access_compat. symmetry; eauto. auto. eauto with mem. 
   intros [v' LOAD].
   exists v'; split; auto.
   exploit load_result; eauto. intros B. 
@@ -944,17 +962,18 @@ Qed.
 Theorem load_store_similar_2:
   forall chunk',
   size_chunk chunk' = size_chunk chunk ->
+  align_chunk chunk' <= align_chunk chunk ->
   type_of_chunk chunk' = type_of_chunk chunk ->
   load chunk' m2 b ofs = Some (Val.load_result chunk' v).
 Proof.
-  intros. destruct (load_store_similar chunk') as [v' [A B]]. auto.
+  intros. destruct (load_store_similar chunk') as [v' [A B]]; auto.
   rewrite A. decEq. eapply decode_encode_val_similar with (chunk1 := chunk); eauto.
 Qed.
 
 Theorem load_store_same:
   load chunk m2 b ofs = Some (Val.load_result chunk v).
 Proof.
-  apply load_store_similar_2; auto.
+  apply load_store_similar_2; auto. omega.
 Qed.
 
 Theorem load_store_other:
@@ -1229,6 +1248,7 @@ Qed.
 Lemma store_similar_chunks:
   forall chunk1 chunk2 v1 v2 m b ofs,
   encode_val chunk1 v1 = encode_val chunk2 v2 ->
+  align_chunk chunk1 = align_chunk chunk2 ->
   store chunk1 m b ofs v1 = store chunk2 m b ofs v2.
 Proof.
   intros. unfold store. 
@@ -1238,48 +1258,47 @@ Proof.
     rewrite <- (encode_val_length chunk2 v2).
     congruence.
   unfold store.
-  destruct (valid_access_dec m chunk1 b ofs Writable);
-  destruct (valid_access_dec m chunk2 b ofs Writable); auto.
+  destruct (valid_access_dec m chunk1 b ofs Writable).
+  rewrite pred_dec_true. 
   f_equal. apply mkmem_ext; auto. congruence.
-  elimtype False.
-  destruct chunk1; destruct chunk2;  inv H0; unfold valid_access, align_chunk in *; try contradiction.
-  elimtype False.
-  destruct chunk1; destruct chunk2;  inv H0; unfold valid_access, align_chunk in *; try contradiction.
+  apply valid_access_compat with chunk1; auto. omega.
+  destruct (valid_access_dec m chunk2 b ofs Writable); auto.
+  elim n. apply valid_access_compat with chunk2; auto. omega.
 Qed.
 
 Theorem store_signed_unsigned_8:
   forall m b ofs v,
   store Mint8signed m b ofs v = store Mint8unsigned m b ofs v.
-Proof. intros. apply store_similar_chunks. apply encode_val_int8_signed_unsigned. Qed.
+Proof. intros. apply store_similar_chunks. apply encode_val_int8_signed_unsigned. auto. Qed.
 
 Theorem store_signed_unsigned_16:
   forall m b ofs v,
   store Mint16signed m b ofs v = store Mint16unsigned m b ofs v.
-Proof. intros. apply store_similar_chunks. apply encode_val_int16_signed_unsigned. Qed.
+Proof. intros. apply store_similar_chunks. apply encode_val_int16_signed_unsigned. auto. Qed.
 
 Theorem store_int8_zero_ext:
   forall m b ofs n,
   store Mint8unsigned m b ofs (Vint (Int.zero_ext 8 n)) =
   store Mint8unsigned m b ofs (Vint n).
-Proof. intros. apply store_similar_chunks. apply encode_val_int8_zero_ext. Qed.
+Proof. intros. apply store_similar_chunks. apply encode_val_int8_zero_ext. auto. Qed.
 
 Theorem store_int8_sign_ext:
   forall m b ofs n,
   store Mint8signed m b ofs (Vint (Int.sign_ext 8 n)) =
   store Mint8signed m b ofs (Vint n).
-Proof. intros. apply store_similar_chunks. apply encode_val_int8_sign_ext. Qed.
+Proof. intros. apply store_similar_chunks. apply encode_val_int8_sign_ext. auto. Qed.
 
 Theorem store_int16_zero_ext:
   forall m b ofs n,
   store Mint16unsigned m b ofs (Vint (Int.zero_ext 16 n)) =
   store Mint16unsigned m b ofs (Vint n).
-Proof. intros. apply store_similar_chunks. apply encode_val_int16_zero_ext. Qed.
+Proof. intros. apply store_similar_chunks. apply encode_val_int16_zero_ext. auto. Qed.
 
 Theorem store_int16_sign_ext:
   forall m b ofs n,
   store Mint16signed m b ofs (Vint (Int.sign_ext 16 n)) =
   store Mint16signed m b ofs (Vint n).
-Proof. intros. apply store_similar_chunks. apply encode_val_int16_sign_ext. Qed.
+Proof. intros. apply store_similar_chunks. apply encode_val_int16_sign_ext. auto. Qed.
 
 Theorem store_float32_truncate:
   forall m b ofs n,
@@ -1287,7 +1306,25 @@ Theorem store_float32_truncate:
   store Mfloat32 m b ofs (Vfloat n).
 Proof.
   intros. apply store_similar_chunks. simpl. decEq.
-  repeat rewrite encode_float32_eq. rewrite Float.bits_of_singleoffloat. auto. 
+  repeat rewrite encode_float32_eq. rewrite Float.bits_of_singleoffloat. auto.
+  auto.
+Qed.
+
+Theorem store_float64al32:
+  forall m b ofs v m',
+  store Mfloat64 m b ofs v = Some m' -> store Mfloat64al32 m b ofs v = Some m'.
+Proof.
+  unfold store; intros. 
+  destruct (valid_access_dec m Mfloat64 b ofs Writable); try discriminate.
+  rewrite pred_dec_true. rewrite <- H. auto. 
+  apply valid_access_compat with Mfloat64; auto. simpl; omega.
+Qed.
+
+Theorem storev_float64al32:
+  forall m a v m',
+  storev Mfloat64 m a v = Some m' -> storev Mfloat64al32 m a v = Some m'.
+Proof.
+  unfold storev; intros. destruct a; auto. apply store_float64al32; auto.
 Qed.
 
 (** ** Properties related to [storebytes]. *)
@@ -2861,8 +2898,8 @@ Qed.
 - unallocated blocks in [m1] must be mapped to [None] by [f];
 - if [f b = Some(b', delta)], [b'] must be valid in [m2];
 - distinct blocks in [m1] are mapped to non-overlapping sub-blocks in [m2];
-- the sizes of [m2]'s blocks are representable with signed machine integers;
-- the offsets [delta] are representable with signed machine integers.
+- the sizes of [m2]'s blocks are representable with unsigned machine integers;
+- the offsets [delta] are representable with unsigned machine integers.
 *)
 
 Record inject' (f: meminj) (m1 m2: mem) : Prop :=
@@ -2875,20 +2912,16 @@ Record inject' (f: meminj) (m1 m2: mem) : Prop :=
       forall b b' delta, f b = Some(b', delta) -> valid_block m2 b';
     mi_no_overlap:
       meminj_no_overlap f m1;
-    mi_range_offset:
-      forall b b' delta,
-      f b = Some(b', delta) ->
-      0 <= delta <= Int.max_unsigned;
-    mi_range_block:
+    mi_representable:
       forall b b' delta ofs k p,
       f b = Some(b', delta) ->
-      perm m2 b' ofs k p ->
-      delta = 0 \/ 0 <= ofs <= Int.max_unsigned
+      perm m1 b (Int.unsigned ofs) k p ->
+      delta >= 0 /\ 0 <= Int.unsigned ofs + delta <= Int.max_unsigned
   }.
 
 Definition inject := inject'.
 
-Local Hint Resolve mi_mappedblocks mi_range_offset: mem.
+Local Hint Resolve mi_mappedblocks: mem.
 
 (** Preservation of access validity and pointer validity *)
 
@@ -2957,6 +2990,21 @@ Qed.
 (** The following lemmas establish the absence of machine integer overflow
   during address computations. *)
 
+(*
+Lemma address_no_overflow:
+  forall f m1 m2 b1 b2 delta ofs1 k p,
+  inject f m1 m2 ->
+  perm m1 b1 (Int.unsigned ofs1) k p ->
+  f b1 = Some (b2, delta) ->
+  0 <= Int.unsigned ofs1 + Int.unsigned (Int.repr delta) <= Int.max_unsigned.
+Proof.
+  intros. exploit mi_representable; eauto. intros [A | [A B]].
+  subst delta. change (Int.unsigned (Int.repr 0)) with 0. 
+  rewrite Zplus_0_r. apply Int.unsigned_range_2.
+  rewrite Int.unsigned_repr; auto. 
+Qed.
+*)
+
 Lemma address_inject:
   forall f m1 m2 b1 ofs1 b2 delta,
   inject f m1 m2 ->
@@ -2965,14 +3013,10 @@ Lemma address_inject:
   Int.unsigned (Int.add ofs1 (Int.repr delta)) = Int.unsigned ofs1 + delta.
 Proof.
   intros.
-  exploit perm_inject; eauto. intro A.
-  exploit mi_range_block; eauto. intros [D | E].
-  subst delta. rewrite Int.add_zero. omega.
-  unfold Int.add.
-  repeat rewrite Int.unsigned_repr. auto.
-  eapply mi_range_offset; eauto.
-  auto.
-  eapply mi_range_offset; eauto.
+  exploit mi_representable; eauto. intros [A B].
+  assert (0 <= delta <= Int.max_unsigned).
+    generalize (Int.unsigned_range ofs1). omega.
+  unfold Int.add. repeat rewrite Int.unsigned_repr; auto.
 Qed.
 
 Lemma address_inject':
@@ -2994,10 +3038,11 @@ Theorem valid_pointer_inject_no_overflow:
   0 <= Int.unsigned ofs + Int.unsigned (Int.repr x) <= Int.max_unsigned.
 Proof.
   intros. rewrite valid_pointer_valid_access in H0.
-  exploit address_inject'; eauto. intros.
-  rewrite Int.unsigned_repr; eauto.
-  rewrite <- H2. apply Int.unsigned_range_2. 
-  eapply mi_range_offset; eauto.
+  assert (perm m1 b (Int.unsigned ofs) Cur Nonempty).
+    apply H0. simpl. omega.
+  exploit mi_representable; eauto. intros [A B].
+  rewrite Int.unsigned_repr; auto.
+  generalize (Int.unsigned_range ofs). omega.
 Qed.
 
 Theorem valid_pointer_inject_val:
@@ -3008,11 +3053,9 @@ Theorem valid_pointer_inject_val:
   valid_pointer m2 b' (Int.unsigned ofs') = true.
 Proof.
   intros. inv H1.
-  exploit valid_pointer_inject_no_overflow; eauto. intro NOOV.
-  unfold Int.add. rewrite Int.unsigned_repr; auto.
-  rewrite Int.unsigned_repr.
+  erewrite address_inject'; eauto. 
   eapply valid_pointer_inject; eauto.
-  eapply mi_range_offset; eauto.
+  rewrite valid_pointer_valid_access in H0. eauto.
 Qed.
 
 Theorem inject_no_overlap:
@@ -3085,7 +3128,7 @@ Qed.
 Theorem aligned_area_inject:
   forall f m m' b ofs al sz b' delta,
   inject f m m' ->
-  al = 1 \/ al = 2 \/ al = 4 -> sz > 0 ->
+  al = 1 \/ al = 2 \/ al = 4 \/ al = 8 -> sz > 0 ->
   (al | sz) ->
   range_perm m b ofs (ofs + sz) Cur Nonempty ->
   (al | ofs) ->
@@ -3099,7 +3142,8 @@ Proof.
   assert (R: exists chunk, al = align_chunk chunk /\ al = size_chunk chunk).
     destruct H0. subst; exists Mint8unsigned; auto.
     destruct H0. subst; exists Mint16unsigned; auto.
-    subst; exists Mint32; auto.
+    destruct H0. subst; exists Mint32; auto.
+    subst; exists Mfloat64; auto.
   destruct R as [chunk [A B]].
   assert (valid_access m chunk b ofs Nonempty).
     split. red; intros; apply H3. omega. congruence.
@@ -3168,9 +3212,7 @@ Proof.
   eauto with mem.
 (* no overlap *)
   red; intros. eauto with mem.
-(* range offset *)
-  eauto.
-(* range blocks *)
+(* representable *)
   eauto with mem. 
 Qed.
 
@@ -3191,10 +3233,8 @@ Proof.
   eauto with mem.
 (* no overlap *)
   red; intros. eauto with mem.
-(* range offset *)
-  eauto.
-(* range blocks *)
-  auto.
+(* representable *)
+  eauto with mem.
 Qed.
 
 Theorem store_outside_inject:
@@ -3216,10 +3256,8 @@ Proof.
   eauto with mem.
 (* no overlap *)
   auto.
-(* range offset *)
-  auto.
-(* range blocks *)
-  intros. eauto with mem. 
+(* representable *)
+  eauto with mem.
 Qed.
 
 Theorem storev_mapped_inject:
@@ -3260,10 +3298,8 @@ Proof.
   intros. eapply storebytes_valid_block_1; eauto. 
 (* no overlap *)
   red; intros. eapply mi_no_overlap0; eauto; eapply perm_storebytes_2; eauto. 
-(* range offset *)
-  eauto.
-(* range blocks *)
-  intros. eapply mi_range_block0; eauto. eapply perm_storebytes_2; eauto. 
+(* representable *)
+  intros. eapply mi_representable0; eauto. eapply perm_storebytes_2; eauto. 
 Qed.
 
 Theorem storebytes_unmapped_inject:
@@ -3283,10 +3319,8 @@ Proof.
   eauto with mem.
 (* no overlap *)
   red; intros. eapply mi_no_overlap0; eauto; eapply perm_storebytes_2; eauto. 
-(* range offset *)
-  eauto.
-(* range blocks *)
-  eauto.
+(* representable *)
+  intros. eapply mi_representable0; eauto. eapply perm_storebytes_2; eauto. 
 Qed.
 
 Theorem storebytes_outside_inject:
@@ -3308,10 +3342,8 @@ Proof.
   intros. eapply storebytes_valid_block_1; eauto. 
 (* no overlap *)
   auto.
-(* range offset *)
+(* representable *)
   auto.
-(* range blocks *)
-  intros. eapply mi_range_block0; eauto. eapply perm_storebytes_2; eauto. 
 Qed.
 
 (* Preservation of allocations *)
@@ -3332,11 +3364,8 @@ Proof.
   eauto with mem.
 (* no overlap *)
   auto.
-(* range offset *)
+(* representable *)
   auto.
-(* range block *)
-  intros. exploit perm_alloc_inv; eauto. rewrite zeq_false. eauto. 
-  eapply valid_not_valid_diff; eauto with mem.
 Qed.
 
 Theorem alloc_left_unmapped_inject:
@@ -3375,12 +3404,10 @@ Proof.
   eapply mi_no_overlap0. eexact H3. eauto. eauto.
   exploit perm_alloc_inv. eauto. eexact H6. rewrite zeq_false; auto. 
   exploit perm_alloc_inv. eauto. eexact H7. rewrite zeq_false; auto. 
-(* range offset *)
-  unfold f'; intros.
-  destruct (zeq b b1). congruence. eauto.
-(* range block *)
+(* representable *)
   unfold f'; intros.
-  destruct (zeq b b1). congruence. eauto.
+  exploit perm_alloc_inv; eauto. 
+  destruct (zeq b b1). congruence. eauto with mem.
 (* incr *)
   split. auto. 
 (* image *)
@@ -3450,10 +3477,12 @@ Proof.
     destruct (zeq b1' b2); auto. subst b1'. right; red; intros.
     eapply H6; eauto. omega.
   eauto.
-(* range offset *)
-  unfold f'; intros. destruct (zeq b b1). congruence. eauto. 
-(* range block *)
-  unfold f'; intros. destruct (zeq b b1). inversion H9; subst b b' delta0. eauto. eauto. 
+(* representable *)
+  unfold f'; intros. exploit perm_alloc_inv; eauto. destruct (zeq b b1); intros.
+  inversion H9; subst b' delta0. exploit H3. apply H4 with (k := k) (p := p); eauto.
+  intros [A | A]. generalize (Int.unsigned_range_2 ofs). omega.
+  generalize (Int.unsigned_range_2 ofs). omega.
+  eauto.
 (* incr *)
   split. auto.
 (* image of b1 *)
@@ -3507,10 +3536,8 @@ Proof.
   auto.
 (* no overlap *)
   red; intros. eauto with mem. 
-(* range offset *)
-  auto.
-(* range block *)
-  auto.
+(* representable *)
+  eauto with mem. 
 Qed.
 
 Lemma free_list_left_inject:
@@ -3544,10 +3571,8 @@ Proof.
   eauto with mem.
 (* no overlap *)
   auto.
-(* range offset *)
+(* representable *)
   auto.
-(* range blocks *)
-  intros. eauto with mem. 
 Qed.
 
 Lemma perm_free_list:
@@ -3597,7 +3622,66 @@ Proof.
   intros. destruct H. constructor; eauto.
   eapply drop_outside_inj; eauto.
   intros. unfold valid_block in *. erewrite nextblock_drop; eauto. 
-  intros. eapply mi_range_block0; eauto. eapply perm_drop_4; eauto. 
+Qed.
+
+(** Composing two memory injections. *)
+
+Theorem inject_compose:
+  forall f f' m1 m2 m3,
+  inject f m1 m2 -> inject f' m2 m3 ->
+  inject (compose_meminj f f') m1 m3.
+Proof.
+  unfold compose_meminj; intros.
+  inv H; inv H0. constructor.
+(* inj *)
+  inv mi_inj0; inv mi_inj1; constructor; intros.
+  (* perm *)
+  destruct (f b1) as [[b' delta'] |]_eqn; try discriminate.
+  destruct (f' b') as [[b'' delta''] |]_eqn; inv H. 
+  replace (ofs + (delta' + delta'')) with ((ofs + delta') + delta'') by omega.
+  eauto.
+  (* valid access *)
+  destruct (f b1) as [[b' delta'] |]_eqn; try discriminate.
+  destruct (f' b') as [[b'' delta''] |]_eqn; inv H. 
+  replace (ofs + (delta' + delta'')) with ((ofs + delta') + delta'') by omega.
+  eauto.
+  (* memval *)
+  destruct (f b1) as [[b' delta'] |]_eqn; try discriminate.
+  destruct (f' b') as [[b'' delta''] |]_eqn; inv H. 
+  replace (ofs + (delta' + delta'')) with ((ofs + delta') + delta'') by omega.
+  eapply memval_inject_compose; eauto. 
+(* unmapped *)
+  intros. erewrite mi_freeblocks0; eauto. 
+(* mapped *)
+  intros. 
+  destruct (f b) as [[b1 delta1] |]_eqn; try discriminate.
+  destruct (f' b1) as [[b2 delta2] |]_eqn; inv H. 
+  eauto.
+(* no overlap *)
+  red; intros. 
+  destruct (f b1) as [[b1x delta1x] |]_eqn; try discriminate.
+  destruct (f' b1x) as [[b1y delta1y] |]_eqn; inv H0. 
+  destruct (f b2) as [[b2x delta2x] |]_eqn; try discriminate.
+  destruct (f' b2x) as [[b2y delta2y] |]_eqn; inv H1.
+  exploit mi_no_overlap0; eauto. intros A.
+  destruct (eq_block b1x b2x). 
+  subst b1x. destruct A. congruence. 
+  assert (delta1y = delta2y) by congruence. right; omega.
+  exploit mi_no_overlap1. eauto. eauto. eauto.
+    eapply perm_inj. eauto. eexact H2. eauto. 
+    eapply perm_inj. eauto. eexact H3. eauto. 
+  unfold block; omega.
+(* representable *)
+  intros. 
+  destruct (f b) as [[b1 delta1] |]_eqn; try discriminate.
+  destruct (f' b1) as [[b2 delta2] |]_eqn; inv H. 
+  exploit mi_representable0; eauto. intros [A B].
+  set (ofs' := Int.repr (Int.unsigned ofs + delta1)).
+  assert (Int.unsigned ofs' = Int.unsigned ofs + delta1). 
+    unfold ofs'; apply Int.unsigned_repr. auto.
+  exploit mi_representable1. eauto. instantiate (3 := ofs'). 
+  rewrite H. eapply perm_inj; eauto. rewrite H. intros [C D].
+  omega.
 Qed.
 
 (** Injecting a memory into itself. *)
@@ -3633,11 +3717,7 @@ Proof.
   apply flat_inj_no_overlap.
 (* range *)
   unfold flat_inj; intros.
-  destruct (zlt b (nextblock m)); inv H0.
-  unfold Int.max_unsigned. generalize Int.modulus_pos; omega.
-(* range *)
-  unfold flat_inj; intros.
-  destruct (zlt b (nextblock m)); inv H0. auto.
+  destruct (zlt b (nextblock m)); inv H0. generalize (Int.unsigned_range_2 ofs); omega.
 Qed.
 
 Theorem empty_inject_neutral:
diff --git a/common/Memtype.v b/common/Memtype.v
index b7d953f..a39daf4 100644
--- a/common/Memtype.v
+++ b/common/Memtype.v
@@ -414,6 +414,7 @@ Axiom load_store_similar:
   forall chunk m1 b ofs v m2, store chunk m1 b ofs v = Some m2 ->
   forall chunk',
   size_chunk chunk' = size_chunk chunk ->
+  align_chunk chunk' <= align_chunk chunk ->
   exists v', load chunk' m2 b ofs = Some v' /\ decode_encode_val v chunk chunk' v'.
 
 Axiom load_store_same:
diff --git a/common/PrintAST.ml b/common/PrintAST.ml
index baa5578..971bf77 100644
--- a/common/PrintAST.ml
+++ b/common/PrintAST.ml
@@ -26,6 +26,7 @@ let name_of_chunk = function
   | Mint32 -> "int32"
   | Mfloat32 -> "float32"
   | Mfloat64 -> "float64"
+  | Mfloat64al32 -> "float64al32"
 
 let name_of_external = function
   | EF_external(name, sg) -> extern_atom name
diff --git a/common/Values.v b/common/Values.v
index f0b125a..bcb7c4f 100644
--- a/common/Values.v
+++ b/common/Values.v
@@ -429,7 +429,7 @@ Definition load_result (chunk: memory_chunk) (v: val) :=
   | Mint32, Vint n => Vint n
   | Mint32, Vptr b ofs => Vptr b ofs
   | Mfloat32, Vfloat f => Vfloat(Float.singleoffloat f)
-  | Mfloat64, Vfloat f => Vfloat f
+  | (Mfloat64 | Mfloat64al32), Vfloat f => Vfloat f
   | _, _ => Vundef
   end.
 
@@ -1168,3 +1168,26 @@ Proof.
   constructor.
 Qed.
 
+(** Composing two memory injections *)
+
+Definition compose_meminj (f f': meminj) : meminj :=
+  fun b =>
+    match f b with
+    | None => None
+    | Some(b', delta) =>
+        match f' b' with
+        | None => None
+        | Some(b'', delta') => Some(b'', delta + delta')
+        end
+    end.
+
+Lemma val_inject_compose:
+  forall f f' v1 v2 v3,
+  val_inject f v1 v2 -> val_inject f' v2 v3 ->
+  val_inject (compose_meminj f f') v1 v3.
+Proof.
+  intros. inv H; auto; inv H0; auto. econstructor.
+  unfold compose_meminj; rewrite H1; rewrite H3; eauto. 
+  rewrite Int.add_assoc. decEq. unfold Int.add. apply Int.eqm_samerepr. auto with ints.
+Qed. 
+
diff --git a/driver/Interp.ml b/driver/Interp.ml
index abd28ac..4061515 100644
--- a/driver/Interp.ml
+++ b/driver/Interp.ml
@@ -353,7 +353,7 @@ and world_vload chunk id ofs =
     | Mint32 -> EVint(coqint_of_camlint(Random.int32 0x7FFF_FFFFl))
     | Mfloat32 -> EVfloat(
 	Floats.Float.singleoffloat(coqfloat_of_camlfloat(Random.float 1.0)))
-    | Mfloat64 -> EVfloat(coqfloat_of_camlfloat(Random.float 1.0))
+    | Mfloat64 | Mfloat64al32 -> EVfloat(coqfloat_of_camlfloat(Random.float 1.0))
   in Some(res, world)
 
 and world_vstore chunk id ofs v =
diff --git a/ia32/Asm.v b/ia32/Asm.v
index 6210286..a78c8bf 100644
--- a/ia32/Asm.v
+++ b/ia32/Asm.v
@@ -474,17 +474,17 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pmovsd_fi rd n =>
       Next (nextinstr (rs#rd <- (Vfloat n))) m
   | Pmovsd_fm rd a =>
-      exec_load Mfloat64 m a rs rd
+      exec_load Mfloat64al32 m a rs rd
   | Pmovsd_mf a r1 =>
-      exec_store Mfloat64 m a rs r1
+      exec_store Mfloat64al32 m a rs r1
   | Pfld_f r1 =>
       Next (nextinstr (rs#ST0 <- (rs r1))) m
   | Pfld_m a =>
-      exec_load Mfloat64 m a rs ST0
+      exec_load Mfloat64al32 m a rs ST0
   | Pfstp_f rd =>
       Next (nextinstr (rs#rd <- (rs ST0) #ST0 <- Vundef)) m
   | Pfstp_m a =>
-      exec_store Mfloat64 m a rs ST0
+      exec_store Mfloat64al32 m a rs ST0
   | Pxchg_rr r1 r2 =>
       Next (nextinstr (rs#r1 <- (rs r2) #r2 <- (rs r1))) m
   (** Moves with conversion *)
@@ -709,7 +709,7 @@ Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
       extcall_arg rs m (S (Outgoing ofs Tint)) v
   | extcall_arg_float_stack: forall ofs bofs v,
       bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mfloat64 m (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v ->
+      Mem.loadv Mfloat64al32 m (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v ->
       extcall_arg rs m (S (Outgoing ofs Tfloat)) v.
 
 Definition extcall_arguments
diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v
index 3fc3efb..5a6c1ab 100644
--- a/ia32/Asmgen.v
+++ b/ia32/Asmgen.v
@@ -452,7 +452,7 @@ Definition transl_load (chunk: memory_chunk)
       do r <- ireg_of dest; OK(Pmov_rm r am :: k)
   | Mfloat32 =>
       do r <- freg_of dest; OK(Pcvtss2sd_fm r am :: k)
-  | Mfloat64 =>
+  | Mfloat64 | Mfloat64al32 =>
       do r <- freg_of dest; OK(Pmovsd_fm r am :: k)
   end.
 
@@ -469,7 +469,7 @@ Definition transl_store (chunk: memory_chunk)
       do r <- ireg_of src; OK(Pmov_mr am r :: k)
   | Mfloat32 =>
       do r <- freg_of src; OK(Pcvtsd2ss_mf am r :: k)
-  | Mfloat64 =>
+  | Mfloat64 | Mfloat64al32 =>
       do r <- freg_of src; OK(Pmovsd_mf am r :: k)
   end.
 
diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v
index 7b3427b..7ad8a02 100644
--- a/ia32/Asmgenproof1.v
+++ b/ia32/Asmgenproof1.v
@@ -1679,7 +1679,11 @@ Proof.
     transitivity (Val.add ((rs#(preg_of dest) <- v) PC) Vone).
     auto. decEq. apply Pregmap.gso; auto with ppcgen.
   exists rs2. split. 
-  destruct chunk; ArgsInv; apply exec_straight_one; simpl; auto.
+  destruct chunk; ArgsInv; apply exec_straight_one; auto.
+  (* Mfloat64 -> Mfloat64al32 *)
+  rewrite <- H. simpl. unfold exec_load. rewrite H1.
+  destruct (eval_addrmode ge x rs); simpl in *; try discriminate.
+  erewrite Mem.load_float64al32; eauto. 
   split. unfold rs2. rewrite nextinstr_nf_inv1. SRes. apply preg_of_important.
   intros. unfold rs2. repeat SOther. 
 Qed.
@@ -1725,6 +1729,10 @@ Proof.
   apply exec_straight_one. simpl. unfold exec_store. rewrite H1. eauto. auto.
   intros. SOther.
 (* float64 *)
+  econstructor; split.
+  apply exec_straight_one. simpl. unfold exec_store. erewrite Mem.storev_float64al32; eauto. auto.
+  intros. SOther.
+(* float64al32 *)
   econstructor; split.
   apply exec_straight_one. simpl. unfold exec_store. rewrite H1. eauto. auto.
   intros. SOther.
diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml
index 8b795ee..b473464 100644
--- a/ia32/PrintAsm.ml
+++ b/ia32/PrintAsm.ml
@@ -363,7 +363,7 @@ let print_builtin_vload_common oc chunk addr res =
       fprintf oc "	movl	%a, %a\n" addressing addr ireg res
   | Mfloat32, FR res ->
       fprintf oc "	cvtss2sd %a, %a\n" addressing addr freg res
-  | Mfloat64, FR res ->
+  | (Mfloat64 | Mfloat64al32), FR res ->
       fprintf oc "	movsd	%a, %a\n" addressing addr freg res
   | _ ->
       assert false
@@ -406,7 +406,7 @@ let print_builtin_vstore_common oc chunk addr src =
   | Mfloat32, FR src ->
       fprintf oc "	cvtsd2ss %a, %%xmm7\n" freg src;
       fprintf oc "	movss	%%xmm7, %a\n" addressing addr
-  | Mfloat64, FR src ->
+  | (Mfloat64 | Mfloat64al32), FR src ->
       fprintf oc "	movsd	%a, %a\n" freg src addressing addr
   | _ ->
       assert false
diff --git a/ia32/standard/Stacklayout.v b/ia32/standard/Stacklayout.v
index 063fb4f..be854fd 100644
--- a/ia32/standard/Stacklayout.v
+++ b/ia32/standard/Stacklayout.v
@@ -107,7 +107,7 @@ Remark frame_env_aligned:
   /\ (8 | fe.(fe_ofs_float_local))
   /\ (8 | fe.(fe_ofs_float_callee_save))
   /\ (4 | fe.(fe_ofs_retaddr))
-  /\ (4 | fe.(fe_stack_data))
+  /\ (8 | fe.(fe_stack_data))
   /\ (4 | fe.(fe_size)).
 Proof.
   intros.
@@ -128,8 +128,7 @@ Proof.
   set (x6 := x5 + 8 * bound_float_local b).
   assert (8 | x6). unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring.
   set (x7 := x6 + 8 * bound_float_callee_save b).
-  assert (4 | x7).
-    apply Zdivides_trans with 8. exists 2; auto.
+  assert (8 | x7).
     unfold x7. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
   set (x8 := align (x7 + bound_stack_data b) 4).
   assert (4 | x8). apply align_divides. omega.
diff --git a/powerpc/Asm.v b/powerpc/Asm.v
index ea5e416..5d815fd 100644
--- a/powerpc/Asm.v
+++ b/powerpc/Asm.v
@@ -647,9 +647,9 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Plbzx rd r1 r2 =>
       load2 Mint8unsigned rd r1 r2 rs m
   | Plfd rd cst r1 =>
-      load1 Mfloat64 rd cst r1 rs m
+      load1 Mfloat64al32 rd cst r1 rs m
   | Plfdx rd r1 r2 =>
-      load2 Mfloat64 rd r1 r2 rs m
+      load2 Mfloat64al32 rd r1 r2 rs m
   | Plfs rd cst r1 =>
       load1 Mfloat32 rd cst r1 rs m
   | Plfsx rd r1 r2 =>
@@ -712,9 +712,9 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pstbx rd r1 r2 =>
       store2 Mint8unsigned rd r1 r2 rs m
   | Pstfd rd cst r1 =>
-      store1 Mfloat64 rd cst r1 rs m
+      store1 Mfloat64al32 rd cst r1 rs m
   | Pstfdx rd r1 r2 =>
-      store2 Mfloat64 rd r1 r2 rs m
+      store2 Mfloat64al32 rd r1 r2 rs m
   | Pstfs rd cst r1 =>
       match store1 Mfloat32 rd cst r1 rs m with
       | OK rs' m' => OK (rs'#FPR13 <- Vundef) m'
@@ -814,7 +814,7 @@ Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
       extcall_arg rs m (S (Outgoing ofs Tint)) v
   | extcall_arg_float_stack: forall ofs bofs v,
       bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mfloat64 m (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v ->
+      Mem.loadv Mfloat64al32 m (Val.add (rs (IR GPR1)) (Vint (Int.repr bofs))) = Some v ->
       extcall_arg rs m (S (Outgoing ofs Tfloat)) v.
 
 Definition extcall_arguments
diff --git a/powerpc/Asmgen.v b/powerpc/Asmgen.v
index f9e4b2c..b34d939 100644
--- a/powerpc/Asmgen.v
+++ b/powerpc/Asmgen.v
@@ -485,7 +485,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) (k: code) :=
       | Mfloat32 =>
           transl_load_store
             (Plfs (freg_of dst)) (Plfsx (freg_of dst)) addr args GPR12 k
-      | Mfloat64 =>
+      | Mfloat64 | Mfloat64al32 =>
           transl_load_store
             (Plfd (freg_of dst)) (Plfdx (freg_of dst)) addr args GPR12 k
       end
@@ -510,7 +510,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction) (k: code) :=
       | Mfloat32 =>
           transl_load_store
             (Pstfs (freg_of src)) (Pstfsx (freg_of src)) addr args temp k
-      | Mfloat64 =>
+      | Mfloat64 | Mfloat64al32 =>
           transl_load_store
             (Pstfd (freg_of src)) (Pstfdx (freg_of src)) addr args temp k
       end
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index e7b7385..e99049c 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -827,9 +827,9 @@ Proof.
     rewrite <- H. apply eval_addressing_preserved. exact symbols_preserved.
   left; eapply exec_straight_steps; eauto with coqlib;
   destruct chunk; simpl; simpl in H6;
-  (* all cases but Mint8signed *)
+  (* all cases but Mint8signed and Mfloat64 *)
   try (eapply transl_load_correct; eauto;
-       intros; simpl; unfold preg_of; rewrite H6; auto).
+       intros; simpl; unfold preg_of; rewrite H6; auto; fail).
   (* Mint8signed *)
   exploit loadv_8_signed_unsigned; eauto. intros [v' [LOAD EQ]].
   assert (X1: forall (cst : constant) (r1 : ireg) (rs1 : regset),
@@ -849,6 +849,10 @@ Proof.
   eapply agree_set_twice_mireg; eauto.
   rewrite EQ. apply Val.sign_ext_lessdef. 
   generalize (ireg_val _ _ _ dst AG1 H6). rewrite Regmap.gss. auto.
+  (* Mfloat64 *)
+  exploit Mem.loadv_float64al32; eauto. intros. clear H0.
+  eapply transl_load_correct; eauto;
+  intros; simpl; unfold preg_of; rewrite H6; auto.
 Qed.
 
 Lemma storev_8_signed_unsigned:
@@ -883,6 +887,7 @@ Proof.
   rewrite <- (eval_addressing_preserved _ _ symbols_preserved) in H.
   left; eapply exec_straight_steps_bis; eauto with coqlib.
   destruct chunk; simpl; simpl in H6;
+  try (generalize (Mem.storev_float64al32 _ _ _ _ H0); intros);
   try (rewrite storev_8_signed_unsigned in H0);
   try (rewrite storev_16_signed_unsigned in H0);
   simpl; eapply transl_store_correct; eauto;
diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml
index 3331a1c..ac9a5da 100644
--- a/powerpc/PrintAsm.ml
+++ b/powerpc/PrintAsm.ml
@@ -350,7 +350,7 @@ let print_builtin_vload_common oc chunk base offset res =
       fprintf oc "	lwz	%a, %a(%a)\n" ireg res constant offset ireg base
   | Mfloat32, FR res ->
       fprintf oc "	lfs	%a, %a(%a)\n" freg res constant offset ireg base
-  | Mfloat64, FR res ->
+  | (Mfloat64 | Mfloat64al32), FR res ->
       fprintf oc "	lfd	%a, %a(%a)\n" freg res constant offset ireg base
   | _ ->
       assert false
@@ -383,7 +383,7 @@ let print_builtin_vstore_common oc chunk base offset src =
   | Mfloat32, FR src ->
       fprintf oc "	frsp	%a, %a\n" freg FPR13 freg src;
       fprintf oc "	stfs	%a, %a(%a)\n" freg FPR13 constant offset ireg base
-  | Mfloat64, FR src ->
+  | (Mfloat64 | Mfloat64al32), FR src ->
       fprintf oc "	stfd	%a, %a(%a)\n" freg src constant offset ireg base
   | _ ->
       assert false
diff --git a/powerpc/eabi/Stacklayout.v b/powerpc/eabi/Stacklayout.v
index 22a2826..cd4d9ae 100644
--- a/powerpc/eabi/Stacklayout.v
+++ b/powerpc/eabi/Stacklayout.v
@@ -113,7 +113,7 @@ Remark frame_env_aligned:
   /\ (8 | fe.(fe_ofs_float_local))
   /\ (8 | fe.(fe_ofs_float_callee_save))
   /\ (4 | fe.(fe_ofs_retaddr))
-  /\ (4 | fe.(fe_stack_data))
+  /\ (8 | fe.(fe_stack_data))
   /\ (16 | fe.(fe_size)).
 Proof.
   intros.
@@ -133,8 +133,7 @@ Proof.
   set (x5 := x4 + 8 * bound_float_local b).
   assert (8 | x5). unfold x5. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring.
   set (x6 := x5 + 8 * bound_float_callee_save b).
-  assert (4 | x6).
-    apply Zdivides_trans with 8. exists 2; auto.
+  assert (8 | x6).
     unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
   set (x7 := align (x6 + bound_stack_data b) 16).
   assert (16 | x7). unfold x7; apply align_divides. omega.
-- 
cgit v1.2.3