Big merge of the newregalloc-int64 branch. Lots of changes in two directions:

1- new register allocator (+ live range splitting, spilling&reloading, etc) based on a posteriori validation using the Rideau-Leroy algorithm 2- support for 64-bit integer arithmetic (type "long long"). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
author: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2013-04-20 07:54:52 +0000
committer: xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e> 2013-04-20 07:54:52 +0000
commit: 255cee09b71255051c2b40eae0c88bffce1f6f32 (patch)
tree: 7951b1b13e8fd5e525b9223e8be0580e83550f55 /ia32
parent: 6e5041958df01c56762e90770abd704b95a36e5d (diff)
15 files changed, 756 insertions, 918 deletions
diff --git a/ia32/Asm.v b/ia32/Asm.v
index df901db..2757061 100644
--- a/ia32/Asm.v
+++ b/ia32/Asm.v
@@ -163,6 +163,7 @@ Inductive instruction: Type :=
   | Pshr_ri (rd: ireg) (n: int)
   | Psar_rcl (rd: ireg)
   | Psar_ri (rd: ireg) (n: int)
+  | Pshld_ri (rd: ireg) (r1: ireg) (n: int)
   | Pror_ri (rd: ireg) (n: int)
   | Pcmp_rr (r1 r2: ireg) 
   | Pcmp_ri (r1: ireg) (n: int)
@@ -193,7 +194,7 @@ Inductive instruction: Type :=
   | Plabel(l: label)
   | Pallocframe(sz: Z)(ofs_ra ofs_link: int)
   | Pfreeframe(sz: Z)(ofs_ra ofs_link: int)
-  | Pbuiltin(ef: external_function)(args: list preg)(res: preg)
+  | Pbuiltin(ef: external_function)(args: list preg)(res: list preg)
   | Pannot(ef: external_function)(args: list annot_param)
 
 with annot_param : Type :=
@@ -232,6 +233,14 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset :=
   | r :: l' => undef_regs l' (rs#r <- Vundef)
   end.
 
+(** Assigning multiple registers *)
+
+Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset :=
+  match rl, vl with
+  | r1 :: rl', v1 :: vl' => set_regs rl' vl' (rs#r1 <- v1)
+  | _, _ => rs
+  end.
+
 Section RELSEM.
 
 (** Looking up instructions in a code sequence by position. *)
@@ -433,9 +442,10 @@ Definition exec_load (chunk: memory_chunk) (m: mem)
   end.
 
 Definition exec_store (chunk: memory_chunk) (m: mem)
-                      (a: addrmode) (rs: regset) (r1: preg) :=
+                      (a: addrmode) (rs: regset) (r1: preg)
+                      (destroyed: list preg) :=
   match Mem.storev chunk m (eval_addrmode a rs) (rs r1) with
-  | Some m' => Next (nextinstr_nf (if preg_eq r1 ST0 then rs#ST0 <- Vundef else rs)) m'
+  | Some m' => Next (nextinstr_nf (undef_regs destroyed rs)) m'
   | None => Stuck
   end.
 
@@ -470,7 +480,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pmov_rm rd a =>
       exec_load Mint32 m a rs rd
   | Pmov_mr a r1 =>
-      exec_store Mint32 m a rs r1
+      exec_store Mint32 m a rs r1 nil
   | Pmovd_fr rd r1 =>
       Next (nextinstr (rs#rd <- (rs r1))) m
   | Pmovd_rf rd r1 =>
@@ -482,7 +492,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pmovsd_fm rd a =>
       exec_load Mfloat64al32 m a rs rd
   | Pmovsd_mf a r1 =>
-      exec_store Mfloat64al32 m a rs r1
+      exec_store Mfloat64al32 m a rs r1 nil
   | Pfld_f r1 =>
       Next (nextinstr (rs#ST0 <- (rs r1))) m
   | Pfld_m a =>
@@ -490,14 +500,14 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pfstp_f rd =>
       Next (nextinstr (rs#rd <- (rs ST0) #ST0 <- Vundef)) m
   | Pfstp_m a =>
-      exec_store Mfloat64al32 m a rs ST0
+      exec_store Mfloat64al32 m a rs ST0 (ST0 :: nil)
   | Pxchg_rr r1 r2 =>
       Next (nextinstr (rs#r1 <- (rs r2) #r2 <- (rs r1))) m
   (** Moves with conversion *)
   | Pmovb_mr a r1 =>
-      exec_store Mint8unsigned m a rs r1
+      exec_store Mint8unsigned m a rs r1 nil
   | Pmovw_mr a r1 =>
-      exec_store Mint16unsigned m a rs r1
+      exec_store Mint16unsigned m a rs r1 nil
   | Pmovzb_rr rd r1 =>
       Next (nextinstr (rs#rd <- (Val.zero_ext 8 rs#r1))) m
   | Pmovzb_rm rd a =>
@@ -519,7 +529,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
   | Pcvtsd2ss_ff rd r1 =>
       Next (nextinstr (rs#rd <- (Val.singleoffloat rs#r1))) m
   | Pcvtsd2ss_mf a r1 =>
-      exec_store Mfloat32 m a rs r1
+      exec_store Mfloat32 m a rs r1 (FR XMM7 :: nil)
   | Pcvttsd2si_rf rd r1 =>
       Next (nextinstr (rs#rd <- (Val.maketotal (Val.intoffloat rs#r1)))) m
   | Pcvtsi2sd_fr rd r1 =>
@@ -573,6 +583,10 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
       Next (nextinstr_nf (rs#rd <- (Val.shr rs#rd rs#ECX))) m
   | Psar_ri rd n =>
       Next (nextinstr_nf (rs#rd <- (Val.shr rs#rd (Vint n)))) m
+  | Pshld_ri rd r1 n =>
+      Next (nextinstr_nf
+              (rs#rd <- (Val.or (Val.shl rs#rd (Vint n))
+                                (Val.shru rs#r1 (Vint (Int.sub Int.iwordsize n)))))) m
   | Pror_ri rd n =>
       Next (nextinstr_nf (rs#rd <- (Val.ror rs#rd (Vint n)))) m
   | Pcmp_rr r1 r2 =>
@@ -590,7 +604,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
       | None => Next (nextinstr (rs#rd <- Vundef)) m
       end
   | Psetcc c rd =>
-      Next (nextinstr (rs#ECX <- Vundef #rd <- (Val.of_optbool (eval_testcond c rs)))) m
+      Next (nextinstr (rs#rd <- (Val.of_optbool (eval_testcond c rs)))) m
   (** Arithmetic operations over floats *)
   | Paddd_ff rd r1 =>
       Next (nextinstr (rs#rd <- (Val.addf rs#rd rs#r1))) m
@@ -632,7 +646,7 @@ Definition exec_instr (c: code) (i: instruction) (rs: regset) (m: mem) : outcome
       | Vint n => 
           match list_nth_z tbl (Int.unsigned n) with
           | None => Stuck
-          | Some lbl => goto_label c lbl (rs #ECX <- Vundef #EDX <- Vundef) m
+          | Some lbl => goto_label c lbl rs m
           end
       | _ => Stuck
       end
@@ -686,6 +700,8 @@ Definition preg_of (r: mreg) : preg :=
   match r with
   | AX => IR EAX
   | BX => IR EBX
+  | CX => IR ECX
+  | DX => IR EDX
   | SI => IR ESI
   | DI => IR EDI
   | BP => IR EBP
@@ -695,10 +711,8 @@ Definition preg_of (r: mreg) : preg :=
   | X3 => FR XMM3
   | X4 => FR XMM4
   | X5 => FR XMM5
-  | IT1 => IR EDX
-  | IT2 => IR ECX
-  | FT1 => FR XMM6
-  | FT2 => FR XMM7
+  | X6 => FR XMM6
+  | X7 => FR XMM7
   | FP0 => ST0
   end.
 
@@ -706,24 +720,24 @@ Definition preg_of (r: mreg) : preg :=
     We exploit the calling conventions from module [Conventions], except that
     we use machine registers instead of locations. *)
 
+Definition chunk_of_type (ty: typ) :=
+  match ty with Tint => Mint32 | Tfloat => Mfloat64al32 | Tlong => Mint64 end.
+
 Inductive extcall_arg (rs: regset) (m: mem): loc -> val -> Prop :=
   | extcall_arg_reg: forall r,
       extcall_arg rs m (R r) (rs (preg_of r))
-  | extcall_arg_int_stack: forall ofs bofs v,
+  | extcall_arg_stack: forall ofs ty bofs v,
       bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mint32 m (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v ->
-      extcall_arg rs m (S (Outgoing ofs Tint)) v
-  | extcall_arg_float_stack: forall ofs bofs v,
-      bofs = Stacklayout.fe_ofs_arg + 4 * ofs ->
-      Mem.loadv Mfloat64al32 m (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v ->
-      extcall_arg rs m (S (Outgoing ofs Tfloat)) v.
+      Mem.loadv (chunk_of_type ty) m
+                (Val.add (rs (IR ESP)) (Vint (Int.repr bofs))) = Some v ->
+      extcall_arg rs m (S Outgoing ofs ty) v.
 
 Definition extcall_arguments
     (rs: regset) (m: mem) (sg: signature) (args: list val) : Prop :=
   list_forall2 (extcall_arg rs m) (loc_arguments sg) args.
 
-Definition loc_external_result (sg: signature) : preg :=
-  preg_of (loc_result sg).
+Definition loc_external_result (sg: signature) : list preg :=
+  map preg_of (loc_result sg).
 
 (** Extract the values of the arguments of an annotation. *)
 
@@ -753,33 +767,31 @@ Inductive step: state -> trace -> state -> Prop :=
       exec_instr c i rs m = Next rs' m' ->
       step (State rs m) E0 (State rs' m')
   | exec_step_builtin:
-      forall b ofs c ef args res rs m t v m',
+      forall b ofs c ef args res rs m t vl rs' m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal c) ->
       find_instr (Int.unsigned ofs) c = Some (Pbuiltin ef args res) ->
-      external_call ef ge (map rs args) m t v m' ->
-      step (State rs m) t
-           (State (nextinstr_nf(rs #EDX <- Vundef #ECX <- Vundef
-                                #XMM6 <- Vundef #XMM7 <- Vundef
-                                #ST0 <- Vundef
-                                #res <- v)) m')
+      external_call' ef ge (map rs args) m t vl m' ->
+      rs' = nextinstr_nf 
+             (set_regs res vl
+               (undef_regs (map preg_of (destroyed_by_builtin ef)) rs)) ->
+      step (State rs m) t (State rs' m')
   | exec_step_annot:
       forall b ofs c ef args rs m vargs t v m',
       rs PC = Vptr b ofs ->
       Genv.find_funct_ptr ge b = Some (Internal c) ->
       find_instr (Int.unsigned ofs) c = Some (Pannot ef args) ->
       annot_arguments rs m args vargs ->
-      external_call ef ge vargs m t v m' ->
+      external_call' ef ge vargs m t v m' ->
       step (State rs m) t
            (State (nextinstr rs) m')
   | exec_step_external:
       forall b ef args res rs m t rs' m',
       rs PC = Vptr b Int.zero ->
       Genv.find_funct_ptr ge b = Some (External ef) ->
-      external_call ef ge args m t res m' ->
       extcall_arguments rs m (ef_sig ef) args ->
-      rs' = (rs#(loc_external_result (ef_sig ef)) <- res
-               #PC <- (rs RA)) ->
+      external_call' ef ge args m t res m' ->
+      rs' = (set_regs (loc_external_result (ef_sig ef)) res rs) #PC <- (rs RA) ->
       step (State rs m) t (State rs' m').
 
 End RELSEM.
@@ -847,21 +859,21 @@ Ltac Equalities :=
   discriminate.
   discriminate.
   inv H11. 
-  exploit external_call_determ. eexact H4. eexact H11. intros [A B].
+  exploit external_call_determ'. eexact H4. eexact H9. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
   inv H12.
   assert (vargs0 = vargs) by (eapply annot_arguments_determ; eauto). subst vargs0.
-  exploit external_call_determ. eexact H5. eexact H13. intros [A B].
+  exploit external_call_determ'. eexact H5. eexact H13. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
   assert (args0 = args) by (eapply extcall_arguments_determ; eauto). subst args0.
-  exploit external_call_determ. eexact H3. eexact H8. intros [A B].
+  exploit external_call_determ'. eexact H4. eexact H9. intros [A B].
   split. auto. intros. destruct B; auto. subst. auto.
 (* trace length *)
   red; intros; inv H; simpl.
   omega.
-  eapply external_call_trace_length; eauto.
-  eapply external_call_trace_length; eauto.
-  eapply external_call_trace_length; eauto.
+  inv H3. eapply external_call_trace_length; eauto.
+  inv H4. eapply external_call_trace_length; eauto.
+  inv H3. eapply external_call_trace_length; eauto.
 (* initial states *)
   inv H; inv H0. f_equal. congruence.
 (* final no step *)
@@ -882,17 +894,3 @@ Definition data_preg (r: preg) : bool :=
   | RA => false
   end.
 
-Definition nontemp_preg (r: preg) : bool :=
-  match r with
-  | PC => false
-  | IR ECX => false
-  | IR EDX => false
-  | IR _ => true
-  | FR XMM6 => false
-  | FR XMM7 => false
-  | FR _ => true
-  | ST0 => false
-  | CR _ => false
-  | RA => false
-  end.
-
diff --git a/ia32/Asmgen.v b/ia32/Asmgen.v
index 6e3ccf8..78f7d6e 100644
--- a/ia32/Asmgen.v
+++ b/ia32/Asmgen.v
@@ -17,6 +17,7 @@ Require Import Errors.
 Require Import AST.
 Require Import Integers.
 Require Import Floats.
+Require Import Memdata.
 Require Import Op.
 Require Import Locations.
 Require Import Mach.
@@ -54,59 +55,12 @@ Definition mk_mov (rd rs: preg) (k: code) : res code :=
   | _, _ => Error(msg "Asmgen.mk_mov")
   end.
 
-Definition mk_shift (shift_instr: ireg -> instruction)
-                    (r1 r2: ireg) (k: code) : res code :=
-  if ireg_eq r2 ECX then
-    OK (shift_instr r1 :: k)
-  else if ireg_eq r1 ECX then
-    OK (Pxchg_rr r2 ECX :: shift_instr r2 :: Pxchg_rr r2 ECX :: k)
-  else
-    OK (Pmov_rr ECX r2 :: shift_instr r1 :: k).
-
-Definition mk_mov2 (src1 dst1 src2 dst2: ireg) (k: code) : code :=
-  if ireg_eq src1 dst1 then
-    Pmov_rr dst2 src2 :: k
-  else if ireg_eq src2 dst2 then
-    Pmov_rr dst1 src1 :: k
-  else if ireg_eq src2 dst1 then
-    if ireg_eq src1 dst2 then
-      Pxchg_rr src1 src2 :: k
-    else
-      Pmov_rr dst2 src2 :: Pmov_rr dst1 src1 :: k
-  else
-    Pmov_rr dst1 src1 :: Pmov_rr dst2 src2 :: k.
-
-Definition mk_div (div_instr: ireg -> instruction)
-                  (r1 r2: ireg) (k: code) : res code :=
-  if ireg_eq r1 EAX then
-    if ireg_eq r2 EDX then
-      OK (Pmov_rr ECX EDX :: div_instr ECX :: k)
-    else
-      OK (div_instr r2 :: k)
-  else
-      OK (Pmovd_fr XMM7 EAX ::
-          mk_mov2 r1 EAX r2 ECX 
-            (div_instr ECX :: Pmov_rr r1 EAX :: Pmovd_rf EAX XMM7 :: k)).
-
-Definition mk_mod (div_instr: ireg -> instruction)
-                  (r1 r2: ireg) (k: code) : res code :=
-  if ireg_eq r1 EAX then
-    if ireg_eq r2 EDX then
-      OK (Pmov_rr ECX EDX :: div_instr ECX :: Pmov_rr EAX EDX :: k)
-    else
-      OK (div_instr r2 :: Pmov_rr EAX EDX :: k)
-  else
-      OK (Pmovd_fr XMM7 EAX ::
-          mk_mov2 r1 EAX r2 ECX 
-            (div_instr ECX :: Pmov_rr r1 EDX :: Pmovd_rf EAX XMM7 :: k)).
-
-Definition mk_shrximm (r: ireg) (n: int) (k: code) : res code :=
-  let tmp := if ireg_eq r ECX then EDX else ECX in
+Definition mk_shrximm (n: int) (k: code) : res code :=
   let p := Int.sub (Int.shl Int.one n) Int.one in
-  OK (Ptest_rr r r ::
-      Plea tmp (Addrmode (Some r) None (inl _ p)) ::
-      Pcmov Cond_l r tmp ::
-      Psar_ri r n :: k).
+  OK (Ptest_rr EAX EAX ::
+      Plea ECX (Addrmode (Some EAX) None (inl _ p)) ::
+      Pcmov Cond_l EAX ECX ::
+      Psar_ri EAX n :: k).
 
 Definition low_ireg (r: ireg) : bool :=
   match r with
@@ -118,7 +72,7 @@ Definition mk_intconv (mk: ireg -> ireg -> instruction) (rd rs: ireg) (k: code)
   if low_ireg rs then
     OK (mk rd rs :: k)
   else
-    OK (Pmov_rr EDX rs :: mk rd EDX :: k).
+    OK (Pmov_rr EAX rs :: mk rd EAX :: k).
 
 Definition addressing_mentions (addr: addrmode) (r: ireg) : bool :=
   match addr with Addrmode base displ const =>
@@ -130,11 +84,11 @@ Definition mk_smallstore (sto: addrmode -> ireg ->instruction)
                          (addr: addrmode) (rs: ireg) (k: code) :=
   if low_ireg rs then
     OK (sto addr rs :: k)
-  else if addressing_mentions addr ECX then
-    OK (Plea ECX addr :: Pmov_rr EDX rs ::
-        sto (Addrmode (Some ECX) None (inl _ Int.zero)) EDX :: k)
+  else if addressing_mentions addr EAX then
+    OK (Plea ECX addr :: Pmov_rr EAX rs ::
+        sto (Addrmode (Some ECX) None (inl _ Int.zero)) EAX :: k)
   else
-    OK (Pmov_rr ECX rs :: sto addr ECX :: k).
+    OK (Pmov_rr EAX rs :: sto addr EAX :: k).
 
 (** Accessing slots in the stack frame.  *)
 
@@ -149,6 +103,8 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
       | ST0  => OK (Pfld_m (Addrmode (Some base) None (inl _ ofs)) :: k)
       | _ => Error (msg "Asmgen.loadind")
       end
+  | Tlong =>
+      Error (msg "Asmgen.loadind")
   end.
 
 Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
@@ -162,6 +118,8 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
       | ST0  => OK (Pfstp_m (Addrmode (Some base) None (inl _ ofs)) :: k)
       | _ => Error (msg "Asmgen.loadind")
       end
+  | Tlong =>
+      Error (msg "Asmgen.storeind")
   end.
 
 (** Translation of addressing modes *)
@@ -284,19 +242,25 @@ Definition testcond_for_condition (cond: condition) : extcond :=
 
 (** Acting upon extended conditions. *)
 
-Definition mk_setcc (cond: extcond) (rd: ireg) (k: code) :=
+Definition mk_setcc_base (cond: extcond) (rd: ireg) (k: code) :=
   match cond with
-  | Cond_base c => Psetcc c rd :: k
+  | Cond_base c =>
+      Psetcc c rd :: k
   | Cond_and c1 c2 =>
-      if ireg_eq rd EDX
-      then Psetcc c1 EDX :: Psetcc c2 ECX :: Pand_rr EDX ECX :: k
-      else Psetcc c1 EDX :: Psetcc c2 rd  :: Pand_rr rd EDX :: k
+      if ireg_eq rd EAX
+      then Psetcc c1 EAX :: Psetcc c2 ECX :: Pand_rr EAX ECX :: k
+      else Psetcc c1 EAX :: Psetcc c2 rd  :: Pand_rr rd EAX :: k
   | Cond_or c1 c2 => 
-      if ireg_eq rd EDX
-      then Psetcc c1 EDX :: Psetcc c2 ECX :: Por_rr EDX ECX :: k
-      else Psetcc c1 EDX :: Psetcc c2 rd  :: Por_rr rd EDX :: k
+      if ireg_eq rd EAX
+      then Psetcc c1 EAX :: Psetcc c2 ECX :: Por_rr EAX ECX :: k
+      else Psetcc c1 EAX :: Psetcc c2 rd  :: Por_rr rd EAX :: k
   end.
 
+Definition mk_setcc (cond: extcond) (rd: ireg) (k: code) :=
+  if low_ireg rd
+  then mk_setcc_base cond rd k
+  else mk_setcc_base cond EAX (Pmov_rr rd EAX :: k).
+
 Definition mk_jcc (cond: extcond) (lbl: label) (k: code) :=
   match cond with
   | Cond_base c => Pjcc c lbl :: k
@@ -330,91 +294,106 @@ Definition transl_op
   | Ocast16unsigned, a1 :: nil =>
       do r1 <- ireg_of a1; do r <- ireg_of res; mk_intconv Pmovzw_rr r r1 k
   | Oneg, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pneg r :: k)
   | Osub, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Psub_rr r r2 :: k)
   | Omul, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pimul_rr r r2 :: k)
   | Omulimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pimul_ri r n :: k)
   | Odiv, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_div Pidiv r r2 k
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq a2 CX);
+      assertion (mreg_eq res AX);
+      OK(Pidiv ECX :: k)
   | Odivu, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_div Pdiv r r2 k
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq a2 CX);
+      assertion (mreg_eq res AX);
+      OK(Pdiv ECX :: k)
   | Omod, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_mod Pidiv r r2 k
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq a2 CX);
+      assertion (mreg_eq res DX);
+      OK(Pidiv ECX :: k)
   | Omodu, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_mod Pdiv r r2 k
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq a2 CX);
+      assertion (mreg_eq res DX);
+      OK(Pdiv ECX :: k)
   | Oand, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pand_rr r r2 :: k)
   | Oandimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pand_ri r n :: k)
   | Oor, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Por_rr r r2 :: k)
   | Oorimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Por_ri r n :: k)
   | Oxor, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pxor_rr r r2 :: k)
   | Oxorimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pxor_ri r n :: k)
   | Oshl, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_shift Psal_rcl r r2 k
+      assertion (mreg_eq a1 res);
+      assertion (mreg_eq a2 CX);
+      do r <- ireg_of res; OK (Psal_rcl r :: k)
   | Oshlimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Psal_ri r n :: k)
   | Oshr, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_shift Psar_rcl r r2 k
+      assertion (mreg_eq a1 res);
+      assertion (mreg_eq a2 CX);
+      do r <- ireg_of res; OK (Psar_rcl r :: k)
   | Oshrimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Psar_ri r n :: k)
   | Oshru, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; do r2 <- ireg_of a2; mk_shift Pshr_rcl r r2 k
+      assertion (mreg_eq a1 res);
+      assertion (mreg_eq a2 CX);
+      do r <- ireg_of res; OK (Pshr_rcl r :: k)
   | Oshruimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pshr_ri r n :: k)
   | Oshrximm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
-      do r <- ireg_of res; mk_shrximm r n k
+      assertion (mreg_eq a1 AX);
+      assertion (mreg_eq res AX);
+      mk_shrximm n k
   | Ororimm n, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- ireg_of res; OK (Pror_ri r n :: k)
+  | Oshldimm n, a1 :: a2 :: nil =>
+      assertion (mreg_eq a1 res);
+      do r <- ireg_of res; do r2 <- ireg_of a2; OK (Pshld_ri r r2 n :: k)
   | Olea addr, _ =>
       do am <- transl_addressing addr args; do r <- ireg_of res;
       OK (Plea r am :: k)
   | Onegf, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; OK (Pnegd r :: k)
   | Oabsf, a1 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; OK (Pabsd r :: k)
   | Oaddf, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; do r2 <- freg_of a2; OK (Paddd_ff r r2 :: k)
   | Osubf, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; do r2 <- freg_of a2; OK (Psubd_ff r r2 :: k)
   | Omulf, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; do r2 <- freg_of a2; OK (Pmuld_ff r r2 :: k)
   | Odivf, a1 :: a2 :: nil =>
-      do x <- assertion (mreg_eq a1 res);
+      assertion (mreg_eq a1 res);
       do r <- freg_of res; do r2 <- freg_of a2; OK (Pdivd_ff r r2 :: k)
   | Osingleoffloat, a1 :: nil =>
       do r <- freg_of res; do r1 <- freg_of a1; OK (Pcvtsd2ss_ff r r1 :: k)
@@ -450,6 +429,8 @@ Definition transl_load (chunk: memory_chunk)
       do r <- freg_of dest; OK(Pcvtss2sd_fm r am :: k)
   | Mfloat64 | Mfloat64al32 =>
       do r <- freg_of dest; OK(Pmovsd_fm r am :: k)
+  | Mint64 =>
+      Error (msg "Asmgen.transl_load")
   end.
 
 Definition transl_store (chunk: memory_chunk)
@@ -467,6 +448,8 @@ Definition transl_store (chunk: memory_chunk)
       do r <- freg_of src; OK(Pcvtsd2ss_mf am r :: k)
   | Mfloat64 | Mfloat64al32 =>
       do r <- freg_of src; OK(Pmovsd_mf am r :: k)
+  | Mint64 =>
+      Error (msg "Asmgen.transl_store")
   end.
 
 (** Translation of arguments to annotations *)
@@ -491,7 +474,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
         loadind EDX ofs ty dst k
       else
         (do k1 <- loadind EDX ofs ty dst k;
-         loadind ESP f.(fn_link_ofs) Tint IT1 k1)
+         loadind ESP f.(fn_link_ofs) Tint DX k1)
   | Mop op args res =>
       transl_op op args res k
   | Mload chunk addr args dst =>
@@ -521,7 +504,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
       OK (Pfreeframe f.(fn_stacksize) f.(fn_retaddr_ofs) f.(fn_link_ofs) :: 
           Pret :: k)
   | Mbuiltin ef args res =>
-      OK (Pbuiltin ef (List.map preg_of args) (preg_of res) :: k)
+      OK (Pbuiltin ef (List.map preg_of args) (List.map preg_of res) :: k)
   | Mannot ef args =>
       OK (Pannot ef (map transl_annot_param args) :: k)
   end.
@@ -531,7 +514,7 @@ Definition transl_instr (f: Mach.function) (i: Mach.instruction)
 Definition it1_is_parent (before: bool) (i: Mach.instruction) : bool :=
   match i with
   | Msetstack src ofs ty => before
-  | Mgetparam ofs ty dst => negb (mreg_eq dst IT1)
+  | Mgetparam ofs ty dst => negb (mreg_eq dst DX)
   | _ => false
   end.
 
diff --git a/ia32/Asmgenproof.v b/ia32/Asmgenproof.v
index e43392a..ca0fd18 100644
--- a/ia32/Asmgenproof.v
+++ b/ia32/Asmgenproof.v
@@ -138,49 +138,8 @@ Proof.
 Qed.
 Hint Resolve mk_mov_label: labels.
 
-Remark mk_shift_label:
-  forall f r1 r2 k c, mk_shift f r1 r2 k = OK c -> 
-  (forall r, nolabel (f r)) ->
-  tail_nolabel k c.
-Proof.
-  unfold mk_shift; intros. TailNoLabel.
-Qed.
-Hint Resolve mk_shift_label: labels.
-
-Remark mk_mov2_label:
-  forall r1 r2 r3 r4 k,
-  tail_nolabel k (mk_mov2 r1 r2 r3 r4 k).
-Proof.
-  intros; unfold mk_mov2. 
-  destruct (ireg_eq r1 r2); TailNoLabel.
-  destruct (ireg_eq r3 r4); TailNoLabel.
-  destruct (ireg_eq r3 r2); TailNoLabel.
-  destruct (ireg_eq r1 r4); TailNoLabel.
-Qed.
-Hint Resolve mk_mov2_label: labels.
-
-Remark mk_div_label:
-  forall f r1 r2 k c, mk_div f r1 r2 k = OK c -> 
-  (forall r, nolabel (f r)) ->
-  tail_nolabel k c.
-Proof.
-  unfold mk_div; intros. TailNoLabel. 
-  eapply tail_nolabel_trans; TailNoLabel.
-Qed.
-Hint Resolve mk_div_label: labels.
-
-Remark mk_mod_label:
-  forall f r1 r2 k c, mk_mod f r1 r2 k = OK c -> 
-  (forall r, nolabel (f r)) ->
-  tail_nolabel k c.
-Proof.
-  unfold mk_mod; intros. TailNoLabel. 
-  eapply tail_nolabel_trans; TailNoLabel.
-Qed.
-Hint Resolve mk_mod_label: labels.
-
 Remark mk_shrximm_label:
-  forall r n k c, mk_shrximm r n k = OK c -> tail_nolabel k c.
+  forall n k c, mk_shrximm n k = OK c -> tail_nolabel k c.
 Proof.
   intros. monadInv H; TailNoLabel.
 Qed.
@@ -212,6 +171,7 @@ Proof.
   unfold loadind; intros. destruct ty.
   TailNoLabel.
   destruct (preg_of dst); TailNoLabel.
+  discriminate.
 Qed.
 
 Remark storeind_label:
@@ -222,13 +182,23 @@ Proof.
   unfold storeind; intros. destruct ty.
   TailNoLabel.
   destruct (preg_of src); TailNoLabel.
+  discriminate.
+Qed.
+
+Remark mk_setcc_base_label:
+  forall xc rd k,
+  tail_nolabel k (mk_setcc_base xc rd k).
+Proof.
+  intros. destruct xc; simpl; destruct (ireg_eq rd EAX); TailNoLabel.
 Qed.
 
 Remark mk_setcc_label:
   forall xc rd k,
   tail_nolabel k (mk_setcc xc rd k).
 Proof.
-  intros. destruct xc; simpl; destruct (ireg_eq rd EDX); TailNoLabel.
+  intros. unfold mk_setcc. destruct (low_ireg rd).
+  apply mk_setcc_base_label.
+  eapply tail_nolabel_trans. apply mk_setcc_base_label. TailNoLabel.
 Qed.
 
 Remark mk_jcc_label:
@@ -534,7 +504,7 @@ Proof.
   rewrite (sp_val _ _ _ AG) in A. intros. simpl in TR.
   exploit storeind_correct; eauto. intros [rs' [P Q]].
   exists rs'; split. eauto.
-  split. unfold undef_setstack. eapply agree_undef_move; eauto.
+  split. eapply agree_undef_regs; eauto. 
   simpl; intros. rewrite Q; auto with asmgen.
 
 - (* Mgetparam *)
@@ -547,9 +517,9 @@ Proof.
   intros [v' [C D]].
 Opaque loadind.
   left; eapply exec_straight_steps; eauto; intros. 
-  assert (DIFF: negb (mreg_eq dst IT1) = true -> IR EDX <> preg_of dst).
-    intros. change (IR EDX) with (preg_of IT1). red; intros. 
-    unfold proj_sumbool in H1. destruct (mreg_eq dst IT1); try discriminate.
+  assert (DIFF: negb (mreg_eq dst DX) = true -> IR EDX <> preg_of dst).
+    intros. change (IR EDX) with (preg_of DX). red; intros. 
+    unfold proj_sumbool in H1. destruct (mreg_eq dst DX); try discriminate.
     elim n. eapply preg_of_injective; eauto.
   destruct ep; simpl in TR.
 (* EDX contains parent *)
@@ -577,10 +547,7 @@ Opaque loadind.
   exploit transl_op_correct; eauto. intros [rs2 [P [Q R]]]. 
   assert (S: Val.lessdef v (rs2 (preg_of res))) by (eapply Val.lessdef_trans; eauto).
   exists rs2; split. eauto.
-  split.
-  unfold undef_op.
-  destruct op; try (eapply agree_set_undef_mreg; eauto).
-  eapply agree_set_undef_move_mreg; eauto. 
+  split. eapply agree_set_undef_mreg; eauto.
   simpl; congruence.
 
 - (* Mload *)
@@ -606,7 +573,7 @@ Opaque loadind.
   intros. simpl in TR. 
   exploit transl_store_correct; eauto. intros [rs2 [P Q]]. 
   exists rs2; split. eauto.
-  split. eapply agree_exten_temps; eauto. 
+  split. eapply agree_undef_regs; eauto.  
   simpl; congruence.
 
 - (* Mcall *)
@@ -700,22 +667,26 @@ Opaque loadind.
   inv AT. monadInv H3. 
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H2); intro NOOV.
-  exploit external_call_mem_extends; eauto. eapply preg_vals; eauto.
+  exploit external_call_mem_extends'; eauto. eapply preg_vals; eauto.
   intros [vres' [m2' [A [B [C D]]]]].
   left. econstructor; split. apply plus_one. 
   eapply exec_step_builtin. eauto. eauto.
   eapply find_instr_tail; eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
+  eauto.
   econstructor; eauto.
   instantiate (2 := tf); instantiate (1 := x).
   unfold nextinstr_nf, nextinstr. rewrite Pregmap.gss.
-  simpl undef_regs. repeat rewrite Pregmap.gso; auto with asmgen. 
+  rewrite undef_regs_other. rewrite set_pregs_other_2. rewrite undef_regs_other_2.
   rewrite <- H0. simpl. econstructor; eauto.
   eapply code_tail_next_int; eauto.
-  apply agree_nextinstr_nf. eapply agree_set_undef_mreg; eauto. 
-  rewrite Pregmap.gss. auto. 
-  intros. Simplifs. 
+  rewrite preg_notin_charact. intros. auto with asmgen.
+  rewrite preg_notin_charact. intros. auto with asmgen.
+  auto with asmgen.
+  simpl; intros. intuition congruence.
+  apply agree_nextinstr_nf. eapply agree_set_mregs; auto.
+  eapply agree_undef_regs; eauto. intros; apply undef_regs_other_2; auto. 
   congruence.
 
 - (* Mannot *)
@@ -723,12 +694,12 @@ Opaque loadind.
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H3); intro NOOV.
   exploit annot_arguments_match; eauto. intros [vargs' [P Q]]. 
-  exploit external_call_mem_extends; eauto.
+  exploit external_call_mem_extends'; eauto.
   intros [vres' [m2' [A [B [C D]]]]].
   left. econstructor; split. apply plus_one. 
   eapply exec_step_annot. eauto. eauto.
   eapply find_instr_tail; eauto. eauto.
-  eapply external_call_symbols_preserved; eauto.
+  eapply external_call_symbols_preserved'; eauto.
   exact symbols_preserved. exact varinfo_preserved.
   eapply match_states_intro with (ep := false); eauto with coqlib.
   unfold nextinstr. rewrite Pregmap.gss. 
@@ -762,7 +733,7 @@ Opaque loadind.
 (* simple jcc *)
   exists (Pjcc c1 lbl); exists k; exists rs'.
   split. eexact A.
-  split. eapply agree_exten_temps; eauto. 
+  split. eapply agree_exten; eauto. 
   simpl. rewrite B. auto.
 (* jcc; jcc *)
   destruct (eval_testcond c1 rs') as [b1|] eqn:TC1;
@@ -771,13 +742,13 @@ Opaque loadind.
   (* first jcc jumps *)
   exists (Pjcc c1 lbl); exists (Pjcc c2 lbl :: k); exists rs'.
   split. eexact A.
-  split. eapply agree_exten_temps; eauto. 
+  split. eapply agree_exten; eauto. 
   simpl. rewrite TC1. auto.
   (* second jcc jumps *)
   exists (Pjcc c2 lbl); exists k; exists (nextinstr rs').
   split. eapply exec_straight_trans. eexact A. 
   eapply exec_straight_one. simpl. rewrite TC1. auto. auto.
-  split. eapply agree_exten_temps; eauto.
+  split. eapply agree_exten; eauto.
   intros; Simplifs.
   simpl. rewrite eval_testcond_nextinstr. rewrite TC2.
   destruct b2; auto || discriminate.
@@ -787,7 +758,7 @@ Opaque loadind.
   destruct (andb_prop _ _ H3). subst. 
   exists (Pjcc2 c1 c2 lbl); exists k; exists rs'.
   split. eexact A.
-  split. eapply agree_exten_temps; eauto. 
+  split. eapply agree_exten; eauto. 
   simpl. rewrite TC1; rewrite TC2; auto.
 
 - (* Mcond false *)
@@ -801,7 +772,7 @@ Opaque loadind.
   econstructor; split.
   eapply exec_straight_trans. eexact A. 
   apply exec_straight_one. simpl. rewrite B. eauto. auto. 
-  split. apply agree_nextinstr. eapply agree_exten_temps; eauto.
+  split. apply agree_nextinstr. eapply agree_exten; eauto.
   simpl; congruence.
 (* jcc ; jcc *)
   destruct (eval_testcond c1 rs') as [b1|] eqn:TC1;
@@ -811,7 +782,7 @@ Opaque loadind.
   eapply exec_straight_trans. eexact A. 
   eapply exec_straight_two. simpl. rewrite TC1. eauto. auto. 
   simpl. rewrite eval_testcond_nextinstr. rewrite TC2. eauto. auto. auto.
-  split. apply agree_nextinstr. apply agree_nextinstr. eapply agree_exten_temps; eauto.
+  split. apply agree_nextinstr. apply agree_nextinstr. eapply agree_exten; eauto.
   simpl; congruence.
 (* jcc2 *)
   destruct (eval_testcond c1 rs') as [b1|] eqn:TC1;
@@ -822,7 +793,7 @@ Opaque loadind.
   rewrite TC1; rewrite TC2. 
   destruct b1. simpl in *. subst b2. auto. auto.
   auto.
-  split. apply agree_nextinstr. eapply agree_exten_temps; eauto.
+  split. apply agree_nextinstr. eapply agree_exten; eauto.
   rewrite H1; congruence.
 
 - (* Mjumptable *)
@@ -830,8 +801,7 @@ Opaque loadind.
   inv AT. monadInv H6. 
   exploit functions_transl; eauto. intro FN.
   generalize (transf_function_no_overflow _ _ H5); intro NOOV.
-  exploit find_label_goto_label. eauto. eauto. instantiate (2 := rs0#ECX <- Vundef #EDX <- Vundef). 
-  repeat (rewrite Pregmap.gso by auto with asmgen). eauto. eauto. 
+  exploit find_label_goto_label; eauto. 
   intros [tc' [rs' [A [B C]]]].
   exploit ireg_val; eauto. rewrite H. intros LD; inv LD.
   left; econstructor; split.
@@ -839,7 +809,8 @@ Opaque loadind.
   eapply find_instr_tail; eauto. 
   simpl. rewrite <- H9. unfold Mach.label in H0; unfold label; rewrite H0. eauto.
   econstructor; eauto. 
-  eapply agree_exten_temps; eauto. intros. rewrite C; auto with asmgen. Simplifs. 
+Transparent destroyed_by_jumptable. 
+  simpl. eapply agree_exten; eauto. intros. rewrite C; auto with asmgen.
   congruence.
 
 - (* Mreturn *)
@@ -890,8 +861,9 @@ Opaque loadind.
   subst x. unfold fn_code. eapply code_tail_next_int. rewrite list_length_z_cons. omega. 
   constructor. 
   apply agree_nextinstr. eapply agree_change_sp; eauto.
-  apply agree_exten_temps with rs0; eauto.
-  intros; Simplifs.
+Transparent destroyed_at_function_entry.
+  apply agree_undef_regs with rs0; eauto.
+  simpl; intros. apply Pregmap.gso; auto with asmgen. tauto. 
   congruence. 
   intros. Simplifs. eapply agree_sp; eauto.
 
@@ -900,17 +872,15 @@ Opaque loadind.
   intros [tf [A B]]. simpl in B. inv B.
   exploit extcall_arguments_match; eauto. 
   intros [args' [C D]].
-  exploit external_call_mem_extends; eauto.
+  exploit external_call_mem_extends'; eauto.
   intros [res' [m2' [P [Q [R S]]]]].
   left; econstructor; split.
   apply plus_one. eapply exec_step_external; eauto. 
-  eapply external_call_symbols_preserved; eauto. 
+  eapply external_call_symbols_preserved'; eauto. 
   exact symbols_preserved. exact varinfo_preserved.
   econstructor; eauto.
   unfold loc_external_result.
-  eapply agree_set_mreg; eauto. 
-  rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss. auto. 
-  intros; Simplifs.
+  apply agree_set_other; auto. apply agree_set_mregs; auto.
 
 - (* return *)
   inv STACKS. simpl in *.
@@ -942,10 +912,9 @@ Lemma transf_final_states:
   forall st1 st2 r, 
   match_states st1 st2 -> Mach.final_state st1 r -> Asm.final_state st2 r.
 Proof.
-  intros. inv H0. inv H. inv STACKS. constructor.
-  auto. 
-  compute in H1. 
-  generalize (preg_val _ _ _ AX AG). rewrite H1. intros LD; inv LD. auto.
+  intros. inv H0. inv H. constructor. auto. 
+  compute in H1. inv H1.  
+  generalize (preg_val _ _ _ AX AG). rewrite H2. intros LD; inv LD. auto.
 Qed.
 
 Theorem transf_program_correct:
diff --git a/ia32/Asmgenproof1.v b/ia32/Asmgenproof1.v
index e3e62cc..303337e 100644
--- a/ia32/Asmgenproof1.v
+++ b/ia32/Asmgenproof1.v
@@ -37,10 +37,11 @@ Lemma agree_nextinstr_nf:
   agree ms sp rs -> agree ms sp (nextinstr_nf rs).
 Proof.
   intros. unfold nextinstr_nf. apply agree_nextinstr. 
-  apply agree_undef_regs. auto.
+  apply agree_undef_nondata_regs. auto.
   intro. simpl. ElimOrEq; auto. 
 Qed.
 
+(*
 Lemma agree_undef_move:
   forall ms sp rs rs',
   agree ms sp rs ->
@@ -71,6 +72,7 @@ Proof.
   congruence. auto. 
   intros. rewrite Pregmap.gso; auto. 
 Qed.
+*)
 
 (** Useful properties of the PC register. *)
 
@@ -95,13 +97,6 @@ Proof.
   intros. apply nextinstr_nf_inv. destruct r; auto || discriminate.
 Qed.
 
-Lemma nextinstr_nf_inv2:
-  forall r rs, 
-  nontemp_preg r = true -> (nextinstr_nf rs)#r = rs#r.
-Proof.
-  intros. apply nextinstr_nf_inv1; auto with asmgen.
-Qed.
-
 Lemma nextinstr_nf_set_preg:
   forall rs m v,
   (nextinstr_nf (rs#(preg_of m) <- v))#PC = Val.add rs#PC Vone.
@@ -166,180 +161,7 @@ Proof.
   split. Simplifs. intros; Simplifs.
 Qed.
 
-(** Smart constructor for shifts *)
-
-Lemma mk_shift_correct:
-  forall sinstr ssem r1 r2 k c rs1 m,
-  mk_shift sinstr r1 r2 k = OK c ->
-  (forall r c rs m,
-   exec_instr ge c (sinstr r) rs m = Next (nextinstr_nf (rs#r <- (ssem rs#r rs#ECX))) m) ->
-  exists rs2,
-     exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#r1 = ssem rs1#r1 rs1#r2
-  /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r.
-Proof.
-  unfold mk_shift; intros. 
-  destruct (ireg_eq r2 ECX).
-(* fast case *)
-  monadInv H.
-  econstructor. split. apply exec_straight_one. apply H0. auto. 
-  split. Simplifs. intros; Simplifs.
-(* xchg case *)
-  destruct (ireg_eq r1 ECX); monadInv H. 
-  econstructor. split. eapply exec_straight_three.
-  simpl; eauto. 
-  apply H0.
-  simpl; eauto.
-  auto. auto. auto. 
-  split. Simplifs. f_equal. Simplifs.
-  intros; Simplifs. destruct (preg_eq r r2). subst r. Simplifs. Simplifs.
-(* general case *)
-  econstructor. split. eapply exec_straight_two. simpl; eauto. apply H0. 
-  auto. auto. 
-  split. Simplifs. f_equal. Simplifs. intros. Simplifs. 
-Qed.
-
-(** Parallel move 2 *)
-
-Lemma mk_mov2_correct:
-  forall src1 dst1 src2 dst2 k rs m,
-  dst1 <> dst2 ->
-  exists rs',
-     exec_straight ge fn (mk_mov2 src1 dst1 src2 dst2 k) rs m k rs' m
-  /\ rs'#dst1 = rs#src1
-  /\ rs'#dst2 = rs#src2
-  /\ forall r, r <> PC -> r <> dst1 -> r <> dst2 -> rs'#r = rs#r.
-Proof.
-  intros. unfold mk_mov2. 
-(* single moves *)
-  destruct (ireg_eq src1 dst1). subst.
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  intuition Simplifs.
-  destruct (ireg_eq src2 dst2). subst.
-  econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  intuition Simplifs.
-(* xchg *)
-  destruct (ireg_eq src2 dst1). destruct (ireg_eq src1 dst2).
-  subst. econstructor; split. apply exec_straight_one. simpl; eauto. auto.
-  intuition Simplifs.
-(* move 2; move 1 *)
-  subst. econstructor; split. eapply exec_straight_two.
-  simpl; eauto. simpl; eauto. auto. auto.
-  intuition Simplifs.
-(* move 1; move 2*)
-  subst. econstructor; split. eapply exec_straight_two.
-  simpl; eauto. simpl; eauto. auto. auto.
-  intuition Simplifs.
-Qed.
-
-(** Smart constructor for division *)
-
-Lemma mk_div_correct:
-  forall mkinstr dsem msem r1 r2 k c (rs1: regset) m vq vr,
-  mk_div mkinstr r1 r2 k = OK c ->
-  (forall r c rs m,
-   exec_instr ge c (mkinstr r) rs m =
-      let vn := rs#EAX in let vd := (rs#EDX <- Vundef)#r in
-      match dsem vn vd, msem vn vd with
-      | Some vq, Some vr => Next (nextinstr_nf (rs#EAX <- vq #EDX <- vr)) m
-      | _, _ => Stuck
-      end) ->
-  dsem rs1#r1 rs1#r2 = Some vq ->
-  msem rs1#r1 rs1#r2 = Some vr ->
-  exists rs2,
-     exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#r1 = vq
-  /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r.
-Proof.
-  unfold mk_div; intros. 
-  destruct (ireg_eq r1 EAX). destruct (ireg_eq r2 EDX); monadInv H.
-(* r1=EAX r2=EDX *)
-  econstructor. split. eapply exec_straight_two. simpl; eauto. 
-  rewrite H0.
-  change (nextinstr rs1 # ECX <- (rs1 EDX) EAX) with (rs1#EAX). 
-  change ((nextinstr rs1 # ECX <- (rs1 EDX)) # EDX <- Vundef ECX) with (rs1#EDX).
-  rewrite H1. rewrite H2. eauto. auto. auto.
-  intuition Simplifs.
-(* r1=EAX r2<>EDX *)
-  econstructor. split. eapply exec_straight_one. rewrite H0. 
-  replace (rs1 # EDX <- Vundef r2) with (rs1 r2). rewrite H1; rewrite H2. eauto. 
-  symmetry. Simplifs. auto. 
-  intuition Simplifs.
-(* r1 <> EAX *)
-  monadInv H.
-  set (rs2 := nextinstr (rs1#XMM7 <- (rs1#EAX))).
-  exploit (mk_mov2_correct r1 EAX r2 ECX). congruence. instantiate (1 := rs2). 
-  intros [rs3 [A [B [C D]]]].
-  econstructor; split.
-  apply exec_straight_step with rs2 m; auto.
-  eapply exec_straight_trans. eexact A. 
-  eapply exec_straight_three.
-  rewrite H0. replace (rs3 EAX) with (rs1 r1). replace (rs3 # EDX <- Vundef ECX) with (rs1 r2).
-  rewrite H1; rewrite H2. eauto.  
-  simpl; eauto. simpl; eauto.
-  auto. auto. auto. 
-  split. Simplifs. 
-  intros. destruct (preg_eq r EAX). subst.
-  Simplifs. rewrite D; auto with asmgen. 
-  Simplifs. rewrite D; auto with asmgen. unfold rs2; Simplifs.
-Qed.
-
-(** Smart constructor for modulus *)
-
-Lemma mk_mod_correct:
- forall mkinstr dsem msem r1 r2 k c (rs1: regset) m vq vr,
-  mk_mod mkinstr r1 r2 k = OK c ->
-  (forall r c rs m,
-   exec_instr ge c (mkinstr r) rs m =
-      let vn := rs#EAX in let vd := (rs#EDX <- Vundef)#r in
-      match dsem vn vd, msem vn vd with
-      | Some vq, Some vr => Next (nextinstr_nf (rs#EAX <- vq #EDX <- vr)) m
-      | _, _ => Stuck
-      end) ->
-  dsem rs1#r1 rs1#r2 = Some vq ->
-  msem rs1#r1 rs1#r2 = Some vr ->
-  exists rs2,
-     exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#r1 = vr
-  /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r.
-Proof.
-  unfold mk_mod; intros. 
-  destruct (ireg_eq r1 EAX). destruct (ireg_eq r2 EDX); monadInv H.
-(* r1=EAX r2=EDX *)
-  econstructor. split. eapply exec_straight_three.
-  simpl; eauto.
-  rewrite H0.
-  change (nextinstr rs1 # ECX <- (rs1 EDX) EAX) with (rs1#EAX). 
-  change ((nextinstr rs1 # ECX <- (rs1 EDX)) # EDX <- Vundef ECX) with (rs1#EDX).
-  rewrite H1. rewrite H2. eauto.
-  simpl; eauto.
-  auto. auto. auto. 
-  intuition Simplifs.
-(* r1=EAX r2<>EDX *)
-  econstructor. split. eapply exec_straight_two. rewrite H0. 
-  replace (rs1 # EDX <- Vundef r2) with (rs1 r2). rewrite H1; rewrite H2. eauto. 
-  symmetry. Simplifs.
-  simpl; eauto. 
-  auto. auto.
-  intuition Simplifs.
-(* r1 <> EAX *)
-  monadInv H.
-  set (rs2 := nextinstr (rs1#XMM7 <- (rs1#EAX))).
-  exploit (mk_mov2_correct r1 EAX r2 ECX). congruence. instantiate (1 := rs2). 
-  intros [rs3 [A [B [C D]]]].
-  econstructor; split.
-  apply exec_straight_step with rs2 m; auto.
-  eapply exec_straight_trans. eexact A. 
-  eapply exec_straight_three.
-  rewrite H0. replace (rs3 EAX) with (rs1 r1). replace (rs3 # EDX <- Vundef ECX) with (rs1 r2).
-  rewrite H1; rewrite H2. eauto.  
-  simpl; eauto. simpl; eauto.
-  auto. auto. auto. 
-  split. Simplifs. 
-  intros. destruct (preg_eq r EAX). subst.
-  Simplifs. rewrite D; auto with asmgen.
-  Simplifs. rewrite D; auto with asmgen. unfold rs2; Simplifs.
-Qed.
+(** Properties of division *)
 
 Remark divs_mods_exist:
   forall v1 v2,
@@ -368,46 +190,42 @@ Qed.
 (** Smart constructor for [shrx] *)
 
 Lemma mk_shrximm_correct:
-  forall r1 n k c (rs1: regset) v m,
-  mk_shrximm r1 n k = OK c ->
-  Val.shrx (rs1#r1) (Vint n) = Some v ->
+  forall n k c (rs1: regset) v m,
+  mk_shrximm n k = OK c ->
+  Val.shrx (rs1#EAX) (Vint n) = Some v ->
   exists rs2,
      exec_straight ge fn c rs1 m k rs2 m
-  /\ rs2#r1 = v
-  /\ forall r, nontemp_preg r = true -> r <> r1 -> rs2#r = rs1#r.
+  /\ rs2#EAX = v
+  /\ forall r, data_preg r = true -> r <> EAX -> r <> ECX -> rs2#r = rs1#r.
 Proof.
   unfold mk_shrximm; intros. inv H.
   exploit Val.shrx_shr; eauto. intros [x [y [A [B C]]]].
   inversion B; clear B; subst y; subst v; clear H0.
-  set (tmp := if ireg_eq r1 ECX then EDX else ECX).
-  assert (TMP1: tmp <> r1). unfold tmp; destruct (ireg_eq r1 ECX); congruence. 
-  assert (TMP2: nontemp_preg tmp = false). unfold tmp; destruct (ireg_eq r1 ECX); auto. 
   set (tnm1 := Int.sub (Int.shl Int.one n) Int.one).
   set (x' := Int.add x tnm1).
   set (rs2 := nextinstr (compare_ints (Vint x) (Vint Int.zero) rs1 m)).
-  set (rs3 := nextinstr (rs2#tmp <- (Vint x'))).
-  set (rs4 := nextinstr (if Int.lt x Int.zero then rs3#r1 <- (Vint x') else rs3)).
-  set (rs5 := nextinstr_nf (rs4#r1 <- (Val.shr rs4#r1 (Vint n)))).
-  assert (rs3#r1 = Vint x). unfold rs3. Simplifs. 
-  assert (rs3#tmp = Vint x'). unfold rs3. Simplifs. 
+  set (rs3 := nextinstr (rs2#ECX <- (Vint x'))).
+  set (rs4 := nextinstr (if Int.lt x Int.zero then rs3#EAX <- (Vint x') else rs3)).
+  set (rs5 := nextinstr_nf (rs4#EAX <- (Val.shr rs4#EAX (Vint n)))).
+  assert (rs3#EAX = Vint x). unfold rs3. Simplifs.
+  assert (rs3#ECX = Vint x'). unfold rs3. Simplifs.
   exists rs5. split. 
   apply exec_straight_step with rs2 m. simpl. rewrite A. simpl. rewrite Int.and_idem. auto. auto.
   apply exec_straight_step with rs3 m. simpl. 
-  change (rs2 r1) with (rs1 r1). rewrite A. simpl. 
+  change (rs2 EAX) with (rs1 EAX). rewrite A. simpl. 
   rewrite (Int.add_commut Int.zero tnm1). rewrite Int.add_zero. auto. auto.
   apply exec_straight_step with rs4 m. simpl. 
   change (rs3 SOF) with (rs2 SOF). unfold rs2. rewrite nextinstr_inv; auto with asmgen. 
   unfold compare_ints. rewrite Pregmap.gso; auto with asmgen. rewrite Pregmap.gss.
-  unfold Val.cmp. simpl. unfold rs4. destruct (Int.lt x Int.zero); simpl; auto. rewrite H0; auto.
+  unfold Val.cmp. simpl. unfold rs4. destruct (Int.lt x Int.zero); simpl; auto.
   unfold rs4. destruct (Int.lt x Int.zero); simpl; auto.
   apply exec_straight_one. auto. auto.
-  split. unfold rs5. Simplifs. unfold rs4. Simplifs.
-  f_equal. destruct (Int.lt x Int.zero).
-  Simplifs. rewrite A. auto. Simplifs. congruence. 
-  intros. unfold rs5; Simplifs. unfold rs4; Simplifs. 
-  transitivity (rs3#r).
-  destruct (Int.lt x Int.zero). Simplifs. auto. 
-  unfold rs3; Simplifs. unfold rs2; Simplifs. unfold compare_ints; Simplifs.
+  split. unfold rs5. Simplifs. unfold rs4. rewrite nextinstr_inv; auto with asmgen.
+  destruct (Int.lt x Int.zero). rewrite Pregmap.gss. rewrite A; auto. rewrite A; rewrite H; auto.
+  intros. unfold rs5. Simplifs. unfold rs4. Simplifs. 
+  transitivity (rs3#r). destruct (Int.lt x Int.zero). Simplifs. auto. 
+  unfold rs3. Simplifs. unfold rs2. Simplifs.  
+  unfold compare_ints. Simplifs.  
 Qed.
 
 (** Smart constructor for integer conversions *)
@@ -420,14 +238,14 @@ Lemma mk_intconv_correct:
   exists rs2,
      exec_straight ge fn c rs1 m k rs2 m
   /\ rs2#rd = sem rs1#rs
-  /\ forall r, nontemp_preg r = true -> r <> rd -> rs2#r = rs1#r.
+  /\ forall r, data_preg r = true -> r <> rd -> r <> EAX -> rs2#r = rs1#r.
 Proof.
   unfold mk_intconv; intros. destruct (low_ireg rs); monadInv H.
   econstructor. split. apply exec_straight_one. rewrite H0. eauto. auto. 
-  intuition Simplifs.
+  split. Simplifs. intros. Simplifs.
   econstructor. split. eapply exec_straight_two. 
-  simpl. eauto. apply H0. auto. auto.
-  intuition Simplifs.
+  simpl. eauto. apply H0. auto. auto. 
+  split. Simplifs. intros. Simplifs.  
 Qed.
 
 (** Smart constructor for small stores *)
@@ -449,10 +267,10 @@ Lemma mk_smallstore_correct:
   mk_smallstore sto addr r k = OK c ->
   Mem.storev chunk m1 (eval_addrmode ge addr rs1) (rs1 r) = Some m2 ->
   (forall c r addr rs m,
-   exec_instr ge c (sto addr r) rs m = exec_store ge chunk m addr rs r) ->
+   exec_instr ge c (sto addr r) rs m = exec_store ge chunk m addr rs r nil) ->
   exists rs2,
      exec_straight ge fn c rs1 m1 k rs2 m2
-  /\ forall r, nontemp_preg r = true -> rs2#r = rs1#r.
+  /\ forall r, data_preg r = true -> r <> EAX /\ r <> ECX -> rs2#r = rs1#r.
 Proof.
   unfold mk_smallstore; intros. 
   remember (low_ireg r) as low. destruct low.
@@ -461,17 +279,17 @@ Proof.
   unfold exec_store. rewrite H0. eauto. auto.
   intros; Simplifs.
 (* high reg *)
-   remember (addressing_mentions addr ECX) as mentions. destruct mentions; monadInv H.
-(* ECX is mentioned. *)
+   remember (addressing_mentions addr EAX) as mentions. destruct mentions; monadInv H.
+(* EAX is mentioned. *)
   assert (r <> ECX). red; intros; subst r; discriminate. 
   set (rs2 := nextinstr (rs1#ECX <- (eval_addrmode ge addr rs1))).
-  set (rs3 := nextinstr (rs2#EDX <- (rs1 r))).
+  set (rs3 := nextinstr (rs2#EAX <- (rs1 r))).
   econstructor; split.
   apply exec_straight_three with rs2 m1 rs3 m1. 
   simpl. auto. 
   simpl. replace (rs2 r) with (rs1 r). auto. symmetry. unfold rs2; Simplifs. 
   rewrite H1. unfold exec_store. simpl. rewrite Int.add_zero. 
-  change (rs3 EDX) with (rs1 r).
+  change (rs3 EAX) with (rs1 r).
   change (rs3 ECX) with (eval_addrmode ge addr rs1).
   replace (Val.add (eval_addrmode ge addr rs1) (Vint Int.zero))
      with (eval_addrmode ge addr rs1).
@@ -479,18 +297,18 @@ Proof.
   destruct (eval_addrmode ge addr rs1); simpl in H0; try discriminate.
   simpl. rewrite Int.add_zero; auto. 
   auto. auto. auto. 
-  intros. Simplifs. unfold rs3; Simplifs. unfold rs2; Simplifs. 
-(* ECX is not mentioned *)
-  set (rs2 := nextinstr (rs1#ECX <- (rs1 r))).
+  intros. destruct H3. Simplifs. unfold rs3; Simplifs. unfold rs2; Simplifs. 
+(* EAX is not mentioned *)
+  set (rs2 := nextinstr (rs1#EAX <- (rs1 r))).
   econstructor; split.
   apply exec_straight_two with rs2 m1.
   simpl. auto.
   rewrite H1. unfold exec_store. 
-  rewrite (addressing_mentions_correct addr ECX rs2 rs1); auto.
-  change (rs2 ECX) with (rs1 r). rewrite H0. eauto. 
+  rewrite (addressing_mentions_correct addr EAX rs2 rs1); auto.
+  change (rs2 EAX) with (rs1 r). rewrite H0. eauto. 
   intros. unfold rs2; Simplifs.
   auto. auto.
-  intros. rewrite dec_eq_false. Simplifs. unfold rs2; Simplifs. congruence.
+  intros. destruct H2. simpl. Simplifs. unfold rs2; Simplifs.
 Qed.
 
 (** Accessing slots in the stack frame *)
@@ -521,6 +339,8 @@ Proof.
   unfold exec_load. rewrite H1; rewrite H0; auto. 
   unfold exec_load. rewrite H1; rewrite H0; auto.
   intuition Simplifs.
+  (* long *)
+  inv H.
 Qed.
 
 Lemma storeind_correct:
@@ -549,7 +369,9 @@ Proof.
   intros. apply nextinstr_nf_inv1; auto.
   econstructor; split. apply exec_straight_one.
   simpl. unfold exec_store. rewrite H1; rewrite H0. eauto. auto.
-  intros. Simplifs. rewrite dec_eq_true. Simplifs. 
+  intros. simpl. Simplifs.
+  (* long *)
+  inv H.
 Qed.
 
 (** Translation of addressing modes *)
@@ -608,7 +430,7 @@ Lemma compare_ints_spec:
      rs'#ZF = Val.cmpu (Mem.valid_pointer m) Ceq v1 v2
   /\ rs'#CF = Val.cmpu (Mem.valid_pointer m) Clt v1 v2
   /\ rs'#SOF = Val.cmp Clt v1 v2
-  /\ (forall r, nontemp_preg r = true -> rs'#r = rs#r).
+  /\ (forall r, data_preg r = true -> rs'#r = rs#r).
 Proof.
   intros. unfold rs'; unfold compare_ints.
   split. auto. 
@@ -737,7 +559,7 @@ Lemma compare_floats_spec:
      rs'#ZF = Val.of_bool (negb (Float.cmp Cne n1 n2))
   /\ rs'#CF = Val.of_bool (negb (Float.cmp Cge n1 n2))
   /\ rs'#PF = Val.of_bool (negb (Float.cmp Ceq n1 n2 || Float.cmp Clt n1 n2 || Float.cmp Cgt n1 n2))
-  /\ (forall r, nontemp_preg r = true -> rs'#r = rs#r).
+  /\ (forall r, data_preg r = true -> rs'#r = rs#r).
 Proof.
   intros. unfold rs'; unfold compare_floats.
   split. auto. 
@@ -890,7 +712,7 @@ Lemma transl_cond_correct:
      | None => True
      | Some b => eval_extcond (testcond_for_condition cond) rs' = Some b
      end
-  /\ forall r, nontemp_preg r = true -> rs'#r = rs r.
+  /\ forall r, data_preg r = true -> rs'#r = rs r.
 Proof.
   unfold transl_cond; intros. 
   destruct cond; repeat (destruct args; try discriminate); monadInv H.
@@ -968,19 +790,19 @@ Proof.
   intros. unfold eval_testcond. repeat rewrite Pregmap.gso; auto with asmgen.
 Qed.
 
-Lemma mk_setcc_correct:
+Lemma mk_setcc_base_correct:
   forall cond rd k rs1 m,
   exists rs2,
-  exec_straight ge fn (mk_setcc cond rd k) rs1 m k rs2 m
+  exec_straight ge fn (mk_setcc_base cond rd k) rs1 m k rs2 m
   /\ rs2#rd = Val.of_optbool(eval_extcond cond rs1)
-  /\ forall r, nontemp_preg r = true -> r <> rd -> rs2#r = rs1#r.
+  /\ forall r, data_preg r = true -> r <> EAX /\ r <> ECX -> r <> rd -> rs2#r = rs1#r.
 Proof.
   intros. destruct cond; simpl in *.
-(* base *)
+- (* base *)
   econstructor; split.
-  apply exec_straight_one. simpl; eauto. auto.
-  intuition Simplifs.
-(* or *)
+  apply exec_straight_one. simpl; eauto. auto. 
+  split. Simplifs. intros; Simplifs. 
+- (* or *)
   assert (Val.of_optbool
     match eval_testcond c1 rs1 with
     | Some b1 =>
@@ -996,7 +818,7 @@ Proof.
   destruct b; auto.
   auto.
   rewrite H; clear H.
-  destruct (ireg_eq rd EDX).
+  destruct (ireg_eq rd EAX).
   subst rd. econstructor; split.
   eapply exec_straight_three.
   simpl; eauto.
@@ -1010,9 +832,9 @@ Proof.
   simpl. rewrite eval_testcond_nextinstr. repeat rewrite eval_testcond_set_ireg. eauto. 
   simpl. eauto. 
   auto. auto. auto.
-  split. Simplifs. rewrite Val.or_commut. f_equal; Simplifs. 
-  intros. Simplifs.
-(* and *)
+  split. Simplifs. rewrite Val.or_commut. decEq; Simplifs.
+  intros. destruct H0; Simplifs. 
+- (* and *)
   assert (Val.of_optbool
     match eval_testcond c1 rs1 with
     | Some b1 =>
@@ -1023,12 +845,14 @@ Proof.
     | None => None
     end =
     Val.and (Val.of_optbool (eval_testcond c1 rs1)) (Val.of_optbool (eval_testcond c2 rs1))).
-  destruct (eval_testcond c1 rs1). destruct (eval_testcond c2 rs1). 
-  destruct b; destruct b0; auto.
-  destruct b; auto.
-  auto.
+  {
+    destruct (eval_testcond c1 rs1). destruct (eval_testcond c2 rs1). 
+    destruct b; destruct b0; auto.
+    destruct b; auto.
+    auto.
+  }
   rewrite H; clear H.
-  destruct (ireg_eq rd EDX).
+  destruct (ireg_eq rd EAX).
   subst rd. econstructor; split.
   eapply exec_straight_three.
   simpl; eauto.
@@ -1042,10 +866,25 @@ Proof.
   simpl. rewrite eval_testcond_nextinstr. repeat rewrite eval_testcond_set_ireg. eauto. 
   simpl. eauto. 
   auto. auto. auto.
-  split. Simplifs. rewrite Val.and_commut. f_equal; Simplifs. 
-  intros. Simplifs.
+  split. Simplifs. rewrite Val.and_commut. decEq; Simplifs.
+  intros. destruct H0; Simplifs. 
 Qed.
 
+Lemma mk_setcc_correct:
+  forall cond rd k rs1 m,
+  exists rs2,
+  exec_straight ge fn (mk_setcc cond rd k) rs1 m k rs2 m
+  /\ rs2#rd = Val.of_optbool(eval_extcond cond rs1)
+  /\ forall r, data_preg r = true -> r <> EAX /\ r <> ECX -> r <> rd -> rs2#r = rs1#r.
+Proof.
+  intros. unfold mk_setcc. destruct (low_ireg rd).
+- apply mk_setcc_base_correct.
+- exploit mk_setcc_base_correct. intros [rs2 [A [B C]]].
+  econstructor; split. eapply exec_straight_trans. eexact A. apply exec_straight_one. 
+    simpl. eauto. simpl. auto.
+  intuition Simplifs. 
+Qed. 
+
 (** Translation of arithmetic operations. *)
 
 Ltac ArgsInv :=
@@ -1053,7 +892,8 @@ Ltac ArgsInv :=
   | [ H: Error _ = OK _ |- _ ] => discriminate
   | [ H: match ?args with nil => _ | _ :: _ => _ end = OK _ |- _ ] => destruct args; ArgsInv
   | [ H: bind _ _ = OK _ |- _ ] => monadInv H; ArgsInv
-  | [ H: assertion _ = OK _ |- _ ] => monadInv H; subst; ArgsInv
+  | [ H: match _ with left _ => _ | right _ => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
+  | [ H: match _ with true => _ | false => assertion_failed end = OK _ |- _ ] => monadInv H; ArgsInv
   | [ H: ireg_of _ = OK _ |- _ ] => simpl in *; rewrite (ireg_of_eq _ _ H) in *; clear H; ArgsInv
   | [ H: freg_of _ = OK _ |- _ ] => simpl in *; rewrite (freg_of_eq _ _ H) in *; clear H; ArgsInv
   | _ => idtac
@@ -1071,25 +911,22 @@ Lemma transl_op_correct:
   exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ Val.lessdef v rs'#(preg_of res)
-  /\ forall r, 
-     match op with Omove => data_preg r = true /\ r <> ST0 | _ => nontemp_preg r = true end ->
-     r <> preg_of res -> rs' r = rs r.
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r.
 Proof.
+Transparent destroyed_by_op.
   intros until v; intros TR EV.
   assert (SAME:
   (exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ rs'#(preg_of res) = v
-  /\ forall r, 
-     match op with Omove => data_preg r = true /\ r <> ST0 | _ => nontemp_preg r = true end ->
-     r <> preg_of res -> rs' r = rs r) ->
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r) ->
   exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ Val.lessdef v rs'#(preg_of res)
-  /\ forall r, 
-     match op with Omove => data_preg r = true /\ r <> ST0 | _ => nontemp_preg r = true end ->
-     r <> preg_of res -> rs' r = rs r).
-  intros [rs' [A [B C]]]. subst v. exists rs'; auto. 
+  /\ forall r, data_preg r = true -> r <> preg_of res -> preg_notin r (destroyed_by_op op) -> rs' r = rs r).
+  {
+    intros [rs' [A [B C]]]. subst v. exists rs'; auto.
+  } 
 
   destruct op; simpl in TR; ArgsInv; simpl in EV; try (inv EV); try (apply SAME; TranslOp; fail).
 (* move *)
@@ -1109,32 +946,34 @@ Proof.
   apply SAME. eapply mk_intconv_correct; eauto.
 (* div *)
   apply SAME.
-  specialize (divs_mods_exist (rs x0) (rs x1)). rewrite H0. 
-  destruct (Val.mods (rs x0) (rs x1)) as [vr|] eqn:?; intros; try contradiction.
-  eapply mk_div_correct with (dsem := Val.divs) (msem := Val.mods); eauto.
+  specialize (divs_mods_exist (rs EAX) (rs ECX)). rewrite H0. 
+  destruct (Val.mods (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
+  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
+  auto. auto.
+  simpl in H3. destruct H3; Simplifs.
 (* divu *)
   apply SAME.
-  specialize (divu_modu_exist (rs x0) (rs x1)). rewrite H0. 
-  destruct (Val.modu (rs x0) (rs x1)) as [vr|] eqn:?; intros; try contradiction.
-  eapply mk_div_correct with (dsem := Val.divu) (msem := Val.modu); eauto.
+  specialize (divu_modu_exist (rs EAX) (rs ECX)). rewrite H0. 
+  destruct (Val.modu (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
+  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
+  auto. auto.
+  simpl in H3. destruct H3; Simplifs.
 (* mod *)
   apply SAME.
-  specialize (divs_mods_exist (rs x0) (rs x1)). rewrite H0. 
-  destruct (Val.divs (rs x0) (rs x1)) as [vq|] eqn:?; intros; try contradiction.
-  eapply mk_mod_correct with (dsem := Val.divs) (msem := Val.mods); eauto.
+  specialize (divs_mods_exist (rs EAX) (rs ECX)). rewrite H0. 
+  destruct (Val.divs (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
+  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
+  auto. auto.
+  simpl in H3. destruct H3; Simplifs.
 (* modu *)
   apply SAME.
-  specialize (divu_modu_exist (rs x0) (rs x1)). rewrite H0. 
-  destruct (Val.divu (rs x0) (rs x1)) as [vq|] eqn:?; intros; try contradiction.
-  eapply mk_mod_correct with (dsem := Val.divu) (msem := Val.modu); eauto.
-(* shl *)
-  apply SAME. eapply mk_shift_correct; eauto. 
-(* shr *)
-  apply SAME. eapply mk_shift_correct; eauto. 
+  specialize (divu_modu_exist (rs EAX) (rs ECX)). rewrite H0. 
+  destruct (Val.divu (rs EAX) (rs ECX)) as [vr|] eqn:?; intros; try contradiction.
+  TranslOp. change (rs#EDX<-Vundef ECX) with (rs#ECX). rewrite H0; rewrite Heqo. eauto.
+  auto. auto.
+  simpl in H3. destruct H3; Simplifs.
 (* shrximm *)
   apply SAME. eapply mk_shrximm_correct; eauto.
-(* shru *)
-  apply SAME. eapply mk_shift_correct; eauto.
 (* lea *)
   exploit transl_addressing_mode_correct; eauto. intros EA.
   TranslOp. rewrite nextinstr_inv; auto with asmgen. rewrite Pregmap.gss; auto.
@@ -1163,7 +1002,7 @@ Lemma transl_load_correct:
   exists rs',
      exec_straight ge fn c rs m k rs' m
   /\ rs'#(preg_of dest) = v
-  /\ forall r, nontemp_preg r = true -> r <> preg_of dest -> rs'#r = rs#r.
+  /\ forall r, data_preg r = true -> r <> preg_of dest -> rs'#r = rs#r.
 Proof.
   unfold transl_load; intros. monadInv H. 
   exploit transl_addressing_mode_correct; eauto. intro EA.
@@ -1191,7 +1030,7 @@ Lemma transl_store_correct:
   Mem.storev chunk m a (rs (preg_of src)) = Some m' ->
   exists rs',
      exec_straight ge fn c rs m k rs' m'
-  /\ forall r, nontemp_preg r = true -> rs'#r = rs#r.
+  /\ forall r, data_preg r = true -> preg_notin r (destroyed_by_store chunk addr) -> rs'#r = rs#r.
 Proof.
   unfold transl_store; intros. monadInv H. 
   exploit transl_addressing_mode_correct; eauto. intro EA.
@@ -1223,7 +1062,7 @@ Proof.
 (* float32 *)
   econstructor; split.
   apply exec_straight_one. simpl. unfold exec_store. rewrite H1. eauto. auto.
-  intros. Simplifs.
+  intros. Transparent destroyed_by_store. simpl in H2. simpl. Simplifs.
 (* float64 *)
   econstructor; split.
   apply exec_straight_one. simpl. unfold exec_store. erewrite Mem.storev_float64al32; eauto. auto.
diff --git a/ia32/ConstpropOp.vp b/ia32/ConstpropOp.vp
index e6ba98a..fea0afd 100644
--- a/ia32/ConstpropOp.vp
+++ b/ia32/ConstpropOp.vp
@@ -31,6 +31,7 @@ Inductive approx : Type :=
                              no compile-time information is available. *)
   | I: int -> approx     (** A known integer value. *)
   | F: float -> approx   (** A known floating-point value. *)
+  | L: int64 -> approx   (** A know 64-bit integer value. *)
   | G: ident -> int -> approx
                          (** The value is the address of the given global
                              symbol plus the given integer offset. *)
@@ -130,6 +131,11 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) :=
   | Oshru, I n1 :: I n2 :: nil => if Int.ltu n2 Int.iwordsize then I(Int.shru n1 n2) else Unknown
   | Oshruimm n, I n1 :: nil => if Int.ltu n Int.iwordsize then I(Int.shru n1 n) else Unknown
   | Ororimm n, I n1 :: nil => if Int.ltu n Int.iwordsize then I(Int.ror n1 n) else Unknown
+  | Oshldimm n, I n1 :: I n2 :: nil =>
+      let n' := Int.sub Int.iwordsize n in
+      if Int.ltu n Int.iwordsize && Int.ltu n' Int.iwordsize
+      then I(Int.or (Int.shl n1 n) (Int.shru n2 n')) 
+      else Unknown
   | Olea mode, vl => eval_static_addressing mode vl
   | Onegf, F n1 :: nil => F(Float.neg n1)
   | Oabsf, F n1 :: nil => F(Float.abs n1)
@@ -140,6 +146,9 @@ Nondetfunction eval_static_operation (op: operation) (vl: list approx) :=
   | Osingleoffloat, F n1 :: nil => F(Float.singleoffloat n1)
   | Ointoffloat, F n1 :: nil => eval_static_intoffloat n1
   | Ofloatofint, I n1 :: nil => if propagate_float_constants tt then F(Float.floatofint n1) else Unknown
+  | Omakelong, I n1 :: I n2 :: nil => L(Int64.ofwords n1 n2)
+  | Olowlong, L n :: nil => I(Int64.loword n)
+  | Ohighlong, L n :: nil => I(Int64.hiword n)
   | Ocmp c, vl => eval_static_condition_val c vl
   | _, _ => Unknown
   end.
diff --git a/ia32/ConstpropOpproof.v b/ia32/ConstpropOpproof.v
index d792d8a..b6c3cdc 100644
--- a/ia32/ConstpropOpproof.v
+++ b/ia32/ConstpropOpproof.v
@@ -44,9 +44,10 @@ Definition val_match_approx (a: approx) (v: val) : Prop :=
   | Unknown => True
   | I p => v = Vint p
   | F p => v = Vfloat p
+  | L p => v = Vlong p
   | G symb ofs => v = symbol_address ge symb ofs
   | S ofs => v = Val.add sp (Vint ofs)
-  | _ => False
+  | Novalue => False
   end.
 
 Inductive val_list_match_approx: list approx -> list val -> Prop :=
@@ -64,6 +65,8 @@ Ltac SimplVMA :=
       simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (F _) ?v) |- _ =>
       simpl in H; (try subst v); SimplVMA
+  | H: (val_match_approx (L _) ?v) |- _ =>
+      simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (G _ _) ?v) |- _ =>
       simpl in H; (try subst v); SimplVMA
   | H: (val_match_approx (S _) ?v) |- _ =>
@@ -156,6 +159,8 @@ Proof.
   destruct (Int.ltu n2 Int.iwordsize); simpl; auto.
   destruct (Int.ltu n Int.iwordsize); simpl; auto.
   destruct (Int.ltu n Int.iwordsize); simpl; auto.
+  destruct (Int.ltu n Int.iwordsize);
+  destruct (Int.ltu (Int.sub Int.iwordsize n) Int.iwordsize); simpl; auto.
   eapply eval_static_addressing_correct; eauto.
   unfold eval_static_intoffloat.
   destruct (Float.intoffloat n1) eqn:?; simpl in H0; inv H0.
diff --git a/ia32/Machregs.v b/ia32/Machregs.v
index df96393..3b84aa5 100644
--- a/ia32/Machregs.v
+++ b/ia32/Machregs.v
@@ -10,34 +10,32 @@
 (*                                                                     *)
 (* *********************************************************************)
 
+Require Import String.
 Require Import Coqlib.
 Require Import Maps.
 Require Import AST.
+Require Import Integers.
+Require Import Op.
 
 (** ** Machine registers *)
 
 (** The following type defines the machine registers that can be referenced
   as locations.  These include:
-- Integer registers that can be allocated to RTL pseudo-registers ([Rxx]).
-- Floating-point registers that can be allocated to RTL pseudo-registers 
-  ([Fxx]).
-- Two integer registers, not allocatable, reserved as temporaries for
-  spilling and reloading ([IT1, IT2]).
-- Two float registers, not allocatable, reserved as temporaries for
-  spilling and reloading ([FT1, FT2]).
+- Integer registers that can be allocated to RTL pseudo-registers.
+- Floating-point registers that can be allocated to RTL pseudo-registers.
+- The special [FP0] register denoting the top of the X87 float stack.
 
   The type [mreg] does not include special-purpose or reserved
   machine registers such as the stack pointer and the condition codes. *)
 
 Inductive mreg: Type :=
   (** Allocatable integer regs *)
-  | AX: mreg | BX: mreg | SI: mreg | DI: mreg | BP: mreg
+  | AX: mreg | BX: mreg | CX: mreg | DX: mreg | SI: mreg | DI: mreg | BP: mreg
   (** Allocatable float regs *)
-  | X0: mreg | X1: mreg | X2: mreg | X3: mreg | X4: mreg | X5: mreg
-  (** Integer temporaries *)
-  | IT1: mreg (* DX *) | IT2: mreg (* CX *)
-  (** Float temporaries *)
-  | FT1: mreg (* X6 *) | FT2: mreg (* X7 *) | FP0: mreg (* top of FP stack *).
+  | X0: mreg | X1: mreg | X2: mreg | X3: mreg 
+  | X4: mreg | X5: mreg | X6: mreg | X7: mreg
+  (** Special float reg *)
+  | FP0: mreg (**r top of x87 FP stack *).
 
 Lemma mreg_eq: forall (r1 r2: mreg), {r1 = r2} + {r1 <> r2}.
 Proof. decide equality. Defined.
@@ -45,28 +43,24 @@ Global Opaque mreg_eq.
 
 Definition mreg_type (r: mreg): typ :=
   match r with
-  | AX => Tint | BX => Tint | SI => Tint | DI => Tint | BP => Tint
-  (** Allocatable float regs *)
-  | X0 => Tfloat | X1 => Tfloat | X2 => Tfloat
-  | X3 => Tfloat | X4 => Tfloat | X5 => Tfloat
-  (** Integer temporaries *)
-  | IT1 => Tint | IT2 => Tint
-  (** Float temporaries *)
-  | FT1 => Tfloat | FT2 => Tfloat | FP0 => Tfloat
+  | AX => Tint | BX => Tint | CX => Tint | DX => Tint
+  | SI => Tint | DI => Tint | BP => Tint
+  | X0 => Tfloat | X1 => Tfloat | X2 => Tfloat | X3 => Tfloat
+  | X4 => Tfloat | X5 => Tfloat | X6 => Tfloat | X7 => Tfloat
+  | FP0 => Tfloat
   end.
 
-Open Scope positive_scope.
+Local Open Scope positive_scope.
 
 Module IndexedMreg <: INDEXED_TYPE.
   Definition t := mreg.
   Definition eq := mreg_eq.
   Definition index (r: mreg): positive :=
     match r with
-    | AX => 1 | BX => 2 | SI => 3 | DI => 4 | BP => 5
-    | X0 => 6 | X1 => 7 | X2 => 8
-    | X3 => 9 | X4 => 10 | X5 => 11
-    | IT1 => 12 | IT2 => 13
-    | FT1 => 14 | FT2 => 15 | FP0 => 16
+    | AX => 1 | BX => 2 | CX => 3 | DX => 4 | SI => 5 | DI => 6 | BP => 7
+    | X0 => 8 | X1 => 9 | X2 => 10 | X3 => 11
+    | X4 => 12 | X5 => 13 | X6 => 14 | X7 => 15
+    | FP0 => 16
     end.
   Lemma index_inj:
     forall r1 r2, index r1 = index r2 -> r1 = r2.
@@ -75,3 +69,147 @@ Module IndexedMreg <: INDEXED_TYPE.
   Qed.
 End IndexedMreg.
 
+(** ** Destroyed registers, preferred registers *)
+
+Definition destroyed_by_op (op: operation): list mreg :=
+  match op with
+  | Omove => FP0 :: nil
+  | Ocast8signed | Ocast8unsigned | Ocast16signed | Ocast16unsigned => AX :: nil
+  | Odiv => AX :: DX :: nil
+  | Odivu => AX :: DX :: nil
+  | Omod => AX :: DX :: nil
+  | Omodu => AX :: DX :: nil
+  | Oshrximm _ => CX :: nil
+  | Ocmp _ => AX :: CX :: nil
+  | _ => nil
+  end.
+
+Definition destroyed_by_load (chunk: memory_chunk) (addr: addressing): list mreg :=
+  nil.
+
+Definition destroyed_by_store (chunk: memory_chunk) (addr: addressing): list mreg :=
+  match chunk with
+  | Mint8signed | Mint8unsigned => AX :: CX :: nil
+  | Mint16signed | Mint16unsigned | Mint32 | Mint64 => nil
+  | Mfloat32 => X7 :: nil
+  | Mfloat64 | Mfloat64al32 => FP0 :: nil
+  end.
+
+Definition destroyed_by_cond (cond: condition): list mreg :=
+  nil.
+
+Definition destroyed_by_jumptable: list mreg :=
+  nil.
+
+Definition destroyed_by_builtin (ef: external_function): list mreg :=
+  match ef with
+  | EF_memcpy sz al =>
+      if zle sz 32 then CX :: X7 :: nil else CX :: SI :: DI :: nil
+  | EF_vload _ => nil
+  | EF_vload_global _ _ _ => nil
+  | EF_vstore _ => AX :: CX :: X7 :: nil
+  | EF_vstore_global _ _ _ => AX :: X7 :: nil
+  | _ => AX :: CX :: X7 :: FP0 :: nil
+  end.
+
+Definition destroyed_at_function_entry: list mreg :=
+  DX :: FP0 :: nil.   (* must include destroyed_by_op Omove *)
+
+Definition temp_for_parent_frame: mreg :=
+  DX.
+
+Definition mregs_for_operation (op: operation): list (option mreg) * option mreg :=
+  match op with
+  | Odiv => (Some AX :: Some CX :: nil, Some AX)
+  | Odivu => (Some AX :: Some CX :: nil, Some AX)
+  | Omod => (Some AX :: Some CX :: nil, Some DX)
+  | Omodu => (Some AX :: Some CX :: nil, Some DX)
+  | Oshl => (None :: Some CX :: nil, None)
+  | Oshr => (None :: Some CX :: nil, None)
+  | Oshru => (None :: Some CX :: nil, None)
+  | Oshrximm _ => (Some AX :: nil, Some AX)
+  | _ => (nil, None)
+  end.
+
+Local Open Scope string_scope.
+
+Definition builtin_negl := ident_of_string "__builtin_negl".
+Definition builtin_addl := ident_of_string "__builtin_addl".
+Definition builtin_subl := ident_of_string "__builtin_subl".
+Definition builtin_mull := ident_of_string "__builtin_mull".
+
+Definition mregs_for_builtin (ef: external_function): list (option mreg) * list (option mreg) :=
+  match ef with
+  | EF_memcpy sz al =>
+     if zle sz 32 then (Some AX :: Some DX :: nil, nil) else (Some DI :: Some SI :: nil, nil)
+  | EF_builtin id sg =>
+     if ident_eq id builtin_negl then
+       (Some DX :: Some AX :: nil, Some DX :: Some AX :: nil)
+     else if ident_eq id builtin_addl || ident_eq id builtin_subl then
+       (Some DX :: Some AX :: Some CX :: Some BX :: nil, Some DX :: Some AX :: nil)
+     else if ident_eq id builtin_mull then
+       (Some AX :: Some DX :: nil, Some DX :: Some AX :: nil)
+     else
+       (nil, nil)
+  | _ => (nil, nil)
+  end.
+
+Global Opaque
+    destroyed_by_op destroyed_by_load destroyed_by_store
+    destroyed_by_cond destroyed_by_jumptable destroyed_by_builtin
+    destroyed_at_function_entry temp_for_parent_frame
+    mregs_for_operation mregs_for_builtin.
+
+(** Two-address operations.  Return [true] if the first argument and
+  the result must be in the same location *and* are unconstrained
+  by [mregs_for_operation]. *)
+
+Definition two_address_op (op: operation) : bool :=
+  match op with
+  | Omove => false
+  | Ointconst _ => false
+  | Ofloatconst _ => false
+  | Oindirectsymbol _ => false
+  | Ocast8signed => false
+  | Ocast8unsigned => false
+  | Ocast16signed => false
+  | Ocast16unsigned => false
+  | Oneg => true
+  | Osub => true
+  | Omul => true
+  | Omulimm _ => true
+  | Odiv => false
+  | Odivu => false
+  | Omod => false
+  | Omodu => false
+  | Oand => true
+  | Oandimm _ => true
+  | Oor => true
+  | Oorimm _ => true
+  | Oxor => true
+  | Oxorimm _ => true
+  | Oshl => true
+  | Oshlimm _ => true
+  | Oshr => true
+  | Oshrimm _ => true
+  | Oshrximm _ => false
+  | Oshru => true
+  | Oshruimm _ => true
+  | Ororimm _ => true
+  | Oshldimm _ => true
+  | Olea addr => false
+  | Onegf => true
+  | Oabsf => true
+  | Oaddf => true
+  | Osubf => true
+  | Omulf => true
+  | Odivf => true
+  | Osingleoffloat => false
+  | Ointoffloat => false
+  | Ofloatofint => false
+  | Omakelong => false
+  | Olowlong => false
+  | Ohighlong => false
+  | Ocmp c => false
+  end.
+
diff --git a/ia32/Machregsaux.ml b/ia32/Machregsaux.ml
index 7d6df90..8403746 100644
--- a/ia32/Machregsaux.ml
+++ b/ia32/Machregsaux.ml
@@ -15,11 +15,11 @@
 open Machregs
 
 let register_names = [
-  ("AX", AX); ("BX", BX); ("SI", SI); ("DI", DI); ("BP", BP);
+  ("AX", AX); ("BX", BX); ("CX", CX); ("DX", DX);
+  ("SI", SI); ("DI", DI); ("BP", BP);
   ("XMM0", X0); ("XMM1", X1); ("XMM2", X2); ("XMM3", X3);
-  ("XMM4", X4); ("XMM5", X5);
-  ("DX", IT1); ("CX", IT2);
-  ("XMM6", FT1); ("XMM7", FT2); ("ST0", FP0)
+  ("XMM4", X4); ("XMM5", X5); ("XMM6", X6); ("XMM7", X7);
+  ("ST0", FP0)
 ]
 
 let name_of_register r =
diff --git a/ia32/Op.v b/ia32/Op.v
index a23e8ac..3dc1f77 100644
--- a/ia32/Op.v
+++ b/ia32/Op.v
@@ -96,6 +96,7 @@ Inductive operation : Type :=
   | Oshru: operation                    (**r [rd = r1 >> r2] (unsigned) *)
   | Oshruimm: int -> operation          (**r [rd = r1 >> n] (unsigned) *)
   | Ororimm: int -> operation           (**r rotate right immediate *)
+  | Oshldimm: int -> operation          (**r [rd = r1 << n | r2 >> (32-n)] *)
   | Olea: addressing -> operation       (**r effective address *)
 (*c Floating-point arithmetic: *)
   | Onegf: operation                    (**r [rd = - r1] *)
@@ -108,6 +109,10 @@ Inductive operation : Type :=
 (*c Conversions between int and float: *)
   | Ointoffloat: operation              (**r [rd = signed_int_of_float(r1)] *)
   | Ofloatofint: operation              (**r [rd = float_of_signed_int(r1)] *)
+(*c Manipulating 64-bit integers: *)
+  | Omakelong: operation                (**r [rd = r1 << 32 | r2] *)
+  | Olowlong: operation                 (**r [rd = low-word(r1)] *)
+  | Ohighlong: operation                (**r [rd = high-word(r1)] *)
 (*c Boolean tests: *)
   | Ocmp: condition -> operation.       (**r [rd = 1] if condition holds, [rd = 0] otherwise. *)
 
@@ -130,6 +135,7 @@ Definition eq_operation (x y: operation): {x=y} + {x<>y}.
 Proof.
   generalize Int.eq_dec; intro.
   generalize Float.eq_dec; intro.
+  generalize Int64.eq_dec; intro.
   assert (forall (x y: ident), {x=y}+{x<>y}). exact peq.
   assert (forall (x y: comparison), {x=y}+{x<>y}). decide equality.
   assert (forall (x y: condition), {x=y}+{x<>y}). decide equality.
@@ -222,6 +228,8 @@ Definition eval_operation
   | Oshru, v1::v2::nil => Some (Val.shru v1 v2)
   | Oshruimm n, v1::nil => Some (Val.shru v1 (Vint n))
   | Ororimm n, v1::nil => Some (Val.ror v1 (Vint n))
+  | Oshldimm n, v1::v2::nil => Some (Val.or (Val.shl v1 (Vint n))
+                                            (Val.shru v2 (Vint (Int.sub Int.iwordsize n))))
   | Olea addr, _ => eval_addressing genv sp addr vl
   | Onegf, v1::nil => Some(Val.negf v1)
   | Oabsf, v1::nil => Some(Val.absf v1)
@@ -232,6 +240,9 @@ Definition eval_operation
   | Osingleoffloat, v1::nil => Some(Val.singleoffloat v1)
   | Ointoffloat, v1::nil => Val.intoffloat v1
   | Ofloatofint, v1::nil => Val.floatofint v1
+  | Omakelong, v1::v2::nil => Some(Val.longofwords v1 v2)
+  | Olowlong, v1::nil => Some(Val.loword v1)
+  | Ohighlong, v1::nil => Some(Val.hiword v1)
   | Ocmp c, _ => Some(Val.of_optbool (eval_condition c vl m))
   | _, _ => None
   end.
@@ -306,6 +317,7 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Oshru => (Tint :: Tint :: nil, Tint)
   | Oshruimm _ => (Tint :: nil, Tint)
   | Ororimm _ => (Tint :: nil, Tint)
+  | Oshldimm _ => (Tint :: Tint :: nil, Tint)
   | Olea addr => (type_of_addressing addr, Tint)
   | Onegf => (Tfloat :: nil, Tfloat)
   | Oabsf => (Tfloat :: nil, Tfloat)
@@ -316,6 +328,9 @@ Definition type_of_operation (op: operation) : list typ * typ :=
   | Osingleoffloat => (Tfloat :: nil, Tfloat)
   | Ointoffloat => (Tfloat :: nil, Tint)
   | Ofloatofint => (Tint :: nil, Tfloat)
+  | Omakelong => (Tint :: Tint :: nil, Tlong)
+  | Olowlong => (Tlong :: nil, Tint)
+  | Ohighlong => (Tlong :: nil, Tint)
   | Ocmp c => (type_of_condition c, Tint)
   end.
 
@@ -386,6 +401,8 @@ Proof with (try exact I).
   destruct v0; destruct v1; simpl... destruct (Int.ltu i0 Int.iwordsize)...
   destruct v0; simpl... destruct (Int.ltu i Int.iwordsize)...
   destruct v0; simpl... destruct (Int.ltu i Int.iwordsize)...
+  destruct v0; simpl... destruct (Int.ltu i Int.iwordsize)...
+  destruct v1; simpl... destruct (Int.ltu (Int.sub Int.iwordsize i) Int.iwordsize)...
   eapply type_of_addressing_sound; eauto.
   destruct v0...
   destruct v0...
@@ -396,6 +413,9 @@ Proof with (try exact I).
   destruct v0...
   destruct v0; simpl in H0; inv H0. destruct (Float.intoffloat f); inv H2...
   destruct v0; simpl in H0; inv H0...
+  destruct v0; destruct v1...
+  destruct v0...
+  destruct v0...
   destruct (eval_condition c vl m); simpl... destruct b... 
 Qed.
 
@@ -511,79 +531,41 @@ Proof.
   apply eval_shift_stack_addressing. 
 Qed.
 
-(** Transformation of addressing modes with two operands or more
-  into an equivalent arithmetic operation.  This is used in the [Reload]
-  pass when a store instruction cannot be reloaded directly because
-  it runs out of temporary registers. *)
+(** Offset an addressing mode [addr] by a quantity [delta], so that
+  it designates the pointer [delta] bytes past the pointer designated
+  by [addr].  May be undefined, in which case [None] is returned. *)
 
-Definition op_for_binary_addressing (addr: addressing) : operation := Olea addr.
+Definition offset_addressing (addr: addressing) (delta: int) : option addressing :=
+  match addr with
+  | Aindexed n => Some(Aindexed (Int.add n delta))
+  | Aindexed2 n => Some(Aindexed2 (Int.add n delta))
+  | Ascaled sc n => Some(Ascaled sc (Int.add n delta))
+  | Aindexed2scaled sc n => Some(Aindexed2scaled sc (Int.add n delta))
+  | Aglobal s n => Some(Aglobal s (Int.add n delta))
+  | Abased s n => Some(Abased s (Int.add n delta))
+  | Abasedscaled sc s n => Some(Abasedscaled sc s (Int.add n delta))
+  | Ainstack n => Some(Ainstack (Int.add n delta))
+  end.
 
-Lemma eval_op_for_binary_addressing:
-  forall (F V: Type) (ge: Genv.t F V) sp addr args v m,
-  (length args >= 2)%nat ->
+Lemma eval_offset_addressing:
+  forall (F V: Type) (ge: Genv.t F V) sp addr args delta addr' v,
+  offset_addressing addr delta = Some addr' ->
   eval_addressing ge sp addr args = Some v ->
-  eval_operation ge sp (op_for_binary_addressing addr) args m = Some v.
-Proof.
-  intros. simpl. auto.
-Qed.
-
-Lemma type_op_for_binary_addressing:
-  forall addr,
-  (length (type_of_addressing addr) >= 2)%nat ->
-  type_of_operation (op_for_binary_addressing addr) = (type_of_addressing addr, Tint).
+  eval_addressing ge sp addr' args = Some(Val.add v (Vint delta)).
 Proof.
-  intros. simpl. auto.
+  intros. destruct addr; simpl in H; inv H; simpl in *; FuncInv; inv H.
+  rewrite Val.add_assoc; auto.
+  rewrite !Val.add_assoc; auto.
+  rewrite !Val.add_assoc; auto.
+  rewrite !Val.add_assoc; auto.
+  unfold symbol_address. destruct (Genv.find_symbol ge i); auto. 
+  unfold symbol_address. destruct (Genv.find_symbol ge i); auto.
+  rewrite Val.add_assoc. rewrite Val.add_permut. rewrite Val.add_commut. auto. 
+  unfold symbol_address. destruct (Genv.find_symbol ge i0); auto.
+  rewrite Val.add_assoc. rewrite Val.add_permut. rewrite Val.add_commut. auto. 
+  rewrite Val.add_assoc. auto. 
 Qed.
 
-
-(** Two-address operations.  Return [true] if the first argument and
-  the result must be in the same location. *)
-
-Definition two_address_op (op: operation) : bool :=
-  match op with
-  | Omove => false
-  | Ointconst _ => false
-  | Ofloatconst _ => false
-  | Oindirectsymbol _ => false
-  | Ocast8signed => false
-  | Ocast8unsigned => false
-  | Ocast16signed => false
-  | Ocast16unsigned => false
-  | Oneg => true
-  | Osub => true
-  | Omul => true
-  | Omulimm _ => true
-  | Odiv => true
-  | Odivu => true
-  | Omod => true
-  | Omodu => true
-  | Oand => true
-  | Oandimm _ => true
-  | Oor => true
-  | Oorimm _ => true
-  | Oxor => true
-  | Oxorimm _ => true
-  | Oshl => true
-  | Oshlimm _ => true
-  | Oshr => true
-  | Oshrimm _ => true
-  | Oshrximm _ => true
-  | Oshru => true
-  | Oshruimm _ => true
-  | Ororimm _ => true
-  | Olea addr => false
-  | Onegf => true
-  | Oabsf => true
-  | Oaddf => true
-  | Osubf => true
-  | Omulf => true
-  | Odivf => true
-  | Osingleoffloat => false
-  | Ointoffloat => false
-  | Ofloatofint => false
-  | Ocmp c => false
-  end.
-
 (** Operations that are so cheap to recompute that CSE should not factor them out. *)
 
 Definition is_trivial_op (op: operation) : bool :=
@@ -774,6 +756,8 @@ Proof.
   eauto 3 using val_cmpu_bool_inject, Mem.valid_pointer_implies.
   inv H3; inv H2; simpl in H0; inv H0; auto.
   inv H3; inv H2; simpl in H0; inv H0; auto.
+  inv H3; try discriminate; inv H5; auto.
+  inv H3; try discriminate; inv H5; auto.
 Qed.
 
 Ltac TrivialExists :=
@@ -842,6 +826,8 @@ Proof.
   inv H4; inv H2; simpl; auto. destruct (Int.ltu i0 Int.iwordsize); auto.
   inv H4; simpl; auto. destruct (Int.ltu i Int.iwordsize); auto.
   inv H4; simpl; auto. destruct (Int.ltu i Int.iwordsize); auto.
+  inv H4; simpl; auto. destruct (Int.ltu i Int.iwordsize); auto.
+  inv H2; simpl; auto. destruct (Int.ltu (Int.sub Int.iwordsize i) Int.iwordsize); auto.
   eapply eval_addressing_inj; eauto. 
   inv H4; simpl; auto.
   inv H4; simpl; auto.
@@ -853,6 +839,9 @@ Proof.
   inv H4; simpl in H1; inv H1. simpl. destruct (Float.intoffloat f0); simpl in H2; inv H2.
   exists (Vint i); auto.
   inv H4; simpl in H1; inv H1. simpl. TrivialExists.
+  inv H4; inv H2; simpl; auto.
+  inv H4; simpl; auto.
+  inv H4; simpl; auto.
   subst v1. destruct (eval_condition c vl1 m1) eqn:?.
   exploit eval_condition_inj; eauto. intros EQ; rewrite EQ.
   destruct b; simpl; constructor.
diff --git a/ia32/PrintAsm.ml b/ia32/PrintAsm.ml
index 4768606..c2ea98f 100644
--- a/ia32/PrintAsm.ml
+++ b/ia32/PrintAsm.ml
@@ -61,7 +61,7 @@ let raw_symbol oc s =
   | ELF -> fprintf oc "%s" s
   | MacOS | Cygwin -> fprintf oc "_%s" s
 
-let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$"
+let re_variadic_stub = Str.regexp "\\(.*\\)\\$[ifl]*$"
 
 let symbol oc symb =
   let s = extern_atom symb in
@@ -239,9 +239,9 @@ let print_annot_val oc txt args res =
   fprintf oc "%s annotation: " comment;
   PrintAnnot.print_annot_val preg oc txt args;
   match args, res with
-  | IR src :: _, IR dst ->
+  | [IR src], [IR dst] ->
       if dst <> src then fprintf oc "	movl	%a, %a\n" ireg src ireg dst
-  | FR src :: _, FR dst ->
+  | [FR src], [FR dst] ->
       if dst <> src then fprintf oc "	movsd	%a, %a\n" freg src freg dst
   | _, _ -> assert false
 
@@ -251,98 +251,76 @@ let print_annot_val oc txt args res =
    memory accesses regardless of alignment. *)
 
 let print_builtin_memcpy_small oc sz al src dst =
-  let tmp =
-    if src <> ECX && dst <> ECX then ECX
-    else if src <> EDX && dst <> EDX then EDX
-    else EAX in
-  let need_tmp =
-    sz mod 4 <> 0 || not !Clflags.option_fsse in
+  assert (src = EDX && dst = EAX);
   let rec copy ofs sz =
     if sz >= 8 && !Clflags.option_fsse then begin
-      fprintf oc "	movq	%d(%a), %a\n" ofs ireg src freg XMM6;
-      fprintf oc "	movq	%a, %d(%a)\n" freg XMM6 ofs ireg dst;
+      fprintf oc "	movq	%d(%a), %a\n" ofs ireg src freg XMM7;
+      fprintf oc "	movq	%a, %d(%a)\n" freg XMM7 ofs ireg dst;
       copy (ofs + 8) (sz - 8)
     end else if sz >= 4 then begin
-      if !Clflags.option_fsse then begin
-        fprintf oc "	movd	%d(%a), %a\n" ofs ireg src freg XMM6;
-        fprintf oc "	movd	%a, %d(%a)\n" freg XMM6 ofs ireg dst
-      end else begin
-        fprintf oc "	movl	%d(%a), %a\n" ofs ireg src ireg tmp;
-        fprintf oc "	movl	%a, %d(%a)\n" ireg tmp ofs ireg dst
-      end;
+      fprintf oc "	movl	%d(%a), %a\n" ofs ireg src ireg ECX;
+      fprintf oc "	movl	%a, %d(%a)\n" ireg ECX ofs ireg dst;
       copy (ofs + 4) (sz - 4)
     end else if sz >= 2 then begin
-      fprintf oc "	movw	%d(%a), %a\n" ofs ireg src ireg16 tmp;
-      fprintf oc "	movw	%a, %d(%a)\n" ireg16 tmp ofs ireg dst;
+      fprintf oc "	movw	%d(%a), %a\n" ofs ireg src ireg16 ECX;
+      fprintf oc "	movw	%a, %d(%a)\n" ireg16 ECX ofs ireg dst;
       copy (ofs + 2) (sz - 2)
     end else if sz >= 1 then begin
-      fprintf oc "	movb	%d(%a), %a\n" ofs ireg src ireg8 tmp;
-      fprintf oc "	movb	%a, %d(%a)\n" ireg8 tmp ofs ireg dst;
+      fprintf oc "	movb	%d(%a), %a\n" ofs ireg src ireg8 ECX;
+      fprintf oc "	movb	%a, %d(%a)\n" ireg8 ECX ofs ireg dst;
       copy (ofs + 1) (sz - 1)
     end in
-  if need_tmp && tmp = EAX then
-    fprintf oc "	pushl	%a\n" ireg EAX;
-  copy 0 sz;
-  if need_tmp && tmp = EAX then
-    fprintf oc "	popl	%a\n" ireg EAX
-
-let print_mov2 oc src1 dst1 src2 dst2 =
-  if src1 = dst1 then
-    if src2 = dst2
-    then ()
-    else fprintf oc "	movl	%a, %a\n" ireg src2 ireg dst2
-  else if src2 = dst2 then
-    fprintf oc "	movl	%a, %a\n" ireg src1 ireg dst1
-  else if src2 = dst1 then
-    if src1 = dst2 then
-      fprintf oc "	xchgl	%a, %a\n" ireg src1 ireg src2
-    else begin
-      fprintf oc "	movl	%a, %a\n" ireg src2 ireg dst2;
-      fprintf oc "	movl	%a, %a\n" ireg src1 ireg dst1
-    end
-  else begin
-    fprintf oc "	movl	%a, %a\n" ireg src1 ireg dst1;
-    fprintf oc "	movl	%a, %a\n" ireg src2 ireg dst2
-  end
+  copy 0 sz
 
 let print_builtin_memcpy_big oc sz al src dst =
-  fprintf oc "	pushl	%a\n" ireg ESI;
-  fprintf oc "	pushl	%a\n" ireg EDI;
-  print_mov2 oc src ESI dst EDI;
+  assert (src = ESI && dst = EDI);
   fprintf oc "	movl	$%d, %a\n" (sz / 4) ireg ECX;
   fprintf oc "	rep	movsl\n";
   if sz mod 4 >= 2 then fprintf oc "	movsw\n";
-  if sz mod 2 >= 1 then fprintf oc "	movsb\n";
-  fprintf oc "	popl	%a\n" ireg EDI;
-  fprintf oc "	popl	%a\n" ireg ESI
+  if sz mod 2 >= 1 then fprintf oc "	movsb\n"
 
 let print_builtin_memcpy oc sz al args =
   let (dst, src) =
     match args with [IR d; IR s] -> (d, s) | _ -> assert false in
   fprintf oc "%s begin builtin __builtin_memcpy_aligned, size = %d, alignment = %d\n"
           comment sz al;
-  if sz <= 64
+  if sz <= 32
   then print_builtin_memcpy_small oc sz al src dst
   else print_builtin_memcpy_big oc sz al src dst;
   fprintf oc "%s end builtin __builtin_memcpy_aligned\n" comment
 
 (* Handling of volatile reads and writes *)
 
+let offset_addressing (Addrmode(base, ofs, cst)) delta =
+  Addrmode(base, ofs,
+           match cst with
+           | Coq_inl n -> Coq_inl(Integers.Int.add n delta)
+           | Coq_inr(id, n) -> Coq_inr(id, Integers.Int.add n delta))
+
 let print_builtin_vload_common oc chunk addr res =
   match chunk, res with
-  | Mint8unsigned, IR res ->
+  | Mint8unsigned, [IR res] ->
       fprintf oc "	movzbl	%a, %a\n" addressing addr ireg res
-  | Mint8signed, IR res ->
+  | Mint8signed, [IR res] ->
       fprintf oc "	movsbl	%a, %a\n" addressing addr ireg res
-  | Mint16unsigned, IR res ->
+  | Mint16unsigned, [IR res] ->
       fprintf oc "	movzwl	%a, %a\n" addressing addr ireg res
-  | Mint16signed, IR res ->
+  | Mint16signed, [IR res] ->
       fprintf oc "	movswl	%a, %a\n" addressing addr ireg res
-  | Mint32, IR res ->
+  | Mint32, [IR res] ->
       fprintf oc "	movl	%a, %a\n" addressing addr ireg res
-  | Mfloat32, FR res ->
+  | Mint64, [IR res1; IR res2] ->
+      let addr' = offset_addressing addr (coqint_of_camlint 4l) in
+      if not (Asmgen.addressing_mentions addr res2) then begin
+        fprintf oc "	movl	%a, %a\n" addressing addr ireg res2;
+        fprintf oc "	movl	%a, %a\n" addressing addr' ireg res1
+      end else begin
+        fprintf oc "	movl	%a, %a\n" addressing addr' ireg res1;
+        fprintf oc "	movl	%a, %a\n" addressing addr ireg res2
+      end
+  | Mfloat32, [FR res] ->
       fprintf oc "	cvtss2sd %a, %a\n" addressing addr freg res
-  | (Mfloat64 | Mfloat64al32), FR res ->
+  | (Mfloat64 | Mfloat64al32), [FR res] ->
       fprintf oc "	movsd	%a, %a\n" addressing addr freg res
   | _ ->
       assert false
@@ -366,21 +344,25 @@ let print_builtin_vload_global oc chunk id ofs args res =
 
 let print_builtin_vstore_common oc chunk addr src tmp =
   match chunk, src with
-  | (Mint8signed | Mint8unsigned), IR src ->
+  | (Mint8signed | Mint8unsigned), [IR src] ->
       if Asmgen.low_ireg src then
         fprintf oc "	movb	%a, %a\n" ireg8 src addressing addr
       else begin
         fprintf oc "	movl	%a, %a\n" ireg src ireg tmp;
         fprintf oc "	movb	%a, %a\n" ireg8 tmp addressing addr
       end
-  | (Mint16signed | Mint16unsigned), IR src ->
+  | (Mint16signed | Mint16unsigned), [IR src] ->
       fprintf oc "	movw	%a, %a\n" ireg16 src addressing addr
-  | Mint32, IR src ->
+  | Mint32, [IR src] ->
       fprintf oc "	movl	%a, %a\n" ireg src addressing addr
-  | Mfloat32, FR src ->
+  | Mint64, [IR src1; IR src2] ->
+      let addr' = offset_addressing addr (coqint_of_camlint 4l) in
+      fprintf oc "	movl	%a, %a\n" ireg src2 addressing addr;
+      fprintf oc "	movl	%a, %a\n" ireg src1 addressing addr'
+  | Mfloat32, [FR src] ->
       fprintf oc "	cvtsd2ss %a, %%xmm7\n" freg src;
       fprintf oc "	movss	%%xmm7, %a\n" addressing addr
-  | (Mfloat64 | Mfloat64al32), FR src ->
+  | (Mfloat64 | Mfloat64al32), [FR src] ->
       fprintf oc "	movsd	%a, %a\n" freg src addressing addr
   | _ ->
       assert false
@@ -388,10 +370,10 @@ let print_builtin_vstore_common oc chunk addr src tmp =
 let print_builtin_vstore oc chunk args =
   fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
   begin match args with
-  | [IR addr; src] ->
+  | IR addr :: src ->
       print_builtin_vstore_common oc chunk
                  (Addrmode(Some addr, None, Coq_inl Integers.Int.zero)) src
-                 (if addr = ECX then EDX else ECX)
+                 (if addr = EAX then ECX else EAX)
   | _ ->
       assert false
   end;
@@ -399,13 +381,8 @@ let print_builtin_vstore oc chunk args =
 
 let print_builtin_vstore_global oc chunk id ofs args =
   fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
-  begin match args with
-  | [src] ->
-      print_builtin_vstore_common oc chunk
-                       (Addrmode(None, None, Coq_inr(id,ofs))) src EDX
-  | _ ->
-      assert false
-  end;
+  print_builtin_vstore_common oc chunk
+                 (Addrmode(None, None, Coq_inr(id,ofs))) args EAX;
   fprintf oc "%s end builtin __builtin_volatile_write\n" comment
 
 (* Handling of compiler-inlined builtins *)
@@ -414,13 +391,13 @@ let print_builtin_inline oc name args res =
   fprintf oc "%s begin builtin %s\n" comment name;
   begin match name, args, res with
   (* Memory accesses *)
-  | "__builtin_read16_reversed", [IR a1], IR res ->
+  | "__builtin_read16_reversed", [IR a1], [IR res] ->
       let tmp = if Asmgen.low_ireg res then res else ECX in
       fprintf oc "	movzwl	0(%a), %a\n" ireg a1 ireg tmp;
       fprintf oc "	xchg	%a, %a\n" ireg8 tmp high_ireg8 tmp;
       if tmp <> res then
         fprintf oc "	movl	%a, %a\n" ireg tmp ireg res
-  | "__builtin_read32_reversed", [IR a1], IR res ->
+  | "__builtin_read32_reversed", [IR a1], [IR res] ->
       fprintf oc "	movl	0(%a), %a\n" ireg a1 ireg res;
       fprintf oc "	bswap	%a\n" ireg res
   | "__builtin_write16_reversed", [IR a1; IR a2], _ ->
@@ -436,19 +413,19 @@ let print_builtin_inline oc name args res =
       fprintf oc "	bswap	%a\n" ireg tmp;
       fprintf oc "	movl	%a, 0(%a)\n" ireg tmp ireg a1
   (* Integer arithmetic *)
-  | "__builtin_bswap", [IR a1], IR res ->
+  | "__builtin_bswap", [IR a1], [IR res] ->
       if a1 <> res then
         fprintf oc "	movl	%a, %a\n" ireg a1 ireg res;
       fprintf oc "	bswap	%a\n" ireg res
   (* Float arithmetic *)
-  | "__builtin_fabs", [FR a1], FR res ->
+  | "__builtin_fabs", [FR a1], [FR res] ->
       need_masks := true;
       if a1 <> res then
         fprintf oc "	movsd	%a, %a\n" freg a1 freg res;
       fprintf oc "	andpd	%a, %a\n" raw_symbol "__absd_mask" freg res
-  | "__builtin_fsqrt", [FR a1], FR res ->
+  | "__builtin_fsqrt", [FR a1], [FR res] ->
       fprintf oc "	sqrtsd	%a, %a\n" freg a1 freg res
-  | "__builtin_fmax", [FR a1; FR a2], FR res ->
+  | "__builtin_fmax", [FR a1; FR a2], [FR res] ->
       if res = a1 then
         fprintf oc "	maxsd	%a, %a\n" freg a2 freg res
       else if res = a2 then
@@ -457,7 +434,7 @@ let print_builtin_inline oc name args res =
         fprintf oc "	movsd	%a, %a\n" freg a1 freg res;
         fprintf oc "	maxsd	%a, %a\n" freg a2 freg res
       end
-  | "__builtin_fmin", [FR a1; FR a2], FR res ->
+  | "__builtin_fmin", [FR a1; FR a2], [FR res] ->
       if res = a1 then
         fprintf oc "	minsd	%a, %a\n" freg a2 freg res
       else if res = a2 then
@@ -466,6 +443,23 @@ let print_builtin_inline oc name args res =
         fprintf oc "	movsd	%a, %a\n" freg a1 freg res;
         fprintf oc "	minsd	%a, %a\n" freg a2 freg res
       end
+  (* 64-bit integer arithmetic *)
+  | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
+      assert (ah = EDX && al = EAX && rh = EDX && rl = EAX);
+      fprintf oc "	negl	%a\n" ireg EAX;
+      fprintf oc "	adcl	$0, %a\n" ireg EDX;
+      fprintf oc "	negl	%a\n" ireg EDX
+  | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+      assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX);
+      fprintf oc "	addl	%a, %a\n" ireg EBX ireg EAX;
+      fprintf oc "	adcl	%a, %a\n" ireg ECX ireg EDX
+  | "__builtin_subl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
+      assert (ah = EDX && al = EAX && bh = ECX && bl = EBX && rh = EDX && rl = EAX);
+      fprintf oc "	subl	%a, %a\n" ireg EBX ireg EAX;
+      fprintf oc "	sbbl	%a, %a\n" ireg ECX ireg EDX
+  | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
+      assert (a = EAX && b = EDX && rh = EDX && rl = EAX);
+      fprintf oc "	mull    %a\n" ireg EDX
   (* Catch-all *)  
   | _ ->
       invalid_arg ("unrecognized builtin " ^ name)
@@ -604,6 +598,8 @@ let print_instruction oc = function
       fprintf oc "	sarl	%%cl, %a\n" ireg rd
   | Psar_ri(rd, n) ->
       fprintf oc "	sarl	$%a, %a\n" coqint n ireg rd
+  | Pshld_ri(rd, r1, n) ->
+      fprintf oc "	shldl	$%a, %a, %a\n" coqint n ireg r1 ireg rd
   | Pror_ri(rd, n) ->
       fprintf oc "	rorl	$%a, %a\n" coqint n ireg rd
   | Pcmp_rr(r1, r2) ->
@@ -617,8 +613,8 @@ let print_instruction oc = function
   | Pcmov(c, rd, r1) ->
       fprintf oc "	cmov%s	%a, %a\n" (name_of_condition c) ireg r1 ireg rd
   | Psetcc(c, rd) ->
-      fprintf oc "	set%s	%%cl\n" (name_of_condition c);
-      fprintf oc "	movzbl	%%cl, %a\n" ireg rd
+      fprintf oc "	set%s	%a\n" (name_of_condition c) ireg8 rd;
+      fprintf oc "	movzbl	%a, %a\n" ireg8 rd ireg rd
   (** Arithmetic operations over floats *)
   | Paddd_ff(rd, r1) ->
       fprintf oc "	addsd	%a, %a\n" freg r1 freg rd
@@ -757,6 +753,8 @@ let print_init oc = function
       fprintf oc "	.short	%ld\n" (camlint_of_coqint n)
   | Init_int32 n ->
       fprintf oc "	.long	%ld\n" (camlint_of_coqint n)
+  | Init_int64 n ->
+      fprintf oc "	.quad	%Ld\n" (camlint64_of_coqint n)
   | Init_float32 n ->
       fprintf oc "	.long	%ld %s %.18g\n"
 	(camlint_of_coqint (Floats.Float.bits_of_single n))
diff --git a/ia32/PrintOp.ml b/ia32/PrintOp.ml
index 7ddf42f..1f417c2 100644
--- a/ia32/PrintOp.ml
+++ b/ia32/PrintOp.ml
@@ -90,6 +90,7 @@ let print_operation reg pp = function
   | Oshru, [r1;r2] -> fprintf pp "%a >>u %a" reg r1 reg r2
   | Oshruimm n, [r1] -> fprintf pp "%a >>u %ld" reg r1 (camlint_of_coqint n)
   | Ororimm n, [r1] -> fprintf pp "%a ror %ld" reg r1 (camlint_of_coqint n)
+  | Oshldimm n, [r1;r2] -> fprintf pp "(%a, %a) << %ld" reg r1 reg r2 (camlint_of_coqint n)
   | Olea addr, args -> print_addressing reg pp (addr, args)
   | Onegf, [r1] -> fprintf pp "negf(%a)" reg r1
   | Oabsf, [r1] -> fprintf pp "absf(%a)" reg r1
@@ -100,6 +101,9 @@ let print_operation reg pp = function
   | Osingleoffloat, [r1] -> fprintf pp "singleoffloat(%a)" reg r1
   | Ointoffloat, [r1] -> fprintf pp "intoffloat(%a)" reg r1
   | Ofloatofint, [r1] -> fprintf pp "floatofint(%a)" reg r1
+  | Omakelong, [r1;r2] -> fprintf pp "makelong(%a,%a)" reg r1 reg r2
+  | Olowlong, [r1] -> fprintf pp "lowlong(%a)" reg r1
+  | Ohighlong, [r1] -> fprintf pp "highlong(%a)" reg r1
   | Ocmp c, args -> print_condition reg pp (c, args)
   | _ -> fprintf pp "<bad operator>"
 
diff --git a/ia32/SelectOp.vp b/ia32/SelectOp.vp
index 0c30386..7f79a4f 100644
--- a/ia32/SelectOp.vp
+++ b/ia32/SelectOp.vp
@@ -264,14 +264,16 @@ Nondetfunction or (e1: expr) (e2: expr) :=
   | Eop (Ointconst n1) Enil, t2 => orimm n1 t2
   | t1, Eop (Ointconst n2) Enil => orimm n2 t1
   | Eop (Oshlimm n1) (t1:::Enil), Eop (Oshruimm n2) (t2:::Enil) =>
-      if Int.eq (Int.add n1 n2) Int.iwordsize
-      && same_expr_pure t1 t2
-      then Eop (Ororimm n2) (t1:::Enil)
+      if Int.eq (Int.add n1 n2) Int.iwordsize then
+        if same_expr_pure t1 t2
+        then Eop (Ororimm n2) (t1:::Enil)
+        else Eop (Oshldimm n1) (t1:::t2:::Enil)
       else Eop Oor (e1:::e2:::Enil)
   | Eop (Oshruimm n2) (t2:::Enil), Eop (Oshlimm n1) (t1:::Enil) =>
-      if Int.eq (Int.add n1 n2) Int.iwordsize
-      && same_expr_pure t1 t2
-      then Eop (Ororimm n2) (t1:::Enil)
+      if Int.eq (Int.add n1 n2) Int.iwordsize then
+        if same_expr_pure t1 t2
+        then Eop (Ororimm n2) (t1:::Enil)
+        else Eop (Oshldimm n1) (t1:::t2:::Enil)
       else Eop Oor (e1:::e2:::Enil)
   | _, _ =>
       Eop Oor (e1:::e2:::Enil)
diff --git a/ia32/SelectOpproof.v b/ia32/SelectOpproof.v
index 18deca6..1569ad6 100644
--- a/ia32/SelectOpproof.v
+++ b/ia32/SelectOpproof.v
@@ -26,13 +26,6 @@ Require Import SelectOp.
 
 Open Local Scope cminorsel_scope.
 
-Section CMCONSTR.
-
-Variable ge: genv.
-Variable sp: val.
-Variable e: env.
-Variable m: mem.
-
 (** * Useful lemmas and tactics *)
 
 (** The following are trivial lemmas and custom tactics that help
@@ -78,9 +71,15 @@ Ltac TrivialExists :=
   | [ |- exists v, _ /\ Val.lessdef ?a v ] => exists a; split; [EvalOp | auto]
   end.
 
-
 (** * Correctness of the smart constructors *)
 
+Section CMCONSTR.
+
+Variable ge: genv.
+Variable sp: val.
+Variable e: env.
+Variable m: mem.
+
 (** We now show that the code generated by "smart constructor" functions
   such as [SelectOp.notint] behaves as expected.  Continuing the
   [notint] example, we show that if the expression [e]
@@ -410,6 +409,12 @@ Proof.
   discriminate.
 Qed.
 
+Remark int_add_sub_eq:
+  forall x y z, Int.add x y = z -> Int.sub z x = y.
+Proof.
+  intros. subst z. rewrite Int.sub_add_l. rewrite Int.sub_idem. apply Int.add_zero_l.
+Qed.
+
 Lemma eval_or: binary_constructor_sound or Val.or.
 Proof.
   red; intros until y; unfold or; case (or_match a b); intros.
@@ -417,26 +422,29 @@ Proof.
   InvEval. rewrite Val.or_commut. apply eval_orimm; auto. 
   InvEval. apply eval_orimm; auto.
 (* shlimm - shruimm *)
-  destruct (Int.eq (Int.add n1 n2) Int.iwordsize && same_expr_pure t1 t2) eqn:?.
-  destruct (andb_prop _ _ Heqb0). 
-  generalize (Int.eq_spec (Int.add n1 n2) Int.iwordsize); rewrite H1; intros EQ.
+  predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize.
+  destruct (same_expr_pure t1 t2) eqn:?.
   InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst. 
   exists (Val.ror v0 (Vint n2)); split. EvalOp.
   destruct v0; simpl; auto. 
   destruct (Int.ltu n1 Int.iwordsize) eqn:?; auto.
   destruct (Int.ltu n2 Int.iwordsize) eqn:?; auto.
   simpl. rewrite <- Int.or_ror; auto.
+  InvEval. exists (Val.or x y); split. EvalOp. 
+  simpl. erewrite int_add_sub_eq; eauto. rewrite H0; rewrite H; auto. auto.
   TrivialExists. 
 (* shruimm - shlimm *) 
-  destruct (Int.eq (Int.add n1 n2) Int.iwordsize && same_expr_pure t1 t2) eqn:?.
-  destruct (andb_prop _ _ Heqb0). 
-  generalize (Int.eq_spec (Int.add n1 n2) Int.iwordsize); rewrite H1; intros EQ.
+  predSpec Int.eq Int.eq_spec (Int.add n1 n2) Int.iwordsize.
+  destruct (same_expr_pure t1 t2) eqn:?.
   InvEval. exploit eval_same_expr; eauto. intros [EQ1 EQ2]; subst. 
   exists (Val.ror v1 (Vint n2)); split. EvalOp.
   destruct v1; simpl; auto. 
   destruct (Int.ltu n2 Int.iwordsize) eqn:?; auto.
   destruct (Int.ltu n1 Int.iwordsize) eqn:?; auto.
   simpl. rewrite Int.or_commut. rewrite <- Int.or_ror; auto.
+  InvEval. exists (Val.or y x); split. EvalOp. 
+  simpl. erewrite int_add_sub_eq; eauto. rewrite H0; rewrite H; auto.
+  rewrite Val.or_commut; auto.
   TrivialExists. 
 (* default *)
   TrivialExists.
diff --git a/ia32/standard/Conventions1.v b/ia32/standard/Conventions1.v
index 49f5da9..cae20f6 100644
--- a/ia32/standard/Conventions1.v
+++ b/ia32/standard/Conventions1.v
@@ -21,46 +21,26 @@ Require Import Locations.
 
 (** Machine registers (type [mreg] in module [Locations]) are divided in
   the following groups:
-- Temporaries used for spilling, reloading, and parallel move operations.
-- Allocatable registers, that can be assigned to RTL pseudo-registers.
-  These are further divided into:
--- Callee-save registers, whose value is preserved across a function call.
--- Caller-save registers that can be modified during a function call.
+- Callee-save registers, whose value is preserved across a function call.
+- Caller-save registers that can be modified during a function call.
 
   We follow the x86-32 application binary interface (ABI) in our choice
   of callee- and caller-save registers.
 *)
 
-Definition int_caller_save_regs := AX :: nil.
+Definition int_caller_save_regs := AX :: CX :: DX :: nil.
 
-Definition float_caller_save_regs := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: nil.
+Definition float_caller_save_regs := X0 :: X1 :: X2 :: X3 :: X4 :: X5 :: X6 :: X7 :: nil.
 
 Definition int_callee_save_regs := BX :: SI :: DI :: BP :: nil.
 
 Definition float_callee_save_regs : list mreg := nil.
 
-Definition destroyed_at_call_regs :=
-  int_caller_save_regs ++ float_caller_save_regs.
+Definition destroyed_at_call :=
+  FP0 :: int_caller_save_regs ++ float_caller_save_regs.
 
-Definition destroyed_at_call := List.map R destroyed_at_call_regs.
-
-Definition int_temporaries := IT1 :: IT2 :: nil.
-
-Definition float_temporaries := FT1 :: FT2 :: nil.
-
-(** [FP0] is not used for reloading, hence it is not in [float_temporaries],
-  however it is not allocatable, hence it is in [temporaries]. *)
-  
-Definition temporary_regs := IT1 :: IT2 :: FT1 :: FT2 :: FP0 :: nil.
-
-Definition temporaries := List.map R temporary_regs.
-
-Definition destroyed_at_move_regs := FP0 :: nil.
-
-Definition destroyed_at_move := List.map R destroyed_at_move_regs.
-
-Definition dummy_int_reg := AX.     (**r Used in [Coloring]. *)
-Definition dummy_float_reg := X0.   (**r Used in [Coloring]. *)
+Definition dummy_int_reg := AX.     (**r Used in [Regalloc]. *)
+Definition dummy_float_reg := X0.   (**r Used in [Regalloc]. *)
 
 (** The [index_int_callee_save] and [index_float_callee_save] associate
   a unique positive integer to callee-save registers.  This integer is
@@ -147,7 +127,7 @@ Proof.
 Qed.
 
 (** The following lemmas show that
-    (temporaries, destroyed at call, integer callee-save, float callee-save)
+    (destroyed at call, integer callee-save, float callee-save)
     is a partition of the set of machine registers. *)
 
 Lemma int_float_callee_save_disjoint:
@@ -158,34 +138,26 @@ Qed.
 
 Lemma register_classification:
   forall r, 
-  (In (R r) temporaries \/ In (R r) destroyed_at_call) \/
-  (In r int_callee_save_regs \/ In r float_callee_save_regs).
+  In r destroyed_at_call \/ In r int_callee_save_regs \/ In r float_callee_save_regs.
 Proof.
   destruct r; 
-  try (left; left; simpl; OrEq);
-  try (left; right; simpl; OrEq);
+  try (left; simpl; OrEq);
   try (right; left; simpl; OrEq);
   try (right; right; simpl; OrEq).
 Qed.
 
 Lemma int_callee_save_not_destroyed:
   forall r, 
-    In (R r) temporaries \/ In (R r) destroyed_at_call ->
-    ~(In r int_callee_save_regs).
+    In r destroyed_at_call -> In r int_callee_save_regs -> False.
 Proof.
-  intros; red; intros. elim H.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
+  intros. revert H0 H. simpl. ElimOrEq; NotOrEq.
 Qed.
 
 Lemma float_callee_save_not_destroyed:
   forall r, 
-    In (R r) temporaries \/ In (R r) destroyed_at_call ->
-    ~(In r float_callee_save_regs).
+    In r destroyed_at_call -> In r float_callee_save_regs -> False.
 Proof.
-  intros; red; intros. elim H.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
-  generalize H0. simpl; ElimOrEq; NotOrEq.
+  intros. revert H0 H. simpl. ElimOrEq; NotOrEq.
 Qed.
 
 Lemma int_callee_save_type:
@@ -244,13 +216,15 @@ Qed.
   registers [AX] or [FP0], depending on the type of the returned value.
   We treat a function without result as a function with one integer result. *)
 
-Definition loc_result (s: signature) : mreg :=
+Definition loc_result (s: signature) : list mreg :=
   match s.(sig_res) with
-  | None => AX
-  | Some Tint => AX
-  | Some Tfloat => FP0
+  | None => AX :: nil
+  | Some Tint => AX :: nil
+  | Some Tfloat => FP0 :: nil
+  | Some Tlong => DX :: AX :: nil
   end.
 
+(*
 (** The result location has the type stated in the signature. *)
 
 Lemma loc_result_type:
@@ -263,17 +237,18 @@ Proof.
   destruct t; reflexivity.
   reflexivity.
 Qed.
+*)
 
-(** The result location is a caller-save register or a temporary *)
+(** The result locations are caller-save registers *)
 
 Lemma loc_result_caller_save:
-  forall (s: signature), 
-  In (R (loc_result s)) destroyed_at_call \/ In (R (loc_result s)) temporaries.
+  forall (s: signature) (r: mreg),
+  In r (loc_result s) -> In r destroyed_at_call.
 Proof.
-  intros; unfold loc_result.
-  destruct (sig_res s).
-  destruct t. left; simpl; OrEq. right; simpl; OrEq.
-  left; simpl; OrEq.
+  intros.
+  assert (r = AX \/ r = DX \/ r = FP0).
+    unfold loc_result in H. destruct (sig_res s); [destruct t|idtac]; simpl in H; intuition.
+  destruct H0 as [A | [A | A]]; subst r; simpl; OrEq.
 Qed.
 
 (** ** Location of function arguments *)
@@ -284,8 +259,9 @@ Fixpoint loc_arguments_rec
     (tyl: list typ) (ofs: Z) {struct tyl} : list loc :=
   match tyl with
   | nil => nil
-  | Tint :: tys => S (Outgoing ofs Tint) :: loc_arguments_rec tys (ofs + 1)
-  | Tfloat :: tys => S (Outgoing ofs Tfloat) :: loc_arguments_rec tys (ofs + 2)
+  | Tint :: tys => S Outgoing ofs Tint :: loc_arguments_rec tys (ofs + 1)
+  | Tfloat :: tys => S Outgoing ofs Tfloat :: loc_arguments_rec tys (ofs + 2)
+  | Tlong :: tys => S Outgoing (ofs + 1) Tint :: S Outgoing ofs Tint :: loc_arguments_rec tys (ofs + 2)
   end.
 
 (** [loc_arguments s] returns the list of locations where to store arguments
@@ -301,27 +277,19 @@ Fixpoint size_arguments_rec
     (tyl: list typ) (ofs: Z) {struct tyl} : Z :=
   match tyl with
   | nil => ofs
-  | Tint :: tys => size_arguments_rec tys (ofs + 1)
-  | Tfloat :: tys => size_arguments_rec tys (ofs + 2)
+  | ty :: tys => size_arguments_rec tys (ofs + typesize ty)
   end.
 
 Definition size_arguments (s: signature) : Z :=
   size_arguments_rec s.(sig_args) 0.
 
-(** A tail-call is possible for a signature if the corresponding
-    arguments are all passed in registers. *)
-
-Definition tailcall_possible (s: signature) : Prop :=
-  forall l, In l (loc_arguments s) ->
-  match l with R _ => True | S _ => False end.
-
-(** Argument locations are either non-temporary registers or [Outgoing] 
+(** Argument locations are either caller-save registers or [Outgoing] 
   stack slots at nonnegative offsets. *)
 
 Definition loc_argument_acceptable (l: loc) : Prop :=
   match l with
-  | R r => ~(In l temporaries)
-  | S (Outgoing ofs ty) => ofs >= 0
+  | R r => In r destroyed_at_call
+  | S Outgoing ofs ty => ofs >= 0 /\ ty <> Tlong
   | _ => False
   end.
 
@@ -329,62 +297,36 @@ Remark loc_arguments_rec_charact:
   forall tyl ofs l,
   In l (loc_arguments_rec tyl ofs) ->
   match l with
-  | S (Outgoing ofs' ty) => ofs' >= ofs
+  | S Outgoing ofs' ty => ofs' >= ofs /\ ty <> Tlong
   | _ => False
   end.
 Proof.
   induction tyl; simpl loc_arguments_rec; intros.
-  elim H.
-  destruct a; simpl in H; destruct H.
-  subst l. omega.
-  generalize (IHtyl _ _ H). destruct l; auto. destruct s; auto. omega. 
-  subst l. omega.
-  generalize (IHtyl _ _ H). destruct l; auto. destruct s; auto. omega. 
+  destruct H.
+  destruct a.
+- destruct H. subst l. split. omega. congruence.
+  exploit IHtyl; eauto.
+  destruct l; auto. destruct sl; auto. intuition omega.
+- destruct H. subst l. split. omega. congruence.
+  exploit IHtyl; eauto.
+  destruct l; auto. destruct sl; auto. intuition omega.
+- destruct H. subst l; split; [omega|congruence].
+  destruct H. subst l; split; [omega|congruence].
+  exploit IHtyl; eauto.
+  destruct l; auto. destruct sl; auto. intuition omega.
 Qed.
 
 Lemma loc_arguments_acceptable:
-  forall (s: signature) (r: loc),
-  In r (loc_arguments s) -> loc_argument_acceptable r.
+  forall (s: signature) (l: loc),
+  In l (loc_arguments s) -> loc_argument_acceptable l.
 Proof.
   unfold loc_arguments; intros.
-  generalize (loc_arguments_rec_charact  _ _ _ H).
-  destruct r; tauto.
-Qed.
-Hint Resolve loc_arguments_acceptable: locs.
-
-(** Arguments are parwise disjoint (in the sense of [Loc.norepet]). *)
-
-Remark loc_arguments_rec_notin_local:
-  forall tyl ofs ofs0 ty0,
-  Loc.notin (S (Local ofs0 ty0)) (loc_arguments_rec tyl ofs).
-Proof.
-  induction tyl; simpl; intros.
-  auto.
-  destruct a; simpl; auto.
-Qed.
-
-Remark loc_arguments_rec_notin_outgoing:
-  forall tyl ofs ofs0 ty0,
-  ofs0 + typesize ty0 <= ofs ->
-  Loc.notin (S (Outgoing ofs0 ty0)) (loc_arguments_rec tyl ofs).
-Proof.
-  induction tyl; simpl; intros.
-  auto.
-  destruct a.
-  split. simpl. omega. apply IHtyl. omega. 
-  split. simpl. omega. apply IHtyl. omega. 
+  exploit loc_arguments_rec_charact; eauto. 
+  unfold loc_argument_acceptable.
+  destruct l; tauto.
 Qed.
 
-Lemma loc_arguments_norepet:
-  forall (s: signature), Loc.norepet (loc_arguments s).
-Proof.
-  intros. unfold loc_arguments. generalize (sig_args s) 0.
-  induction l; simpl; intros.
-  constructor.
-  destruct a; constructor.
-  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
-  apply loc_arguments_rec_notin_outgoing. simpl; omega. auto.
-Qed.
+Hint Resolve loc_arguments_acceptable: locs.
 
 (** The offsets of [Outgoing] arguments are below [size_arguments s]. *)
 
@@ -393,9 +335,8 @@ Remark size_arguments_rec_above:
 Proof.
   induction tyl; simpl; intros.
   omega.
-  destruct a.
-  apply Zle_trans with (ofs0 + 1); auto; omega.
-  apply Zle_trans with (ofs0 + 2); auto; omega.
+  apply Zle_trans with (ofs0 + typesize a); auto. 
+  generalize (typesize_pos a); omega.
 Qed.
 
 Lemma size_arguments_above:
@@ -407,56 +348,20 @@ Qed.
 
 Lemma loc_arguments_bounded:
   forall (s: signature) (ofs: Z) (ty: typ),
-  In (S (Outgoing ofs ty)) (loc_arguments s) ->
+  In (S Outgoing ofs ty) (loc_arguments s) ->
   ofs + typesize ty <= size_arguments s.
 Proof.
   intros until ty. unfold loc_arguments, size_arguments. generalize (sig_args s) 0.
   induction l; simpl; intros.
-  elim H.
-  destruct a; simpl in H; destruct H.
-  inv H. apply size_arguments_rec_above.
-  auto.
-  inv H. apply size_arguments_rec_above.
+  destruct H.
+  destruct a.
+  destruct H. inv H. apply size_arguments_rec_above. auto.
+  destruct H. inv H. apply size_arguments_rec_above. auto.
+  destruct H. inv H.
+  simpl typesize. replace (z + 1 + 1) with (z + 2) by omega. apply size_arguments_rec_above.
+  destruct H. inv H. 
+  simpl typesize. apply Zle_trans with (ofs + 2). omega. apply size_arguments_rec_above. 
   auto.
 Qed.
 
-(** Temporary registers do not overlap with argument locations. *)
-
-Lemma loc_arguments_not_temporaries:
-  forall sig, Loc.disjoint (loc_arguments sig) temporaries.
-Proof.
-  intros; red; intros x1 x2 H.
-  generalize (loc_arguments_rec_charact _ _ _ H).
-  destruct x1. tauto. destruct s; intuition.
-  revert H1. simpl; ElimOrEq; auto.
-Qed.
-Hint Resolve loc_arguments_not_temporaries: locs.
-
-(** Argument registers are caller-save. *)
 
-Lemma arguments_caller_save:
-  forall sig r,
-  In (R r) (loc_arguments sig) -> In (R r) destroyed_at_call.
-Proof.
-  unfold loc_arguments; intros.
-  elim (loc_arguments_rec_charact _ _ _ H); simpl.
-Qed.
-
-(** Argument locations agree in number with the function signature. *)
-
-Lemma loc_arguments_length:
-  forall sig,
-  List.length (loc_arguments sig) = List.length sig.(sig_args).
-Proof.
-  intros. unfold loc_arguments. generalize (sig_args sig) 0.
-  induction l; simpl; intros. auto. destruct a; simpl; decEq; auto.
-Qed.
-
-(** Argument locations agree in types with the function signature. *)
-
-Lemma loc_arguments_type:
-  forall sig, List.map Loc.type (loc_arguments sig) = sig.(sig_args).
-Proof.
-  intros. unfold loc_arguments. generalize (sig_args sig) 0. 
-  induction l; simpl; intros. auto. destruct a; simpl; decEq; auto.
-Qed.
diff --git a/ia32/standard/Stacklayout.v b/ia32/standard/Stacklayout.v
index be854fd..f9d1daf 100644
--- a/ia32/standard/Stacklayout.v
+++ b/ia32/standard/Stacklayout.v
@@ -19,10 +19,9 @@ Require Import Bounds.
   from bottom (lowest offsets) to top:
 - Space for outgoing arguments to function calls.
 - Back link to parent frame
-- Local stack slots of integer type.
 - Saved values of integer callee-save registers used by the function.
-- Local stack slots of float type.
 - Saved values of float callee-save registers used by the function.
+- Local stack slots.
 - Space for the stack-allocated data declared in Cminor
 - Return address.
 
@@ -36,10 +35,9 @@ Record frame_env : Type := mk_frame_env {
   fe_size: Z;
   fe_ofs_link: Z;
   fe_ofs_retaddr: Z;
-  fe_ofs_int_local: Z;
+  fe_ofs_local: Z;
   fe_ofs_int_callee_save: Z;
   fe_num_int_callee_save: Z;
-  fe_ofs_float_local: Z;
   fe_ofs_float_callee_save: Z;
   fe_num_float_callee_save: Z;
   fe_stack_data: Z
@@ -50,17 +48,16 @@ Record frame_env : Type := mk_frame_env {
 
 Definition make_env (b: bounds) :=
   let olink := 4 * b.(bound_outgoing) in  (* back link *)
-  let oil := olink + 4 in              (* integer locals *)
-  let oics := oil + 4 * b.(bound_int_local) in (* integer callee-saves *)
-  let oendi := oics + 4 * b.(bound_int_callee_save) in
-  let ofl := align oendi 8 in             (* float locals *)
-  let ofcs := ofl + 8 * b.(bound_float_local) in (* float callee-saves *)
-  let ostkdata := ofcs + 8 * b.(bound_float_callee_save) in (* stack data *)
+  let oics := olink + 4 in                (* integer callee-saves *)
+  let ofcs := align (oics + 4 * b.(bound_int_callee_save)) 8 in (* float callee-saves *)
+  let ol := ofcs + 8 * b.(bound_float_callee_save) in (* locals *)
+  let ostkdata := align (ol + 4 * b.(bound_local)) 8 in (* stack data *)
   let oretaddr := align (ostkdata + b.(bound_stack_data)) 4 in (* return address *)
   let sz := oretaddr + 4 in (* total size *)
   mk_frame_env sz olink oretaddr
-                  oil oics b.(bound_int_callee_save)
-                  ofl ofcs b.(bound_float_callee_save)
+                  ol
+                  oics b.(bound_int_callee_save)
+                  ofcs b.(bound_float_callee_save)
                   ostkdata.
 
 (** Separation property *)
@@ -70,25 +67,23 @@ Remark frame_env_separated:
   let fe := make_env b in
   0 <= fe_ofs_arg
   /\ fe_ofs_arg + 4 * b.(bound_outgoing) <= fe.(fe_ofs_link)
-  /\ fe.(fe_ofs_link) + 4 <= fe.(fe_ofs_int_local)
-  /\ fe.(fe_ofs_int_local) + 4 * b.(bound_int_local) <= fe.(fe_ofs_int_callee_save)
-  /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_local)
-  /\ fe.(fe_ofs_float_local) + 8 * b.(bound_float_local) <= fe.(fe_ofs_float_callee_save)
-  /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_stack_data)
+  /\ fe.(fe_ofs_link) + 4 <= fe.(fe_ofs_int_callee_save)
+  /\ fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save) <= fe.(fe_ofs_float_callee_save)
+  /\ fe.(fe_ofs_float_callee_save) + 8 * b.(bound_float_callee_save) <= fe.(fe_ofs_local)
+  /\ fe.(fe_ofs_local) + 4 * b.(bound_local) <= fe.(fe_stack_data)
   /\ fe.(fe_stack_data) + b.(bound_stack_data) <= fe.(fe_ofs_retaddr)
   /\ fe.(fe_ofs_retaddr) + 4 <= fe.(fe_size).
 Proof.
   intros.
   generalize (align_le (fe.(fe_ofs_int_callee_save) + 4 * b.(bound_int_callee_save)) 8 (refl_equal _)).
+  generalize (align_le (fe.(fe_ofs_local) + 4 * b.(bound_local)) 8 (refl_equal _)).
   generalize (align_le (fe.(fe_stack_data) + b.(bound_stack_data)) 4 (refl_equal _)).
   unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr,
-    fe_ofs_int_local, fe_ofs_int_callee_save,
-    fe_num_int_callee_save,
-    fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save,
+    fe_ofs_local, fe_ofs_int_callee_save, fe_num_int_callee_save,
+    fe_ofs_float_callee_save, fe_num_float_callee_save,
     fe_stack_data, fe_ofs_arg.
   intros.
-  generalize (bound_int_local_pos b); intro;
-  generalize (bound_float_local_pos b); intro;
+  generalize (bound_local_pos b); intro;
   generalize (bound_int_callee_save_pos b); intro;
   generalize (bound_float_callee_save_pos b); intro;
   generalize (bound_outgoing_pos b); intro;
@@ -102,38 +97,34 @@ Remark frame_env_aligned:
   forall b,
   let fe := make_env b in
   (4 | fe.(fe_ofs_link))
-  /\ (4 | fe.(fe_ofs_int_local))
   /\ (4 | fe.(fe_ofs_int_callee_save))
-  /\ (8 | fe.(fe_ofs_float_local))
   /\ (8 | fe.(fe_ofs_float_callee_save))
-  /\ (4 | fe.(fe_ofs_retaddr))
+  /\ (8 | fe.(fe_ofs_local))
   /\ (8 | fe.(fe_stack_data))
+  /\ (4 | fe.(fe_ofs_retaddr))
   /\ (4 | fe.(fe_size)).
 Proof.
   intros.
   unfold fe, make_env, fe_size, fe_ofs_link, fe_ofs_retaddr,
-    fe_ofs_int_local, fe_ofs_int_callee_save,
+    fe_ofs_local, fe_ofs_int_callee_save,
     fe_num_int_callee_save,
-    fe_ofs_float_local, fe_ofs_float_callee_save, fe_num_float_callee_save,
+    fe_ofs_float_callee_save, fe_num_float_callee_save,
     fe_stack_data.
   set (x1 := 4 * bound_outgoing b).
   assert (4 | x1). unfold x1; exists (bound_outgoing b); ring.
   set (x2 := x1 + 4).
   assert (4 | x2). unfold x2; apply Zdivide_plus_r; auto. exists 1; auto.
-  set (x3 := x2 + 4 * bound_int_local b).
-  assert (4 | x3). unfold x2; apply Zdivide_plus_r; auto. exists (bound_int_local b); ring.
-  set (x4 := x3 + 4 * bound_int_callee_save b).
-  set (x5 := align x4 8).
-  assert (8 | x5). unfold x5. apply align_divides. omega.
-  set (x6 := x5 + 8 * bound_float_local b).
-  assert (8 | x6). unfold x6. apply Zdivide_plus_r; auto. exists (bound_float_local b); ring.
-  set (x7 := x6 + 8 * bound_float_callee_save b).
-  assert (8 | x7).
-    unfold x7. apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
-  set (x8 := align (x7 + bound_stack_data b) 4).
-  assert (4 | x8). apply align_divides. omega.
-  set (x9 := x8 + 4).
-  assert (4 | x9). unfold x8; apply Zdivide_plus_r; auto. exists 1; auto.
+  set (x3 := x2 + 4 * bound_int_callee_save b).
+  set (x4 := align x3 8).
+  assert (8 | x4). unfold x4. apply align_divides. omega.
+  set (x5 := x4 + 8 * bound_float_callee_save b).
+  assert (8 | x5). unfold x5; apply Zdivide_plus_r; auto. exists (bound_float_callee_save b); ring.
+  set (x6 := align (x5 + 4 * bound_local b) 8).
+  assert (8 | x6). unfold x6; apply align_divides; omega.
+  set (x7 := align (x6 + bound_stack_data b) 4).
+  assert (4 | x7). unfold x7; apply align_divides; omega.
+  set (x8 := x7 + 4).
+  assert (4 | x8). unfold x8; apply Zdivide_plus_r; auto. exists 1; auto.
   tauto.
 Qed.
author	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2013-04-20 07:54:52 +0000
committer	xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>	2013-04-20 07:54:52 +0000
commit	255cee09b71255051c2b40eae0c88bffce1f6f32 (patch)
tree	7951b1b13e8fd5e525b9223e8be0580e83550f55 /ia32
parent	6e5041958df01c56762e90770abd704b95a36e5d (diff)