8 files changed, 59 insertions, 29 deletions
diff --git a/common/Switch.v b/common/Switch.v
index 1b3ca9b..0ba6186 100644
--- a/common/Switch.v
+++ b/common/Switch.v
@@ -250,7 +250,7 @@ Proof.
   rewrite Int.sub_add_opp. rewrite Int.add_permut. rewrite Int.add_assoc. 
   replace (Int.add Int.one (Int.neg Int.one)) with Int.zero.
   rewrite Int.add_zero. apply Int.add_commut.
-  apply Int.mkint_eq. reflexivity.
+  rewrite Int.add_neg_zero; auto.
 Qed.
 
 Lemma validate_jumptable_correct:
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index d38a398..a433c59 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -24,6 +24,8 @@ let option_fpacked_structs = ref false
 let option_fsse = ref true
 let option_ffloatconstprop = ref 2
 let option_falignfunctions = ref (None: int option)
+let option_falignbranchtargets = ref 0
+let option_faligncondbranchs = ref 0
 let option_finline_asm = ref false
 let option_dparse = ref false
 let option_dcmedium = ref false
diff --git a/driver/Driver.ml b/driver/Driver.ml
index e0ed27f..3a5981e 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -391,7 +391,9 @@ Code generation options: (use -fno-<opt> to turn off -f<opt>) :
   -fsmall-const <n>  Set maximal size <n> for allocation in small constant area
   -ffloat-const-prop <n>  Control constant propagation of floats
                    (<n>=0: none, <n>=1: limited, <n>=2: full; default is full)
-  -falign_functions <n>  Set alignment (in bytes) of function entry points
+  -falign-functions <n>  Set alignment (in bytes) of function entry points
+  -falign-branch-targets <n>  Set alignment (in bytes) of branch targets
+  -falign-cond-branches <n>  Set alignment (in bytes) of conditional branches
   -Wa,<opt>      Pass option <opt> to the assembler
 Tracing options:
   -dparse        Save C file after parsing and elaboration in <file>.parse.c
@@ -485,6 +487,8 @@ let cmdline_actions =
   "-fsmall-const$", Integer(fun n -> option_small_const := n);
   "-ffloat-const-prop$", Integer(fun n -> option_ffloatconstprop := n);
   "-falign-functions$", Integer(fun n -> option_falignfunctions := Some n);
+  "-falign-branch-targets", Integer(fun n -> option_falignbranchtargets := n);
+  "-falign-cond-branches", Integer(fun n -> option_faligncondbranchs := n);
   "-fall$", Self (fun _ ->
               List.iter (fun r -> r := true) language_support_options);
   "-fnone$", Self (fun _ ->
diff --git a/lib/Floats.v b/lib/Floats.v
index 2c23d45..63375ff 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -816,8 +816,9 @@ Proof.
     pose proof (Bminus_correct 53 1024 eq_refl eq_refl mode_NE x y) end.
   apply (fun f x y => f x y) in H3; try apply (fun x => proj2 (from_words_value x)).
   do 2 rewrite (fun x => proj1 (from_words_value x)) in H3.
+  rewrite Int.unsigned_zero in H3.
   replace (bpow radix2 52 + Z2R (Int.unsigned x) -
-    (bpow radix2 52 + Z2R (Int.unsigned Int.zero)))%R with (Z2R (Int.unsigned x)) in H3 by (simpl; ring).
+    (bpow radix2 52 + Z2R 0))%R with (Z2R (Int.unsigned x)) in H3 by (simpl; ring).
   rewrite round_exact in H3 by smart_omega.
   match goal with [H3:if Rlt_bool ?x ?y then _ else _ |- _] => 
     pose proof (Rlt_bool_spec x y); destruct (Rlt_bool x y) end; destruct H3. 
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index e99049c..de9decb 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -385,6 +385,7 @@ Remark rolm_label:
 Proof.
   intros; unfold rolm. 
   case (is_rlw_mask mask). reflexivity.
+Opaque Int.eq.
   simpl. autorewrite with labels. auto.
 Qed.
 Hint Rewrite rolm_label: labels.
diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v
index f1c206e..56cb224 100644
--- a/powerpc/Asmgenproof1.v
+++ b/powerpc/Asmgenproof1.v
@@ -75,9 +75,11 @@ Qed.
 Lemma low_high_s:
   forall n, Int.add (Int.shl (high_s n) (Int.repr 16)) (low_s n) = n.
 Proof.
-  intros. rewrite Int.shl_mul_two_p. 
+  intros.
+  rewrite Int.shl_mul_two_p. 
   unfold high_s. 
   rewrite <- (Int.divu_pow2 (Int.sub n (low_s n)) (Int.repr 65536) (Int.repr 16)).
+  2: reflexivity.
   change (two_p (Int.unsigned (Int.repr 16))) with 65536.
   set (x := Int.sub n (low_s n)).
   assert (x = Int.add (Int.mul (Int.divu x (Int.repr 65536)) (Int.repr 65536))
@@ -88,9 +90,10 @@ Proof.
     change 0 with (0 mod 65536).
     change (Int.unsigned (Int.repr 65536)) with 65536.
     apply Int.eqmod_mod_eq. omega. 
-    unfold x, low_s. eapply Int.eqmod_trans. 
-    eapply Int.eqm_eqmod_two_p with (n := 16). compute; auto.
-    unfold Int.sub. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl. 
+    unfold x, low_s. eapply Int.eqmod_trans.
+    apply Int.eqmod_divides with Int.modulus.
+    unfold Int.sub. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl.
+    exists 65536. compute; auto.
     replace 0 with (Int.unsigned n - Int.unsigned n) by omega.
     apply Int.eqmod_sub. apply Int.eqmod_refl. apply Int.eqmod_sign_ext'. 
     compute; auto.
@@ -98,7 +101,6 @@ Proof.
   rewrite <- H. unfold x. rewrite Int.sub_add_opp. rewrite Int.add_assoc.
   rewrite (Int.add_commut (Int.neg (low_s n))). rewrite <- Int.sub_add_opp. 
   rewrite Int.sub_idem. apply Int.add_zero.
-  reflexivity.
 Qed.
 
 (** * Correspondence between Mach registers and PPC registers *)
@@ -998,6 +1000,7 @@ Lemma transl_cond_correct_1:
   /\ forall r, is_data_reg r = true -> rs'#r = rs#r.
 Proof.
   intros.
+Opaque Int.eq.
   destruct cond; simpl in H; TypeInv; simpl; UseTypeInfo.
   (* Ccomp *)
   fold (Val.cmp c (rs (ireg_of m0)) (rs (ireg_of m1))).
@@ -1171,6 +1174,7 @@ Proof.
   rewrite (Int.add_commut  (Int.neg (Int.add i Int.mone))).
   rewrite <- Int.sub_add_opp. rewrite Int.sub_add_r. rewrite Int.sub_idem. 
   rewrite Int.add_zero_l. rewrite Int.add_neg_zero. rewrite Int.add_zero_l.
+Transparent Int.eq.
   unfold Int.add_carry, Int.eq. 
   rewrite Int.unsigned_zero.  rewrite Int.unsigned_mone.
   unfold negb, Val.of_bool, Vtrue, Vfalse.
@@ -1269,6 +1273,7 @@ Proof.
   destruct (mreg_type r1); apply exec_straight_one; auto.
   split. repeat SIMP. intros; repeat SIMP.
   (* Other instructions *)
+Opaque Int.eq.
   destruct op; simpl; simpl in H3; injection H3; clear H3; intros;
   TypeInv; simpl in *; UseTypeInfo; inv EV; try (TranslOpSimpl).
   (* Ointconst *)
diff --git a/powerpc/Op.v b/powerpc/Op.v
index 58bcb2c..4a1fb62 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -1044,8 +1044,7 @@ Fixpoint is_rlw_mask_rec (n: nat) (s: rlw_state) (x: Z) {struct n} : bool :=
   | O =>
       rlw_accepting s
   | S m =>
-      let (b, y) := Int.Z_bin_decomp x in
-      is_rlw_mask_rec m (rlw_transition s b) y
+      is_rlw_mask_rec m (rlw_transition s (Z.odd x)) (Z.div2 x)
   end.
 
 Definition is_rlw_mask (x: int) : bool :=
diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml
index 0d2d201..2ba3cfe 100644
--- a/powerpc/PrintAsm.ml
+++ b/powerpc/PrintAsm.ml
@@ -219,12 +219,6 @@ let rolm_mask n =
   assert (!count = 2 || (!count = 0 && !last));
   (!mb, !me-1)
 
-(* Base-2 log of a Caml integer *)
-
-let rec log2 n =
-  assert (n > 0);
-  if n = 1 then 0 else 1 + log2 (n lsr 1)
-
 (* Built-ins.  They come in three flavors: 
    - annotation statements: take their arguments in registers or stack
      locations; generate no code;
@@ -489,7 +483,7 @@ let short_cond_branch tbl pc lbl_dest =
 let float_literals : (int * int64) list ref = ref []
 let jumptables : (int * label list) list ref = ref []
 
-let print_instruction oc tbl pc = function
+let print_instruction oc tbl pc fallthrough = function
   | Padd(r1, r2, r3) ->
       fprintf oc "	add	%a, %a, %a\n" ireg r1 ireg r2 ireg r3
   | Padde(r1, r2, r3) ->
@@ -529,6 +523,8 @@ let print_instruction oc tbl pc = function
   | Pbctrl ->
       fprintf oc "	bctrl\n"
   | Pbf(bit, lbl) ->
+      if !Clflags.option_faligncondbranchs > 0 then
+        fprintf oc "	.balign	%d\n" !Clflags.option_faligncondbranchs;
       if short_cond_branch tbl pc lbl then
         fprintf oc "	bf	%a, %a\n" crbit bit label (transl_label lbl)
       else begin
@@ -544,6 +540,8 @@ let print_instruction oc tbl pc = function
   | Pblr ->
       fprintf oc "	blr\n"
   | Pbt(bit, lbl) ->
+      if !Clflags.option_faligncondbranchs > 0 then
+        fprintf oc "	.balign	%d\n" !Clflags.option_faligncondbranchs;
       if short_cond_branch tbl pc lbl then
         fprintf oc "	bt	%a, %a\n" crbit bit label (transl_label lbl)
       else begin
@@ -729,6 +727,8 @@ let print_instruction oc tbl pc = function
   | Pxoris(r1, r2, c) ->
       fprintf oc "	xoris	%a, %a, %a\n" ireg r1 ireg r2 constant c
   | Plabel lbl ->
+      if (not fallthrough) && !Clflags.option_falignbranchtargets > 0 then
+        fprintf oc "	.balign	%d\n" !Clflags.option_falignbranchtargets;
       fprintf oc "%a:\n" label (transl_label lbl)
   | Pbuiltin(ef, args, res) ->
       begin match ef with
@@ -762,6 +762,15 @@ let print_instruction oc tbl pc = function
           assert false
       end
 
+(* Determine if an instruction falls through *)
+
+let instr_fall_through = function
+  | Pb _ -> false
+  | Pbctr -> false
+  | Pbs _ -> false
+  | Pblr -> false
+  | _ -> true
+
 (* Estimate the size of an Asm instruction encoding, in number of actual
    PowerPC instructions.  We can over-approximate. *)
 
@@ -782,6 +791,7 @@ let instr_size = function
       | EF_builtin(name, sg) ->
           begin match extern_atom name with
           | "__builtin_bswap" -> 3
+          | "__builtin_fcti" -> 4
           | _ -> 1
           end
       | EF_vload chunk ->
@@ -813,11 +823,11 @@ let rec label_positions tbl pc = function
 
 (* Emit a sequence of instructions *)
 
-let rec print_instructions oc tbl pc = function
+let rec print_instructions oc tbl pc fallthrough = function
   | [] -> ()
   | i :: c ->
-      print_instruction oc tbl pc i;
-      print_instructions oc tbl (pc + instr_size i) c
+      print_instruction oc tbl pc fallthrough i;
+      print_instructions oc tbl (pc + instr_size i) (instr_fall_through i) c
 
 (* Print the code for a function *)
 
@@ -842,23 +852,23 @@ let print_function oc name code =
     |    _    -> (Section_text, Section_literal, Section_jumptable) in
   section oc text;
   let alignment =
-    match !Clflags.option_falignfunctions with Some n -> log2 n | None -> 2 in
-  fprintf oc "	.align %d\n" alignment;
+    match !Clflags.option_falignfunctions with Some n -> n | None -> 4 in
+  fprintf oc "	.balign %d\n" alignment;
   if not (C2C.atom_is_static name) then
     fprintf oc "	.globl %a\n" symbol name;
   fprintf oc "%a:\n" symbol name;
-  print_instructions oc (label_positions PTree.empty 0 code) 0 code;
+  print_instructions oc (label_positions PTree.empty 0 code) 0 true code;
   fprintf oc "	.type	%a, @function\n" symbol name;
   fprintf oc "	.size	%a, . - %a\n" symbol name symbol name;
   if !float_literals <> [] then begin
     section oc lit;
-    fprintf oc "	.align 3\n";
+    fprintf oc "	.balign 8\n";
     List.iter (print_literal oc) !float_literals;
     float_literals := []
   end;
   if !jumptables <> [] then begin
     section oc jmptbl;
-    fprintf oc "	.align 2\n";
+    fprintf oc "	.balign 4\n";
     List.iter (print_jumptable oc) !jumptables;
     jumptables := []
   end
@@ -871,7 +881,7 @@ let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$"
 
 let variadic_stub oc stub_name fun_name args =
   section oc Section_text;
-  fprintf oc "	.align 2\n";
+  fprintf oc "	.balign 4\n";
   fprintf oc ".L%s$stub:\n" stub_name;
   (* bit 6 must be set if at least one argument is a float; clear otherwise *)
   if List.mem Tfloat args
@@ -948,13 +958,13 @@ let print_var oc name v =
         |  _  -> Section_data true
       and align =
         match C2C.atom_alignof name with
-        | Some a -> log2 a
-        | None -> 3 in (* 8-alignment is a safe default *)
+        | Some a -> a
+        | None -> 8 in (* 8-alignment is a safe default *)
       let name_sec =
         name_of_section sec in
       if name_sec <> "COMM" then begin
         fprintf oc "	%s\n" name_sec;
-        fprintf oc "	.align	%d\n" align;
+        fprintf oc "	.balign	%d\n" align;
         if not (C2C.atom_is_static name) then
           fprintf oc "	.globl	%a\n" symbol name;
         fprintf oc "%a:\n" symbol name;
@@ -976,8 +986,16 @@ let print_globdef oc (name, gdef) =
   | Gfun f -> print_fundef oc name f
   | Gvar v -> print_var oc name v
 
+let print_prologue oc =
+  match target with
+  | Linux ->
+      ()
+  | Diab ->
+      fprintf oc "	.xopt	align-fill-text = 0x60000000\n"
+
 let print_program oc p =
   stubbed_functions := IdentSet.empty;
   List.iter record_extfun p.prog_defs;
+  print_prologue oc;
   List.iter (print_globdef oc) p.prog_defs