summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--common/Switch.v2
-rw-r--r--driver/Clflags.ml2
-rw-r--r--driver/Driver.ml6
-rw-r--r--lib/Floats.v3
-rw-r--r--powerpc/Asmgenproof.v1
-rw-r--r--powerpc/Asmgenproof1.v15
-rw-r--r--powerpc/Op.v3
-rw-r--r--powerpc/PrintAsm.ml56
8 files changed, 59 insertions, 29 deletions
diff --git a/common/Switch.v b/common/Switch.v
index 1b3ca9b..0ba6186 100644
--- a/common/Switch.v
+++ b/common/Switch.v
@@ -250,7 +250,7 @@ Proof.
rewrite Int.sub_add_opp. rewrite Int.add_permut. rewrite Int.add_assoc.
replace (Int.add Int.one (Int.neg Int.one)) with Int.zero.
rewrite Int.add_zero. apply Int.add_commut.
- apply Int.mkint_eq. reflexivity.
+ rewrite Int.add_neg_zero; auto.
Qed.
Lemma validate_jumptable_correct:
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index d38a398..a433c59 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -24,6 +24,8 @@ let option_fpacked_structs = ref false
let option_fsse = ref true
let option_ffloatconstprop = ref 2
let option_falignfunctions = ref (None: int option)
+let option_falignbranchtargets = ref 0
+let option_faligncondbranchs = ref 0
let option_finline_asm = ref false
let option_dparse = ref false
let option_dcmedium = ref false
diff --git a/driver/Driver.ml b/driver/Driver.ml
index e0ed27f..3a5981e 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -391,7 +391,9 @@ Code generation options: (use -fno-<opt> to turn off -f<opt>) :
-fsmall-const <n> Set maximal size <n> for allocation in small constant area
-ffloat-const-prop <n> Control constant propagation of floats
(<n>=0: none, <n>=1: limited, <n>=2: full; default is full)
- -falign_functions <n> Set alignment (in bytes) of function entry points
+ -falign-functions <n> Set alignment (in bytes) of function entry points
+ -falign-branch-targets <n> Set alignment (in bytes) of branch targets
+ -falign-cond-branches <n> Set alignment (in bytes) of conditional branches
-Wa,<opt> Pass option <opt> to the assembler
Tracing options:
-dparse Save C file after parsing and elaboration in <file>.parse.c
@@ -485,6 +487,8 @@ let cmdline_actions =
"-fsmall-const$", Integer(fun n -> option_small_const := n);
"-ffloat-const-prop$", Integer(fun n -> option_ffloatconstprop := n);
"-falign-functions$", Integer(fun n -> option_falignfunctions := Some n);
+ "-falign-branch-targets", Integer(fun n -> option_falignbranchtargets := n);
+ "-falign-cond-branches", Integer(fun n -> option_faligncondbranchs := n);
"-fall$", Self (fun _ ->
List.iter (fun r -> r := true) language_support_options);
"-fnone$", Self (fun _ ->
diff --git a/lib/Floats.v b/lib/Floats.v
index 2c23d45..63375ff 100644
--- a/lib/Floats.v
+++ b/lib/Floats.v
@@ -816,8 +816,9 @@ Proof.
pose proof (Bminus_correct 53 1024 eq_refl eq_refl mode_NE x y) end.
apply (fun f x y => f x y) in H3; try apply (fun x => proj2 (from_words_value x)).
do 2 rewrite (fun x => proj1 (from_words_value x)) in H3.
+ rewrite Int.unsigned_zero in H3.
replace (bpow radix2 52 + Z2R (Int.unsigned x) -
- (bpow radix2 52 + Z2R (Int.unsigned Int.zero)))%R with (Z2R (Int.unsigned x)) in H3 by (simpl; ring).
+ (bpow radix2 52 + Z2R 0))%R with (Z2R (Int.unsigned x)) in H3 by (simpl; ring).
rewrite round_exact in H3 by smart_omega.
match goal with [H3:if Rlt_bool ?x ?y then _ else _ |- _] =>
pose proof (Rlt_bool_spec x y); destruct (Rlt_bool x y) end; destruct H3.
diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v
index e99049c..de9decb 100644
--- a/powerpc/Asmgenproof.v
+++ b/powerpc/Asmgenproof.v
@@ -385,6 +385,7 @@ Remark rolm_label:
Proof.
intros; unfold rolm.
case (is_rlw_mask mask). reflexivity.
+Opaque Int.eq.
simpl. autorewrite with labels. auto.
Qed.
Hint Rewrite rolm_label: labels.
diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v
index f1c206e..56cb224 100644
--- a/powerpc/Asmgenproof1.v
+++ b/powerpc/Asmgenproof1.v
@@ -75,9 +75,11 @@ Qed.
Lemma low_high_s:
forall n, Int.add (Int.shl (high_s n) (Int.repr 16)) (low_s n) = n.
Proof.
- intros. rewrite Int.shl_mul_two_p.
+ intros.
+ rewrite Int.shl_mul_two_p.
unfold high_s.
rewrite <- (Int.divu_pow2 (Int.sub n (low_s n)) (Int.repr 65536) (Int.repr 16)).
+ 2: reflexivity.
change (two_p (Int.unsigned (Int.repr 16))) with 65536.
set (x := Int.sub n (low_s n)).
assert (x = Int.add (Int.mul (Int.divu x (Int.repr 65536)) (Int.repr 65536))
@@ -88,9 +90,10 @@ Proof.
change 0 with (0 mod 65536).
change (Int.unsigned (Int.repr 65536)) with 65536.
apply Int.eqmod_mod_eq. omega.
- unfold x, low_s. eapply Int.eqmod_trans.
- eapply Int.eqm_eqmod_two_p with (n := 16). compute; auto.
- unfold Int.sub. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl.
+ unfold x, low_s. eapply Int.eqmod_trans.
+ apply Int.eqmod_divides with Int.modulus.
+ unfold Int.sub. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl.
+ exists 65536. compute; auto.
replace 0 with (Int.unsigned n - Int.unsigned n) by omega.
apply Int.eqmod_sub. apply Int.eqmod_refl. apply Int.eqmod_sign_ext'.
compute; auto.
@@ -98,7 +101,6 @@ Proof.
rewrite <- H. unfold x. rewrite Int.sub_add_opp. rewrite Int.add_assoc.
rewrite (Int.add_commut (Int.neg (low_s n))). rewrite <- Int.sub_add_opp.
rewrite Int.sub_idem. apply Int.add_zero.
- reflexivity.
Qed.
(** * Correspondence between Mach registers and PPC registers *)
@@ -998,6 +1000,7 @@ Lemma transl_cond_correct_1:
/\ forall r, is_data_reg r = true -> rs'#r = rs#r.
Proof.
intros.
+Opaque Int.eq.
destruct cond; simpl in H; TypeInv; simpl; UseTypeInfo.
(* Ccomp *)
fold (Val.cmp c (rs (ireg_of m0)) (rs (ireg_of m1))).
@@ -1171,6 +1174,7 @@ Proof.
rewrite (Int.add_commut (Int.neg (Int.add i Int.mone))).
rewrite <- Int.sub_add_opp. rewrite Int.sub_add_r. rewrite Int.sub_idem.
rewrite Int.add_zero_l. rewrite Int.add_neg_zero. rewrite Int.add_zero_l.
+Transparent Int.eq.
unfold Int.add_carry, Int.eq.
rewrite Int.unsigned_zero. rewrite Int.unsigned_mone.
unfold negb, Val.of_bool, Vtrue, Vfalse.
@@ -1269,6 +1273,7 @@ Proof.
destruct (mreg_type r1); apply exec_straight_one; auto.
split. repeat SIMP. intros; repeat SIMP.
(* Other instructions *)
+Opaque Int.eq.
destruct op; simpl; simpl in H3; injection H3; clear H3; intros;
TypeInv; simpl in *; UseTypeInfo; inv EV; try (TranslOpSimpl).
(* Ointconst *)
diff --git a/powerpc/Op.v b/powerpc/Op.v
index 58bcb2c..4a1fb62 100644
--- a/powerpc/Op.v
+++ b/powerpc/Op.v
@@ -1044,8 +1044,7 @@ Fixpoint is_rlw_mask_rec (n: nat) (s: rlw_state) (x: Z) {struct n} : bool :=
| O =>
rlw_accepting s
| S m =>
- let (b, y) := Int.Z_bin_decomp x in
- is_rlw_mask_rec m (rlw_transition s b) y
+ is_rlw_mask_rec m (rlw_transition s (Z.odd x)) (Z.div2 x)
end.
Definition is_rlw_mask (x: int) : bool :=
diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml
index 0d2d201..2ba3cfe 100644
--- a/powerpc/PrintAsm.ml
+++ b/powerpc/PrintAsm.ml
@@ -219,12 +219,6 @@ let rolm_mask n =
assert (!count = 2 || (!count = 0 && !last));
(!mb, !me-1)
-(* Base-2 log of a Caml integer *)
-
-let rec log2 n =
- assert (n > 0);
- if n = 1 then 0 else 1 + log2 (n lsr 1)
-
(* Built-ins. They come in three flavors:
- annotation statements: take their arguments in registers or stack
locations; generate no code;
@@ -489,7 +483,7 @@ let short_cond_branch tbl pc lbl_dest =
let float_literals : (int * int64) list ref = ref []
let jumptables : (int * label list) list ref = ref []
-let print_instruction oc tbl pc = function
+let print_instruction oc tbl pc fallthrough = function
| Padd(r1, r2, r3) ->
fprintf oc " add %a, %a, %a\n" ireg r1 ireg r2 ireg r3
| Padde(r1, r2, r3) ->
@@ -529,6 +523,8 @@ let print_instruction oc tbl pc = function
| Pbctrl ->
fprintf oc " bctrl\n"
| Pbf(bit, lbl) ->
+ if !Clflags.option_faligncondbranchs > 0 then
+ fprintf oc " .balign %d\n" !Clflags.option_faligncondbranchs;
if short_cond_branch tbl pc lbl then
fprintf oc " bf %a, %a\n" crbit bit label (transl_label lbl)
else begin
@@ -544,6 +540,8 @@ let print_instruction oc tbl pc = function
| Pblr ->
fprintf oc " blr\n"
| Pbt(bit, lbl) ->
+ if !Clflags.option_faligncondbranchs > 0 then
+ fprintf oc " .balign %d\n" !Clflags.option_faligncondbranchs;
if short_cond_branch tbl pc lbl then
fprintf oc " bt %a, %a\n" crbit bit label (transl_label lbl)
else begin
@@ -729,6 +727,8 @@ let print_instruction oc tbl pc = function
| Pxoris(r1, r2, c) ->
fprintf oc " xoris %a, %a, %a\n" ireg r1 ireg r2 constant c
| Plabel lbl ->
+ if (not fallthrough) && !Clflags.option_falignbranchtargets > 0 then
+ fprintf oc " .balign %d\n" !Clflags.option_falignbranchtargets;
fprintf oc "%a:\n" label (transl_label lbl)
| Pbuiltin(ef, args, res) ->
begin match ef with
@@ -762,6 +762,15 @@ let print_instruction oc tbl pc = function
assert false
end
+(* Determine if an instruction falls through *)
+
+let instr_fall_through = function
+ | Pb _ -> false
+ | Pbctr -> false
+ | Pbs _ -> false
+ | Pblr -> false
+ | _ -> true
+
(* Estimate the size of an Asm instruction encoding, in number of actual
PowerPC instructions. We can over-approximate. *)
@@ -782,6 +791,7 @@ let instr_size = function
| EF_builtin(name, sg) ->
begin match extern_atom name with
| "__builtin_bswap" -> 3
+ | "__builtin_fcti" -> 4
| _ -> 1
end
| EF_vload chunk ->
@@ -813,11 +823,11 @@ let rec label_positions tbl pc = function
(* Emit a sequence of instructions *)
-let rec print_instructions oc tbl pc = function
+let rec print_instructions oc tbl pc fallthrough = function
| [] -> ()
| i :: c ->
- print_instruction oc tbl pc i;
- print_instructions oc tbl (pc + instr_size i) c
+ print_instruction oc tbl pc fallthrough i;
+ print_instructions oc tbl (pc + instr_size i) (instr_fall_through i) c
(* Print the code for a function *)
@@ -842,23 +852,23 @@ let print_function oc name code =
| _ -> (Section_text, Section_literal, Section_jumptable) in
section oc text;
let alignment =
- match !Clflags.option_falignfunctions with Some n -> log2 n | None -> 2 in
- fprintf oc " .align %d\n" alignment;
+ match !Clflags.option_falignfunctions with Some n -> n | None -> 4 in
+ fprintf oc " .balign %d\n" alignment;
if not (C2C.atom_is_static name) then
fprintf oc " .globl %a\n" symbol name;
fprintf oc "%a:\n" symbol name;
- print_instructions oc (label_positions PTree.empty 0 code) 0 code;
+ print_instructions oc (label_positions PTree.empty 0 code) 0 true code;
fprintf oc " .type %a, @function\n" symbol name;
fprintf oc " .size %a, . - %a\n" symbol name symbol name;
if !float_literals <> [] then begin
section oc lit;
- fprintf oc " .align 3\n";
+ fprintf oc " .balign 8\n";
List.iter (print_literal oc) !float_literals;
float_literals := []
end;
if !jumptables <> [] then begin
section oc jmptbl;
- fprintf oc " .align 2\n";
+ fprintf oc " .balign 4\n";
List.iter (print_jumptable oc) !jumptables;
jumptables := []
end
@@ -871,7 +881,7 @@ let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$"
let variadic_stub oc stub_name fun_name args =
section oc Section_text;
- fprintf oc " .align 2\n";
+ fprintf oc " .balign 4\n";
fprintf oc ".L%s$stub:\n" stub_name;
(* bit 6 must be set if at least one argument is a float; clear otherwise *)
if List.mem Tfloat args
@@ -948,13 +958,13 @@ let print_var oc name v =
| _ -> Section_data true
and align =
match C2C.atom_alignof name with
- | Some a -> log2 a
- | None -> 3 in (* 8-alignment is a safe default *)
+ | Some a -> a
+ | None -> 8 in (* 8-alignment is a safe default *)
let name_sec =
name_of_section sec in
if name_sec <> "COMM" then begin
fprintf oc " %s\n" name_sec;
- fprintf oc " .align %d\n" align;
+ fprintf oc " .balign %d\n" align;
if not (C2C.atom_is_static name) then
fprintf oc " .globl %a\n" symbol name;
fprintf oc "%a:\n" symbol name;
@@ -976,8 +986,16 @@ let print_globdef oc (name, gdef) =
| Gfun f -> print_fundef oc name f
| Gvar v -> print_var oc name v
+let print_prologue oc =
+ match target with
+ | Linux ->
+ ()
+ | Diab ->
+ fprintf oc " .xopt align-fill-text = 0x60000000\n"
+
let print_program oc p =
stubbed_functions := IdentSet.empty;
List.iter record_extfun p.prog_defs;
+ print_prologue oc;
List.iter (print_globdef oc) p.prog_defs