diff options
-rw-r--r-- | common/Switch.v | 2 | ||||
-rw-r--r-- | driver/Clflags.ml | 2 | ||||
-rw-r--r-- | driver/Driver.ml | 6 | ||||
-rw-r--r-- | lib/Floats.v | 3 | ||||
-rw-r--r-- | powerpc/Asmgenproof.v | 1 | ||||
-rw-r--r-- | powerpc/Asmgenproof1.v | 15 | ||||
-rw-r--r-- | powerpc/Op.v | 3 | ||||
-rw-r--r-- | powerpc/PrintAsm.ml | 56 |
8 files changed, 59 insertions, 29 deletions
diff --git a/common/Switch.v b/common/Switch.v index 1b3ca9b..0ba6186 100644 --- a/common/Switch.v +++ b/common/Switch.v @@ -250,7 +250,7 @@ Proof. rewrite Int.sub_add_opp. rewrite Int.add_permut. rewrite Int.add_assoc. replace (Int.add Int.one (Int.neg Int.one)) with Int.zero. rewrite Int.add_zero. apply Int.add_commut. - apply Int.mkint_eq. reflexivity. + rewrite Int.add_neg_zero; auto. Qed. Lemma validate_jumptable_correct: diff --git a/driver/Clflags.ml b/driver/Clflags.ml index d38a398..a433c59 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -24,6 +24,8 @@ let option_fpacked_structs = ref false let option_fsse = ref true let option_ffloatconstprop = ref 2 let option_falignfunctions = ref (None: int option) +let option_falignbranchtargets = ref 0 +let option_faligncondbranchs = ref 0 let option_finline_asm = ref false let option_dparse = ref false let option_dcmedium = ref false diff --git a/driver/Driver.ml b/driver/Driver.ml index e0ed27f..3a5981e 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -391,7 +391,9 @@ Code generation options: (use -fno-<opt> to turn off -f<opt>) : -fsmall-const <n> Set maximal size <n> for allocation in small constant area -ffloat-const-prop <n> Control constant propagation of floats (<n>=0: none, <n>=1: limited, <n>=2: full; default is full) - -falign_functions <n> Set alignment (in bytes) of function entry points + -falign-functions <n> Set alignment (in bytes) of function entry points + -falign-branch-targets <n> Set alignment (in bytes) of branch targets + -falign-cond-branches <n> Set alignment (in bytes) of conditional branches -Wa,<opt> Pass option <opt> to the assembler Tracing options: -dparse Save C file after parsing and elaboration in <file>.parse.c @@ -485,6 +487,8 @@ let cmdline_actions = "-fsmall-const$", Integer(fun n -> option_small_const := n); "-ffloat-const-prop$", Integer(fun n -> option_ffloatconstprop := n); "-falign-functions$", Integer(fun n -> option_falignfunctions := Some n); + "-falign-branch-targets", Integer(fun n -> option_falignbranchtargets := n); + "-falign-cond-branches", Integer(fun n -> option_faligncondbranchs := n); "-fall$", Self (fun _ -> List.iter (fun r -> r := true) language_support_options); "-fnone$", Self (fun _ -> diff --git a/lib/Floats.v b/lib/Floats.v index 2c23d45..63375ff 100644 --- a/lib/Floats.v +++ b/lib/Floats.v @@ -816,8 +816,9 @@ Proof. pose proof (Bminus_correct 53 1024 eq_refl eq_refl mode_NE x y) end. apply (fun f x y => f x y) in H3; try apply (fun x => proj2 (from_words_value x)). do 2 rewrite (fun x => proj1 (from_words_value x)) in H3. + rewrite Int.unsigned_zero in H3. replace (bpow radix2 52 + Z2R (Int.unsigned x) - - (bpow radix2 52 + Z2R (Int.unsigned Int.zero)))%R with (Z2R (Int.unsigned x)) in H3 by (simpl; ring). + (bpow radix2 52 + Z2R 0))%R with (Z2R (Int.unsigned x)) in H3 by (simpl; ring). rewrite round_exact in H3 by smart_omega. match goal with [H3:if Rlt_bool ?x ?y then _ else _ |- _] => pose proof (Rlt_bool_spec x y); destruct (Rlt_bool x y) end; destruct H3. diff --git a/powerpc/Asmgenproof.v b/powerpc/Asmgenproof.v index e99049c..de9decb 100644 --- a/powerpc/Asmgenproof.v +++ b/powerpc/Asmgenproof.v @@ -385,6 +385,7 @@ Remark rolm_label: Proof. intros; unfold rolm. case (is_rlw_mask mask). reflexivity. +Opaque Int.eq. simpl. autorewrite with labels. auto. Qed. Hint Rewrite rolm_label: labels. diff --git a/powerpc/Asmgenproof1.v b/powerpc/Asmgenproof1.v index f1c206e..56cb224 100644 --- a/powerpc/Asmgenproof1.v +++ b/powerpc/Asmgenproof1.v @@ -75,9 +75,11 @@ Qed. Lemma low_high_s: forall n, Int.add (Int.shl (high_s n) (Int.repr 16)) (low_s n) = n. Proof. - intros. rewrite Int.shl_mul_two_p. + intros. + rewrite Int.shl_mul_two_p. unfold high_s. rewrite <- (Int.divu_pow2 (Int.sub n (low_s n)) (Int.repr 65536) (Int.repr 16)). + 2: reflexivity. change (two_p (Int.unsigned (Int.repr 16))) with 65536. set (x := Int.sub n (low_s n)). assert (x = Int.add (Int.mul (Int.divu x (Int.repr 65536)) (Int.repr 65536)) @@ -88,9 +90,10 @@ Proof. change 0 with (0 mod 65536). change (Int.unsigned (Int.repr 65536)) with 65536. apply Int.eqmod_mod_eq. omega. - unfold x, low_s. eapply Int.eqmod_trans. - eapply Int.eqm_eqmod_two_p with (n := 16). compute; auto. - unfold Int.sub. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl. + unfold x, low_s. eapply Int.eqmod_trans. + apply Int.eqmod_divides with Int.modulus. + unfold Int.sub. apply Int.eqm_unsigned_repr_l. apply Int.eqm_refl. + exists 65536. compute; auto. replace 0 with (Int.unsigned n - Int.unsigned n) by omega. apply Int.eqmod_sub. apply Int.eqmod_refl. apply Int.eqmod_sign_ext'. compute; auto. @@ -98,7 +101,6 @@ Proof. rewrite <- H. unfold x. rewrite Int.sub_add_opp. rewrite Int.add_assoc. rewrite (Int.add_commut (Int.neg (low_s n))). rewrite <- Int.sub_add_opp. rewrite Int.sub_idem. apply Int.add_zero. - reflexivity. Qed. (** * Correspondence between Mach registers and PPC registers *) @@ -998,6 +1000,7 @@ Lemma transl_cond_correct_1: /\ forall r, is_data_reg r = true -> rs'#r = rs#r. Proof. intros. +Opaque Int.eq. destruct cond; simpl in H; TypeInv; simpl; UseTypeInfo. (* Ccomp *) fold (Val.cmp c (rs (ireg_of m0)) (rs (ireg_of m1))). @@ -1171,6 +1174,7 @@ Proof. rewrite (Int.add_commut (Int.neg (Int.add i Int.mone))). rewrite <- Int.sub_add_opp. rewrite Int.sub_add_r. rewrite Int.sub_idem. rewrite Int.add_zero_l. rewrite Int.add_neg_zero. rewrite Int.add_zero_l. +Transparent Int.eq. unfold Int.add_carry, Int.eq. rewrite Int.unsigned_zero. rewrite Int.unsigned_mone. unfold negb, Val.of_bool, Vtrue, Vfalse. @@ -1269,6 +1273,7 @@ Proof. destruct (mreg_type r1); apply exec_straight_one; auto. split. repeat SIMP. intros; repeat SIMP. (* Other instructions *) +Opaque Int.eq. destruct op; simpl; simpl in H3; injection H3; clear H3; intros; TypeInv; simpl in *; UseTypeInfo; inv EV; try (TranslOpSimpl). (* Ointconst *) diff --git a/powerpc/Op.v b/powerpc/Op.v index 58bcb2c..4a1fb62 100644 --- a/powerpc/Op.v +++ b/powerpc/Op.v @@ -1044,8 +1044,7 @@ Fixpoint is_rlw_mask_rec (n: nat) (s: rlw_state) (x: Z) {struct n} : bool := | O => rlw_accepting s | S m => - let (b, y) := Int.Z_bin_decomp x in - is_rlw_mask_rec m (rlw_transition s b) y + is_rlw_mask_rec m (rlw_transition s (Z.odd x)) (Z.div2 x) end. Definition is_rlw_mask (x: int) : bool := diff --git a/powerpc/PrintAsm.ml b/powerpc/PrintAsm.ml index 0d2d201..2ba3cfe 100644 --- a/powerpc/PrintAsm.ml +++ b/powerpc/PrintAsm.ml @@ -219,12 +219,6 @@ let rolm_mask n = assert (!count = 2 || (!count = 0 && !last)); (!mb, !me-1) -(* Base-2 log of a Caml integer *) - -let rec log2 n = - assert (n > 0); - if n = 1 then 0 else 1 + log2 (n lsr 1) - (* Built-ins. They come in three flavors: - annotation statements: take their arguments in registers or stack locations; generate no code; @@ -489,7 +483,7 @@ let short_cond_branch tbl pc lbl_dest = let float_literals : (int * int64) list ref = ref [] let jumptables : (int * label list) list ref = ref [] -let print_instruction oc tbl pc = function +let print_instruction oc tbl pc fallthrough = function | Padd(r1, r2, r3) -> fprintf oc " add %a, %a, %a\n" ireg r1 ireg r2 ireg r3 | Padde(r1, r2, r3) -> @@ -529,6 +523,8 @@ let print_instruction oc tbl pc = function | Pbctrl -> fprintf oc " bctrl\n" | Pbf(bit, lbl) -> + if !Clflags.option_faligncondbranchs > 0 then + fprintf oc " .balign %d\n" !Clflags.option_faligncondbranchs; if short_cond_branch tbl pc lbl then fprintf oc " bf %a, %a\n" crbit bit label (transl_label lbl) else begin @@ -544,6 +540,8 @@ let print_instruction oc tbl pc = function | Pblr -> fprintf oc " blr\n" | Pbt(bit, lbl) -> + if !Clflags.option_faligncondbranchs > 0 then + fprintf oc " .balign %d\n" !Clflags.option_faligncondbranchs; if short_cond_branch tbl pc lbl then fprintf oc " bt %a, %a\n" crbit bit label (transl_label lbl) else begin @@ -729,6 +727,8 @@ let print_instruction oc tbl pc = function | Pxoris(r1, r2, c) -> fprintf oc " xoris %a, %a, %a\n" ireg r1 ireg r2 constant c | Plabel lbl -> + if (not fallthrough) && !Clflags.option_falignbranchtargets > 0 then + fprintf oc " .balign %d\n" !Clflags.option_falignbranchtargets; fprintf oc "%a:\n" label (transl_label lbl) | Pbuiltin(ef, args, res) -> begin match ef with @@ -762,6 +762,15 @@ let print_instruction oc tbl pc = function assert false end +(* Determine if an instruction falls through *) + +let instr_fall_through = function + | Pb _ -> false + | Pbctr -> false + | Pbs _ -> false + | Pblr -> false + | _ -> true + (* Estimate the size of an Asm instruction encoding, in number of actual PowerPC instructions. We can over-approximate. *) @@ -782,6 +791,7 @@ let instr_size = function | EF_builtin(name, sg) -> begin match extern_atom name with | "__builtin_bswap" -> 3 + | "__builtin_fcti" -> 4 | _ -> 1 end | EF_vload chunk -> @@ -813,11 +823,11 @@ let rec label_positions tbl pc = function (* Emit a sequence of instructions *) -let rec print_instructions oc tbl pc = function +let rec print_instructions oc tbl pc fallthrough = function | [] -> () | i :: c -> - print_instruction oc tbl pc i; - print_instructions oc tbl (pc + instr_size i) c + print_instruction oc tbl pc fallthrough i; + print_instructions oc tbl (pc + instr_size i) (instr_fall_through i) c (* Print the code for a function *) @@ -842,23 +852,23 @@ let print_function oc name code = | _ -> (Section_text, Section_literal, Section_jumptable) in section oc text; let alignment = - match !Clflags.option_falignfunctions with Some n -> log2 n | None -> 2 in - fprintf oc " .align %d\n" alignment; + match !Clflags.option_falignfunctions with Some n -> n | None -> 4 in + fprintf oc " .balign %d\n" alignment; if not (C2C.atom_is_static name) then fprintf oc " .globl %a\n" symbol name; fprintf oc "%a:\n" symbol name; - print_instructions oc (label_positions PTree.empty 0 code) 0 code; + print_instructions oc (label_positions PTree.empty 0 code) 0 true code; fprintf oc " .type %a, @function\n" symbol name; fprintf oc " .size %a, . - %a\n" symbol name symbol name; if !float_literals <> [] then begin section oc lit; - fprintf oc " .align 3\n"; + fprintf oc " .balign 8\n"; List.iter (print_literal oc) !float_literals; float_literals := [] end; if !jumptables <> [] then begin section oc jmptbl; - fprintf oc " .align 2\n"; + fprintf oc " .balign 4\n"; List.iter (print_jumptable oc) !jumptables; jumptables := [] end @@ -871,7 +881,7 @@ let re_variadic_stub = Str.regexp "\\(.*\\)\\$[if]*$" let variadic_stub oc stub_name fun_name args = section oc Section_text; - fprintf oc " .align 2\n"; + fprintf oc " .balign 4\n"; fprintf oc ".L%s$stub:\n" stub_name; (* bit 6 must be set if at least one argument is a float; clear otherwise *) if List.mem Tfloat args @@ -948,13 +958,13 @@ let print_var oc name v = | _ -> Section_data true and align = match C2C.atom_alignof name with - | Some a -> log2 a - | None -> 3 in (* 8-alignment is a safe default *) + | Some a -> a + | None -> 8 in (* 8-alignment is a safe default *) let name_sec = name_of_section sec in if name_sec <> "COMM" then begin fprintf oc " %s\n" name_sec; - fprintf oc " .align %d\n" align; + fprintf oc " .balign %d\n" align; if not (C2C.atom_is_static name) then fprintf oc " .globl %a\n" symbol name; fprintf oc "%a:\n" symbol name; @@ -976,8 +986,16 @@ let print_globdef oc (name, gdef) = | Gfun f -> print_fundef oc name f | Gvar v -> print_var oc name v +let print_prologue oc = + match target with + | Linux -> + () + | Diab -> + fprintf oc " .xopt align-fill-text = 0x60000000\n" + let print_program oc p = stubbed_functions := IdentSet.empty; List.iter record_extfun p.prog_defs; + print_prologue oc; List.iter (print_globdef oc) p.prog_defs |