summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2014-07-27 07:35:49 +0000
committerGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2014-07-27 07:35:49 +0000
commitf4e106d4fc1cce484678b5cdd86ab57d7a43076a (patch)
tree3de9bddc63b80fb1b695bbdb8fa5bd6aa893a13a
parent04ff02a9f4bc4f766a450e5463729102ee26882e (diff)
ARM port: add support for Thumb2. To be tested.
git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2549 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
-rw-r--r--arm/Archi.v4
-rw-r--r--arm/Asm.v169
-rw-r--r--arm/Asmgen.v251
-rw-r--r--arm/Asmgenproof.v35
-rw-r--r--arm/Asmgenproof1.v383
-rw-r--r--arm/PrintAsm.ml486
-rw-r--r--arm/SelectOp.vp12
-rw-r--r--driver/Clflags.ml1
-rw-r--r--driver/Compopts.v3
-rw-r--r--driver/Driver.ml2
-rw-r--r--extraction/extraction.v2
-rw-r--r--runtime/arm/i64_dtos.S67
-rw-r--r--runtime/arm/i64_dtou.S56
-rw-r--r--runtime/arm/i64_sar.S (renamed from runtime/arm/i64_sar.s)24
-rw-r--r--runtime/arm/i64_sdiv.S (renamed from runtime/arm/i64_smod.s)39
-rw-r--r--runtime/arm/i64_shl.S (renamed from runtime/arm/i64_shl.s)29
-rw-r--r--runtime/arm/i64_shr.S (renamed from runtime/arm/i64_shr.s)25
-rw-r--r--runtime/arm/i64_smod.S (renamed from runtime/arm/i64_sdiv.s)39
-rw-r--r--runtime/arm/i64_stod.S21
-rw-r--r--runtime/arm/i64_stof.S42
-rw-r--r--runtime/arm/i64_udiv.S (renamed from runtime/arm/i64_udiv.s)13
-rw-r--r--runtime/arm/i64_udivmod.S (renamed from runtime/arm/i64_udivmod.s)25
-rw-r--r--runtime/arm/i64_umod.S (renamed from runtime/arm/i64_umod.s)11
-rw-r--r--runtime/arm/i64_utod.S28
-rw-r--r--runtime/arm/i64_utof.S38
-rw-r--r--runtime/arm/sysdeps.h84
-rw-r--r--runtime/arm/vararg.S36
-rw-r--r--runtime/ia32/sysdeps.h68
28 files changed, 1194 insertions, 799 deletions
diff --git a/arm/Archi.v b/arm/Archi.v
index 1306459..00d9895 100644
--- a/arm/Archi.v
+++ b/arm/Archi.v
@@ -47,5 +47,9 @@ Global Opaque big_endian
default_pl_64 choose_binop_pl_64
default_pl_32 choose_binop_pl_32.
+(** Which ABI to use: either the standard ARM EABI with floats passed
+ in integer registers, or the "hardfloat" variant of the EABI
+ that uses FP registers instead. *)
+
Inductive abi_kind := Softfloat | Hardfloat.
Parameter abi: abi_kind.
diff --git a/arm/Asm.v b/arm/Asm.v
index f054db0..0790c6f 100644
--- a/arm/Asm.v
+++ b/arm/Asm.v
@@ -91,29 +91,19 @@ Notation "'RA'" := IR14 (only parsing).
reference manuals for more details. Some instructions,
described below, are pseudo-instructions: they expand to
canned instruction sequences during the printing of the assembly
- code. *)
+ code. Most instructions are common to Thumb2 and ARM classic.
+ We use a few Thumb2-specific instructions when available, and avoid
+ using ARM classic features that are not in Thumb2. *)
Definition label := positive.
Inductive shift_op : Type :=
| SOimm: int -> shift_op
| SOreg: ireg -> shift_op
- | SOlslimm: ireg -> int -> shift_op
- | SOlslreg: ireg -> ireg -> shift_op
- | SOlsrimm: ireg -> int -> shift_op
- | SOlsrreg: ireg -> ireg -> shift_op
- | SOasrimm: ireg -> int -> shift_op
- | SOasrreg: ireg -> ireg -> shift_op
- | SOrorimm: ireg -> int -> shift_op
- | SOrorreg: ireg -> ireg -> shift_op.
-
-Inductive shift_addr : Type :=
- | SAimm: int -> shift_addr
- | SAreg: ireg -> shift_addr
- | SAlsl: ireg -> int -> shift_addr
- | SAlsr: ireg -> int -> shift_addr
- | SAasr: ireg -> int -> shift_addr
- | SAror: ireg -> int -> shift_addr.
+ | SOlsl: ireg -> int -> shift_op
+ | SOlsr: ireg -> int -> shift_op
+ | SOasr: ireg -> int -> shift_op
+ | SOror: ireg -> int -> shift_op.
Inductive testcond : Type :=
| TCeq: testcond (**r equal *)
@@ -133,6 +123,7 @@ Inductive instruction : Type :=
(* Core instructions *)
| Padd: ireg -> ireg -> shift_op -> instruction (**r integer addition *)
| Pand: ireg -> ireg -> shift_op -> instruction (**r bitwise and *)
+ | Pasr: ireg -> ireg -> ireg -> instruction (**r arithmetic shift right *)
| Pb: label -> instruction (**r branch to label *)
| Pbc: testcond -> label -> instruction (**r conditional branch to label *)
| Pbsymb: ident -> signature -> instruction (**r branch to symbol *)
@@ -142,23 +133,27 @@ Inductive instruction : Type :=
| Pbic: ireg -> ireg -> shift_op -> instruction (**r bitwise bit-clear *)
| Pcmp: ireg -> shift_op -> instruction (**r integer comparison *)
| Peor: ireg -> ireg -> shift_op -> instruction (**r bitwise exclusive or *)
- | Pldr: ireg -> ireg -> shift_addr -> instruction (**r int32 load *)
- | Pldr_a: ireg -> ireg -> shift_addr -> instruction (**r any32 load to int register *)
- | Pldrb: ireg -> ireg -> shift_addr -> instruction (**r unsigned int8 load *)
- | Pldrh: ireg -> ireg -> shift_addr -> instruction (**r unsigned int16 load *)
- | Pldrsb: ireg -> ireg -> shift_addr -> instruction (**r signed int8 load *)
- | Pldrsh: ireg -> ireg -> shift_addr -> instruction (**r unsigned int16 load *)
+ | Pldr: ireg -> ireg -> shift_op -> instruction (**r int32 load *)
+ | Pldr_a: ireg -> ireg -> shift_op -> instruction (**r any32 load to int register *)
+ | Pldrb: ireg -> ireg -> shift_op -> instruction (**r unsigned int8 load *)
+ | Pldrh: ireg -> ireg -> shift_op -> instruction (**r unsigned int16 load *)
+ | Pldrsb: ireg -> ireg -> shift_op -> instruction (**r signed int8 load *)
+ | Pldrsh: ireg -> ireg -> shift_op -> instruction (**r signed int16 load *)
+ | Plsl: ireg -> ireg -> ireg -> instruction (**r shift left *)
+ | Plsr: ireg -> ireg -> ireg -> instruction (**r logical shift right *)
| Pmla: ireg -> ireg -> ireg -> ireg -> instruction (**r integer multiply-add *)
| Pmov: ireg -> shift_op -> instruction (**r integer move *)
- | Pmovc: testcond -> ireg -> shift_op -> instruction (**r integer conditional move *)
+ | Pmovw: ireg -> int -> instruction (**r move 16-bit immediate *)
+ | Pmovt: ireg -> int -> instruction (**r set high 16 bits *)
| Pmul: ireg -> ireg -> ireg -> instruction (**r integer multiplication *)
| Pmvn: ireg -> shift_op -> instruction (**r integer complement *)
| Porr: ireg -> ireg -> shift_op -> instruction (**r bitwise or *)
| Prsb: ireg -> ireg -> shift_op -> instruction (**r integer reverse subtraction *)
- | Pstr: ireg -> ireg -> shift_addr -> instruction (**r int32 store *)
- | Pstr_a: ireg -> ireg -> shift_addr -> instruction (**r any32 store from int register *)
- | Pstrb: ireg -> ireg -> shift_addr -> instruction (**r int8 store *)
- | Pstrh: ireg -> ireg -> shift_addr -> instruction (**r int16 store *)
+ | Psbfx: ireg -> ireg -> int -> int -> instruction (**r signed bitfield extract *)
+ | Pstr: ireg -> ireg -> shift_op -> instruction (**r int32 store *)
+ | Pstr_a: ireg -> ireg -> shift_op -> instruction (**r any32 store from int register *)
+ | Pstrb: ireg -> ireg -> shift_op -> instruction (**r int8 store *)
+ | Pstrh: ireg -> ireg -> shift_op -> instruction (**r int16 store *)
| Psdiv: instruction (**r signed division *)
| Psmull: ireg -> ireg -> ireg -> ireg -> instruction (**r signed multiply long *)
| Psub: ireg -> ireg -> shift_op -> instruction (**r integer subtraction *)
@@ -206,6 +201,7 @@ Inductive instruction : Type :=
| Pfreeframe: Z -> int -> instruction (**r deallocate stack frame and restore previous frame *)
| Plabel: label -> instruction (**r define a code label *)
| Ploadsymbol: ireg -> ident -> int -> instruction (**r load the address of a symbol *)
+ | Pmovite: testcond -> ireg -> shift_op -> shift_op -> instruction (**r integer conditional move *)
| Pbtbl: ireg -> list label -> instruction (**r N-way branch through a jump table *)
| Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function *)
| Pannot: external_function -> list annot_param -> instruction (**r annotation statement *)
@@ -288,6 +284,11 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset :=
| r :: l' => undef_regs l' (rs#r <- Vundef)
end.
+(** Undefining the condition codes *)
+
+Definition undef_flags (rs: regset) : regset :=
+ fun r => match r with CR _ => Vundef | _ => rs r end.
+
(** Assigning multiple registers *)
Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset :=
@@ -347,6 +348,9 @@ Inductive outcome: Type :=
Definition nextinstr (rs: regset) :=
rs#PC <- (Val.add rs#PC Vone).
+Definition nextinstr_nf (rs: regset) :=
+ nextinstr (undef_flags rs).
+
Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) :=
match label_pos lbl 0 (fn_code f) with
| None => Stuck
@@ -363,29 +367,12 @@ Definition eval_shift_op (so: shift_op) (rs: regset) :=
match so with
| SOimm n => Vint n
| SOreg r => rs r
- | SOlslimm r n => Val.shl (rs r) (Vint n)
- | SOlslreg r r' => Val.shl (rs r) (rs r')
- | SOlsrimm r n => Val.shru (rs r) (Vint n)
- | SOlsrreg r r' => Val.shru (rs r) (rs r')
- | SOasrimm r n => Val.shr (rs r) (Vint n)
- | SOasrreg r r' => Val.shr (rs r) (rs r')
- | SOrorimm r n => Val.ror (rs r) (Vint n)
- | SOrorreg r r' => Val.ror (rs r) (rs r')
- end.
-
-(** Evaluation of [shift_addr] operands *)
-
-Definition eval_shift_addr (sa: shift_addr) (rs: regset) :=
- match sa with
- | SAimm n => Vint n
- | SAreg r => rs r
- | SAlsl r n => Val.shl (rs r) (Vint n)
- | SAlsr r n => Val.shru (rs r) (Vint n)
- | SAasr r n => Val.shr (rs r) (Vint n)
- | SAror r n => Val.ror (rs r) (Vint n)
+ | SOlsl r n => Val.shl (rs r) (Vint n)
+ | SOlsr r n => Val.shru (rs r) (Vint n)
+ | SOasr r n => Val.shr (rs r) (Vint n)
+ | SOror r n => Val.ror (rs r) (Vint n)
end.
-
(** Auxiliaries for memory accesses *)
Definition exec_load (chunk: memory_chunk) (addr: val) (r: preg)
@@ -515,22 +502,29 @@ Definition eval_testcond (c: testcond) (rs: regset) : option bool :=
(** Execution of a single instruction [i] in initial state
[rs] and [m]. Return updated state. For instructions
- that correspond to actual PowerPC instructions, the cases are
+ that correspond to actual ARM instructions, the cases are
straightforward transliterations of the informal descriptions
- given in the PowerPC reference manuals. For pseudo-instructions,
- refer to the informal descriptions given above. Note that
- we set to [Vundef] the registers used as temporaries by the
- expansions of the pseudo-instructions, so that the PPC code
- we generate cannot use those registers to hold values that
- must survive the execution of the pseudo-instruction.
+ given in the ARM reference manuals. For pseudo-instructions,
+ refer to the informal descriptions given above.
+
+ Note that we set to [Vundef] the registers used as temporaries by
+ the expansions of the pseudo-instructions, so that the ARM code we
+ generate cannot use those registers to hold values that must
+ survive the execution of the pseudo-instruction.
+
+ Likewise, for several instructions we set the condition flags
+ to [Vundef], so that we can expand them later to the S form
+ or to the non-S form, whichever is more compact in Thumb2.
*)
Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : outcome :=
match i with
| Padd r1 r2 so =>
- Next (nextinstr (rs#r1 <- (Val.add rs#r2 (eval_shift_op so rs)))) m
+ Next (nextinstr_nf (rs#r1 <- (Val.add rs#r2 (eval_shift_op so rs)))) m
| Pand r1 r2 so =>
- Next (nextinstr (rs#r1 <- (Val.and rs#r2 (eval_shift_op so rs)))) m
+ Next (nextinstr_nf (rs#r1 <- (Val.and rs#r2 (eval_shift_op so rs)))) m
+ | Pasr r1 r2 r3 =>
+ Next (nextinstr_nf (rs#r1 <- (Val.shr rs#r2 rs#r3))) m
| Pb lbl =>
goto_label f lbl rs m
| Pbc cond lbl =>
@@ -548,49 +542,52 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
| Pblreg r sg =>
Next (rs#IR14 <- (Val.add rs#PC Vone) #PC <- (rs#r)) m
| Pbic r1 r2 so =>
- Next (nextinstr (rs#r1 <- (Val.and rs#r2 (Val.notint (eval_shift_op so rs))))) m
+ Next (nextinstr_nf (rs#r1 <- (Val.and rs#r2 (Val.notint (eval_shift_op so rs))))) m
| Pcmp r1 so =>
Next (nextinstr (compare_int rs rs#r1 (eval_shift_op so rs) m)) m
| Peor r1 r2 so =>
- Next (nextinstr (rs#r1 <- (Val.xor rs#r2 (eval_shift_op so rs)))) m
+ Next (nextinstr_nf (rs#r1 <- (Val.xor rs#r2 (eval_shift_op so rs)))) m
| Pldr r1 r2 sa =>
- exec_load Mint32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+ exec_load Mint32 (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m
| Pldr_a r1 r2 sa =>
- exec_load Many32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+ exec_load Many32 (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m
| Pldrb r1 r2 sa =>
- exec_load Mint8unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+ exec_load Mint8unsigned (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m
| Pldrh r1 r2 sa =>
- exec_load Mint16unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+ exec_load Mint16unsigned (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m
| Pldrsb r1 r2 sa =>
- exec_load Mint8signed (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+ exec_load Mint8signed (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m
| Pldrsh r1 r2 sa =>
- exec_load Mint16signed (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+ exec_load Mint16signed (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m
+ | Plsl r1 r2 r3 =>
+ Next (nextinstr_nf (rs#r1 <- (Val.shl rs#r2 rs#r3))) m
+ | Plsr r1 r2 r3 =>
+ Next (nextinstr_nf (rs#r1 <- (Val.shru rs#r2 rs#r3))) m
| Pmla r1 r2 r3 r4 =>
Next (nextinstr (rs#r1 <- (Val.add (Val.mul rs#r2 rs#r3) rs#r4))) m
| Pmov r1 so =>
- Next (nextinstr (rs#r1 <- (eval_shift_op so rs))) m
- | Pmovc cond r1 so =>
- match eval_testcond cond rs with
- | Some true => Next (nextinstr (rs#r1 <- (eval_shift_op so rs))) m
- | Some false => Next (nextinstr rs) m
- | None => Next (nextinstr (rs#r1 <- Vundef)) m
- end
+ Next (nextinstr_nf (rs#r1 <- (eval_shift_op so rs))) m
+ | Pmovw r n =>
+ Next (nextinstr (rs#r <- (Vint n))) m
+ | Pmovt r n =>
+ Next (nextinstr (rs#r <- (Val.or (Val.and rs#r (Vint (Int.repr 65535)))
+ (Vint (Int.shl n (Int.repr 16)))))) m
| Pmul r1 r2 r3 =>
Next (nextinstr (rs#r1 <- (Val.mul rs#r2 rs#r3))) m
| Pmvn r1 so =>
- Next (nextinstr (rs#r1 <- (Val.notint (eval_shift_op so rs)))) m
+ Next (nextinstr_nf (rs#r1 <- (Val.notint (eval_shift_op so rs)))) m
| Porr r1 r2 so =>
- Next (nextinstr (rs#r1 <- (Val.or rs#r2 (eval_shift_op so rs)))) m
+ Next (nextinstr_nf (rs#r1 <- (Val.or rs#r2 (eval_shift_op so rs)))) m
| Prsb r1 r2 so =>
- Next (nextinstr (rs#r1 <- (Val.sub (eval_shift_op so rs) rs#r2))) m
+ Next (nextinstr_nf (rs#r1 <- (Val.sub (eval_shift_op so rs) rs#r2))) m
| Pstr r1 r2 sa =>
- exec_store Mint32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+ exec_store Mint32 (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m
| Pstr_a r1 r2 sa =>
- exec_store Many32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+ exec_store Many32 (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m
| Pstrb r1 r2 sa =>
- exec_store Mint8unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+ exec_store Mint8unsigned (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m
| Pstrh r1 r2 sa =>
- exec_store Mint16unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m
+ exec_store Mint16unsigned (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m
| Psdiv =>
match Val.divs rs#IR0 rs#IR1 with
| Some v => Next (nextinstr (rs#IR0 <- v
@@ -598,11 +595,13 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
#IR3 <- Vundef #IR12 <- Vundef)) m
| None => Stuck
end
+ | Psbfx r1 r2 lsb sz =>
+ Next (nextinstr (rs#r1 <- (Val.sign_ext (Int.unsigned sz) (Val.shru rs#r2 (Vint lsb))))) m
| Psmull rdl rdh r1 r2 =>
Next (nextinstr (rs#rdl <- (Val.mul rs#r1 rs#r2)
#rdh <- (Val.mulhs rs#r1 rs#r2))) m
| Psub r1 r2 so =>
- Next (nextinstr (rs#r1 <- (Val.sub rs#r2 (eval_shift_op so rs)))) m
+ Next (nextinstr_nf (rs#r1 <- (Val.sub rs#r2 (eval_shift_op so rs)))) m
| Pudiv =>
match Val.divu rs#IR0 rs#IR1 with
| Some v => Next (nextinstr (rs#IR0 <- v
@@ -709,6 +708,14 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out
Next (nextinstr rs) m
| Ploadsymbol r1 lbl ofs =>
Next (nextinstr (rs#r1 <- (Genv.symbol_address ge lbl ofs))) m
+ | Pmovite cond r1 ifso ifnot =>
+ let v :=
+ match eval_testcond cond rs with
+ | Some true => eval_shift_op ifso rs
+ | Some false => eval_shift_op ifnot rs
+ | None => Vundef
+ end in
+ Next (nextinstr (rs#r1 <- v)) m
| Pbtbl r tbl =>
match rs#r with
| Vint n =>
diff --git a/arm/Asmgen.v b/arm/Asmgen.v
index fa4faa6..de4b87f 100644
--- a/arm/Asmgen.v
+++ b/arm/Asmgen.v
@@ -21,6 +21,7 @@ Require Import Op.
Require Import Locations.
Require Import Mach.
Require Import Asm.
+Require Import Compopts.
Open Local Scope string_scope.
Open Local Scope error_monad_scope.
@@ -33,62 +34,120 @@ Definition ireg_of (r: mreg) : res ireg :=
Definition freg_of (r: mreg) : res freg :=
match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgen.freg_of") end.
-(** Recognition of integer immediate arguments.
-- For arithmetic operations, immediates are
- 8-bit quantities zero-extended and rotated right by 0, 2, 4, ... 30 bits.
-- For memory accesses of type [Mint32], immediate offsets are
- 12-bit quantities plus a sign bit.
-- For other memory accesses, immediate offsets are
- 8-bit quantities plus a sign bit. *)
-
-Fixpoint is_immed_arith_aux (n: nat) (x msk: int) {struct n}: bool :=
+(** Recognition of integer immediate arguments for arithmetic operations.
+- ARM: immediates are 8-bit quantities zero-extended and rotated right
+ by 0, 2, 4, ... 30 bits. In other words, [n] is an immediate iff
+ [rotate-left(n, p)] is between 0 and 255 for some [p = 0, 2, 4, ..., 30].
+- Thumb: immediates are 8-bit quantities zero-extended and shifted left
+ by 0, 1, ..., 31 bits. In other words, [n] is an immediate if
+ all bits are 0 except a run of 8 adjacent bits. In addition,
+ [00XY00XY] and [XY00XY00] and [XYXYXYXY] are immediates for
+ a given [XY] 8-bit constant.
+*)
+
+Fixpoint is_immed_arith_arm (n: nat) (x: int) {struct n}: bool :=
match n with
| Datatypes.O => false
- | Datatypes.S n' =>
- Int.eq (Int.and x (Int.not msk)) Int.zero ||
- is_immed_arith_aux n' x (Int.ror msk (Int.repr 2))
+ | Datatypes.S n =>
+ Int.eq x (Int.and x (Int.repr 255)) ||
+ is_immed_arith_arm n (Int.rol x (Int.repr 2))
+ end.
+
+Fixpoint is_immed_arith_thumb (n: nat) (x: int) {struct n}: bool :=
+ match n with
+ | Datatypes.O => true
+ | Datatypes.S n =>
+ Int.eq x (Int.and x (Int.repr 255)) ||
+ (Int.eq (Int.and x Int.one) Int.zero
+ && is_immed_arith_thumb n (Int.shru x Int.one))
end.
-Definition is_immed_arith (x: int) : bool :=
- is_immed_arith_aux 16%nat x (Int.repr 255).
-
-Definition is_immed_mem_word (x: int) : bool :=
- Int.lt x (Int.repr 4096) && Int.lt (Int.repr (-4096)) x.
-
-Definition mk_immed_mem_word (x: int) : int :=
- Int.sign_ext 13 x.
-
-Definition is_immed_mem_small (x: int) : bool :=
- Int.lt x (Int.repr 256) && Int.lt (Int.repr (-256)) x.
-
-Definition mk_immed_mem_small (x: int) : int :=
- Int.sign_ext 9 x.
-
-Definition is_immed_mem_float (x: int) : bool :=
- Int.eq (Int.and x (Int.repr 3)) Int.zero
- && Int.lt x (Int.repr 1024) && Int.lt (Int.repr (-1024)) x.
-
-Definition mk_immed_mem_float (x: int) : int :=
- Int.and (Int.sign_ext 11 x) (Int.repr 4294967288). (**r 0xfffffff8 *)
-
+Definition is_immed_arith_thumb_special (x: int): bool :=
+ let l1 := Int.and x (Int.repr 255) in
+ let l2 := Int.shl l1 (Int.repr 8) in
+ let l3 := Int.shl l2 (Int.repr 8) in
+ let l4 := Int.shl l3 (Int.repr 8) in
+ let l13 := Int.or l1 l3 in
+ let l24 := Int.or l2 l4 in
+ Int.eq x l13 || Int.eq x l24 || Int.eq x (Int.or l13 l24).
+
+Definition is_immed_arith (x: int): bool :=
+ if thumb tt
+ then is_immed_arith_thumb 24%nat x || is_immed_arith_thumb_special x
+ else is_immed_arith_arm 16%nat x.
+
+(** Recognition of integer immediate arguments for indexed memory accesses.
+- For 32-bit integers, immediate offsets are [(-2^12,2^12)] for ARM classic
+ and [(-2^8,2^12)] for Thumb2.
+- For 8- and 16-bit integers, immediate offsets are [(-2^8,2^8)].
+- For 32- and 64-bit integers, immediate offsets are multiples of 4
+ in [(-2^10,2^10)].
+
+For all 3 kinds of accesses, we provide not a recognizer but a synthesizer:
+a function taking an arbitrary offset [n] and returning a valid offset [n']
+that contains as many useful bits of [n] as possible, so that the
+computation of the remainder [n - n'] is as simple as possible.
+In particular, if [n] is a representable immediate argument, we should have
+[n' = n].
+*)
+
+Definition mk_immed_mem_word (x: int): int :=
+ if Int.ltu x Int.zero then
+ Int.neg (Int.zero_ext (if thumb tt then 8 else 12) (Int.neg x))
+ else
+ Int.zero_ext 12 x.
+
+Definition mk_immed_mem_small (x: int): int :=
+ if Int.ltu x Int.zero then
+ Int.neg (Int.zero_ext 8 (Int.neg x))
+ else
+ Int.zero_ext 8 x.
+
+Definition mk_immed_mem_float (x: int): int :=
+ let x := Int.and x (Int.repr (-4)) in (**r mask low 2 bits off *)
+ if Int.ltu x Int.zero then
+ Int.neg (Int.zero_ext 10 (Int.neg x))
+ else
+ Int.zero_ext 10 x.
+
(** Decomposition of a 32-bit integer into a list of immediate arguments,
whose sum or "or" or "xor" equals the integer. *)
-Fixpoint decompose_int_rec (N: nat) (n p: int) : list int :=
+Fixpoint decompose_int_arm (N: nat) (n p: int) : list int :=
match N with
| Datatypes.O =>
if Int.eq n Int.zero then nil else n :: nil
| Datatypes.S M =>
if Int.eq (Int.and n (Int.shl (Int.repr 3) p)) Int.zero then
- decompose_int_rec M n (Int.add p (Int.repr 2))
+ decompose_int_arm M n (Int.add p (Int.repr 2))
+ else
+ let m := Int.shl (Int.repr 255) p in
+ Int.and n m ::
+ decompose_int_arm M (Int.and n (Int.not m)) (Int.add p (Int.repr 2))
+ end.
+
+Fixpoint decompose_int_thumb (N: nat) (n p: int) : list int :=
+ match N with
+ | Datatypes.O =>
+ if Int.eq n Int.zero then nil else n :: nil
+ | Datatypes.S M =>
+ if Int.eq (Int.and n (Int.shl Int.one p)) Int.zero then
+ decompose_int_thumb M n (Int.add p Int.one)
else
let m := Int.shl (Int.repr 255) p in
Int.and n m ::
- decompose_int_rec M (Int.and n (Int.not m)) (Int.add p (Int.repr 2))
+ decompose_int_thumb M (Int.and n (Int.not m)) (Int.add p Int.one)
end.
+Definition decompose_int_base (n: int): list int :=
+ if thumb tt
+ then if is_immed_arith_thumb_special n
+ then n :: nil
+ else decompose_int_thumb 24%nat n Int.zero
+ else decompose_int_arm 12%nat n Int.zero.
+
Definition decompose_int (n: int) : list int :=
- match decompose_int_rec 12%nat n Int.zero with
+ match decompose_int_base n with
| nil => Int.zero :: nil
| l => l
end.
@@ -103,28 +162,46 @@ Definition iterate_op (op1 op2: shift_op -> instruction) (l: list int) (k: code)
(** Smart constructors for integer immediate arguments. *)
+Definition loadimm_thumb (r: ireg) (n: int) (k: code) :=
+ let hi := Int.shru n (Int.repr 16) in
+ if Int.eq hi Int.zero
+ then Pmovw r n :: k
+ else Pmovw r (Int.zero_ext 16 n) :: Pmovt r hi :: k.
+
Definition loadimm (r: ireg) (n: int) (k: code) :=
let d1 := decompose_int n in
let d2 := decompose_int (Int.not n) in
- if NPeano.leb (List.length d1) (List.length d2)
- then iterate_op (Pmov r) (Porr r r) d1 k
- else iterate_op (Pmvn r) (Pbic r r) d2 k.
+ let l1 := List.length d1 in
+ let l2 := List.length d2 in
+ if NPeano.leb l1 1%nat then
+ Pmov r (SOimm n) :: k
+ else if NPeano.leb l2 1%nat then
+ Pmvn r (SOimm (Int.not n)) :: k
+ else if thumb tt then
+ loadimm_thumb r n k
+ else if NPeano.leb l1 l2 then
+ iterate_op (Pmov r) (Porr r r) d1 k
+ else
+ iterate_op (Pmvn r) (Pbic r r) d2 k.
Definition addimm (r1 r2: ireg) (n: int) (k: code) :=
- let d1 := decompose_int n in
- let d2 := decompose_int (Int.neg n) in
- if NPeano.leb (List.length d1) (List.length d2)
- then iterate_op (Padd r1 r2) (Padd r1 r1) d1 k
- else iterate_op (Psub r1 r2) (Psub r1 r1) d2 k.
+ if Int.ltu (Int.repr (-256)) n then
+ Psub r1 r2 (SOimm (Int.neg n)) :: k
+ else
+ (let d1 := decompose_int n in
+ let d2 := decompose_int (Int.neg n) in
+ if NPeano.leb (List.length d1) (List.length d2)
+ then iterate_op (Padd r1 r2) (Padd r1 r1) d1 k
+ else iterate_op (Psub r1 r2) (Psub r1 r1) d2 k).
+
+Definition rsubimm (r1 r2: ireg) (n: int) (k: code) :=
+ iterate_op (Prsb r1 r2) (Padd r1 r1) (decompose_int n) k.
Definition andimm (r1 r2: ireg) (n: int) (k: code) :=
if is_immed_arith n
then Pand r1 r2 (SOimm n) :: k
else iterate_op (Pbic r1 r2) (Pbic r1 r1) (decompose_int (Int.not n)) k.
-Definition rsubimm (r1 r2: ireg) (n: int) (k: code) :=
- iterate_op (Prsb r1 r2) (Padd r1 r1) (decompose_int n) k.
-
Definition orimm (r1 r2: ireg) (n: int) (k: code) :=
iterate_op (Porr r1 r2) (Porr r1 r1) (decompose_int n) k.
@@ -135,10 +212,10 @@ Definition xorimm (r1 r2: ireg) (n: int) (k: code) :=
Definition transl_shift (s: shift) (r: ireg) : shift_op :=
match s with
- | Slsl n => SOlslimm r (s_amount n)
- | Slsr n => SOlsrimm r (s_amount n)
- | Sasr n => SOasrimm r (s_amount n)
- | Sror n => SOrorimm r (s_amount n)
+ | Slsl n => SOlsl r (s_amount n)
+ | Slsr n => SOlsr r (s_amount n)
+ | Sasr n => SOasr r (s_amount n)
+ | Sror n => SOror r (s_amount n)
end.
(** Translation of a condition. Prepends to [k] the instructions
@@ -288,12 +365,18 @@ Definition transl_op
OK (addimm r IR13 n k)
| Ocast8signed, a1 :: nil =>
do r <- ireg_of res; do r1 <- ireg_of a1;
- OK (Pmov r (SOlslimm r1 (Int.repr 24)) ::
- Pmov r (SOasrimm r (Int.repr 24)) :: k)
+ OK (if thumb tt then
+ Psbfx r r1 Int.zero (Int.repr 8) :: k
+ else
+ Pmov r (SOlsl r1 (Int.repr 24)) ::
+ Pmov r (SOasr r (Int.repr 24)) :: k)
| Ocast16signed, a1 :: nil =>
do r <- ireg_of res; do r1 <- ireg_of a1;
- OK (Pmov r (SOlslimm r1 (Int.repr 16)) ::
- Pmov r (SOasrimm r (Int.repr 16)) :: k)
+ OK (if thumb tt then
+ Psbfx r r1 Int.zero (Int.repr 16) :: k
+ else
+ Pmov r (SOlsl r1 (Int.repr 16)) ::
+ Pmov r (SOasr r (Int.repr 16)) :: k)
| Oadd, a1 :: a2 :: nil =>
do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
OK (Padd r r1 (SOreg r2) :: k)
@@ -317,15 +400,11 @@ Definition transl_op
OK (rsubimm r r1 n k)
| Omul, a1 :: a2 :: nil =>
do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (if negb (ireg_eq r r1) then Pmul r r1 r2 :: k
- else if negb (ireg_eq r r2) then Pmul r r2 r1 :: k
- else Pmul IR14 r1 r2 :: Pmov r (SOreg IR14) :: k)
+ OK (Pmul r r1 r2 :: k)
| Omla, a1 :: a2 :: a3 :: nil =>
do r <- ireg_of res; do r1 <- ireg_of a1;
do r2 <- ireg_of a2; do r3 <- ireg_of a3;
- OK (if negb (ireg_eq r r1) then Pmla r r1 r2 r3 :: k
- else if negb (ireg_eq r r2) then Pmla r r2 r1 r3 :: k
- else Pmla IR14 r1 r2 r3 :: Pmov r (SOreg IR14) :: k)
+ OK (Pmla r r1 r2 r3 :: k)
| Omulhs, a1 :: a2 :: nil =>
do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
OK (Psmull IR14 r r1 r2 :: k)
@@ -383,13 +462,13 @@ Definition transl_op
OK (Pmvn r (transl_shift s r1) :: k)
| Oshl, a1 :: a2 :: nil =>
do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pmov r (SOlslreg r1 r2) :: k)
+ OK (Plsl r r1 r2 :: k)
| Oshr, a1 :: a2 :: nil =>
do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pmov r (SOasrreg r1 r2) :: k)
+ OK (Pasr r r1 r2 :: k)
| Oshru, a1 :: a2 :: nil =>
do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (Pmov r (SOlsrreg r1 r2) :: k)
+ OK (Plsr r r1 r2 :: k)
| Oshift s, a1 :: nil =>
do r <- ireg_of res; do r1 <- ireg_of a1;
OK (Pmov r (transl_shift s r1) :: k)
@@ -398,9 +477,9 @@ Definition transl_op
if Int.eq n Int.zero then
OK (Pmov r (SOreg r1) :: k)
else
- OK (Pmov IR14 (SOasrimm r1 (Int.repr 31)) ::
- Padd IR14 r1 (SOlsrimm IR14 (Int.sub Int.iwordsize n)) ::
- Pmov r (SOasrimm IR14 n) :: k)
+ OK (Pmov IR14 (SOasr r1 (Int.repr 31)) ::
+ Padd IR14 r1 (SOlsr IR14 (Int.sub Int.iwordsize n)) ::
+ Pmov r (SOasr IR14 n) :: k)
| Onegf, a1 :: nil =>
do r <- freg_of res; do r1 <- freg_of a1;
OK (Pfnegd r r1 :: k)
@@ -470,9 +549,7 @@ Definition transl_op
| Ocmp cmp, _ =>
do r <- ireg_of res;
transl_cond cmp args
- (Pmov r (SOimm Int.zero) ::
- Pmovc (cond_for_cond cmp) r (SOimm Int.one) ::
- k)
+ (Pmovite (cond_for_cond cmp) r (SOimm Int.one) (SOimm Int.zero) :: k)
| _, _ =>
Error(msg "Asmgen.transl_op")
end.
@@ -489,14 +566,14 @@ Definition indexed_memory_access
else addimm IR14 base (Int.sub n n1) (mk_instr IR14 n1 :: k).
Definition loadind_int (base: ireg) (ofs: int) (dst: ireg) (k: code) :=
- indexed_memory_access (fun base n => Pldr dst base (SAimm n)) mk_immed_mem_word base ofs k.
+ indexed_memory_access (fun base n => Pldr dst base (SOimm n)) mk_immed_mem_word base ofs k.
Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
match ty, preg_of dst with
| Tint, IR r =>
- OK (indexed_memory_access (fun base n => Pldr r base (SAimm n)) mk_immed_mem_word base ofs k)
+ OK (indexed_memory_access (fun base n => Pldr r base (SOimm n)) mk_immed_mem_word base ofs k)
| Tany32, IR r =>
- OK (indexed_memory_access (fun base n => Pldr_a r base (SAimm n)) mk_immed_mem_word base ofs k)
+ OK (indexed_memory_access (fun base n => Pldr_a r base (SOimm n)) mk_immed_mem_word base ofs k)
| Tsingle, FR r =>
OK (indexed_memory_access (Pflds r) mk_immed_mem_float base ofs k)
| Tfloat, FR r =>
@@ -510,9 +587,9 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) :=
Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
match ty, preg_of src with
| Tint, IR r =>
- OK (indexed_memory_access (fun base n => Pstr r base (SAimm n)) mk_immed_mem_word base ofs k)
+ OK (indexed_memory_access (fun base n => Pstr r base (SOimm n)) mk_immed_mem_word base ofs k)
| Tany32, IR r =>
- OK (indexed_memory_access (fun base n => Pstr_a r base (SAimm n)) mk_immed_mem_word base ofs k)
+ OK (indexed_memory_access (fun base n => Pstr_a r base (SOimm n)) mk_immed_mem_word base ofs k)
| Tsingle, FR r =>
OK (indexed_memory_access (Pfsts r) mk_immed_mem_float base ofs k)
| Tfloat, FR r =>
@@ -525,17 +602,9 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) :=
(** Translation of memory accesses *)
-Definition transl_shift_addr (s: shift) (r: ireg) : shift_addr :=
- match s with
- | Slsl n => SAlsl r (s_amount n)
- | Slsr n => SAlsr r (s_amount n)
- | Sasr n => SAasr r (s_amount n)
- | Sror n => SAror r (s_amount n)
- end.
-
Definition transl_memory_access
(mk_instr_imm: ireg -> int -> instruction)
- (mk_instr_gen: option (ireg -> shift_addr -> instruction))
+ (mk_instr_gen: option (ireg -> shift_op -> instruction))
(mk_immed: int -> int)
(addr: addressing) (args: list mreg) (k: code) :=
match addr, args with
@@ -546,7 +615,7 @@ Definition transl_memory_access
match mk_instr_gen with
| Some f =>
do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (f r1 (SAreg r2) :: k)
+ OK (f r1 (SOreg r2) :: k)
| None =>
Error (msg "Asmgen.Aindexed2")
end
@@ -554,7 +623,7 @@ Definition transl_memory_access
match mk_instr_gen with
| Some f =>
do r1 <- ireg_of a1; do r2 <- ireg_of a2;
- OK (f r1 (transl_shift_addr s r2) :: k)
+ OK (f r1 (transl_shift s r2) :: k)
| None =>
Error (msg "Asmgen.Aindexed2shift")
end
@@ -565,12 +634,12 @@ Definition transl_memory_access
end.
Definition transl_memory_access_int
- (mk_instr: ireg -> ireg -> shift_addr -> instruction)
+ (mk_instr: ireg -> ireg -> shift_op -> instruction)
(mk_immed: int -> int)
(dst: mreg) (addr: addressing) (args: list mreg) (k: code) :=
do rd <- ireg_of dst;
transl_memory_access
- (fun r n => mk_instr rd r (SAimm n))
+ (fun r n => mk_instr rd r (SOimm n))
(Some (mk_instr rd))
mk_immed addr args k.
@@ -729,7 +798,7 @@ Definition transl_function (f: Mach.function) :=
do c <- transl_code f f.(Mach.fn_code) true;
OK (mkfunction f.(Mach.fn_sig)
(Pallocframe f.(fn_stacksize) f.(fn_link_ofs) ::
- Pstr IR14 IR13 (SAimm f.(fn_retaddr_ofs)) :: c)).
+ Pstr IR14 IR13 (SOimm f.(fn_retaddr_ofs)) :: c)).
Definition transf_function (f: Mach.function) : res Asm.function :=
do tf <- transl_function f;
diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v
index 341f6a0..713e012 100644
--- a/arm/Asmgenproof.v
+++ b/arm/Asmgenproof.v
@@ -27,6 +27,7 @@ Require Import Op.
Require Import Locations.
Require Import Conventions.
Require Import Mach.
+Require Import Compopts.
Require Import Asm.
Require Import Asmgen.
Require Import Asmgenproof0.
@@ -179,8 +180,13 @@ Remark loadimm_label:
forall r n k, tail_nolabel k (loadimm r n k).
Proof.
intros. unfold loadimm.
- destruct (NPeano.leb (length (decompose_int n)) (length (decompose_int (Int.not n))));
- auto with labels.
+ set (l1 := length (decompose_int n)).
+ set (l2 := length (decompose_int (Int.not n))).
+ destruct (NPeano.leb l1 1%nat). TailNoLabel.
+ destruct (NPeano.leb l2 1%nat). TailNoLabel.
+ destruct (thumb tt). unfold loadimm_thumb.
+ destruct (Int.eq (Int.shru n (Int.repr 16)) Int.zero); TailNoLabel.
+ destruct (NPeano.leb l1 l2); auto with labels.
Qed.
Hint Resolve loadimm_label: labels.
@@ -188,6 +194,7 @@ Remark addimm_label:
forall r1 r2 n k, tail_nolabel k (addimm r1 r2 n k).
Proof.
intros; unfold addimm.
+ destruct (Int.ltu (Int.repr (-256)) n). TailNoLabel.
destruct (NPeano.leb (length (decompose_int n)) (length (decompose_int (Int.neg n))));
auto with labels.
Qed.
@@ -262,15 +269,15 @@ Proof.
Opaque Int.eq.
unfold transl_op; intros; destruct op; TailNoLabel.
destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel.
- destruct (negb (ireg_eq x x0)). TailNoLabel. destruct (negb (ireg_eq x x1)); TailNoLabel.
- destruct (negb (ireg_eq x x0)). TailNoLabel. destruct (negb (ireg_eq x x1)); TailNoLabel.
+ destruct (thumb tt); TailNoLabel.
+ destruct (thumb tt); TailNoLabel.
eapply tail_nolabel_trans; TailNoLabel.
eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel.
Qed.
Remark transl_memory_access_label:
forall (mk_instr_imm: ireg -> int -> instruction)
- (mk_instr_gen: option (ireg -> shift_addr -> instruction))
+ (mk_instr_gen: option (ireg -> shift_op -> instruction))
(mk_immed: int -> int)
(addr: addressing) (args: list mreg) c k,
transl_memory_access mk_instr_imm mk_instr_gen mk_immed addr args k = OK c ->
@@ -556,8 +563,6 @@ Proof.
exists rs'; split. eauto.
split. eapply agree_undef_regs; eauto with asmgen.
simpl; intros. rewrite Q; auto with asmgen.
-Local Transparent destroyed_by_setstack.
- destruct ty; simpl; auto with asmgen.
- (* Mgetparam *)
assert (f0 = f) by congruence; subst f0.
@@ -687,7 +692,7 @@ Opaque loadind.
k rs2 m2'
/\ rs2#SP = parent_sp s
/\ rs2#RA = parent_ra s
- /\ forall r, r <> PC -> r <> SP -> r <> IR14 -> rs2#r = rs0#r).
+ /\ forall r, if_preg r = true -> r <> SP -> r <> IR14 -> rs2#r = rs0#r).
{
intros.
exploit loadind_int_correct. eexact C. intros [rs1 [P [Q R]]].
@@ -695,9 +700,7 @@ Opaque loadind.
eapply exec_straight_trans. eexact P. apply exec_straight_one.
simpl. rewrite R; auto with asmgen. unfold chunk_of_type in A. rewrite A.
rewrite <- (sp_val _ _ _ AG). rewrite E. eauto. auto.
- split. Simpl.
- split. Simpl.
- intros. Simpl.
+ split. Simpl. split. Simpl. intros. Simpl.
}
destruct ros as [rf|fid]; simpl in H; monadInv H7.
+ (* Indirect call *)
@@ -794,19 +797,19 @@ Opaque loadind.
left; eapply exec_straight_steps_goto; eauto.
intros. simpl in TR.
destruct (transl_cond_correct tge tf cond args _ rs0 m' _ TR) as [rs' [A [B C]]].
- rewrite EC in B.
+ rewrite EC in B. destruct B as [Bpos Bneg].
econstructor; econstructor; econstructor; split. eexact A.
split. eapply agree_undef_regs; eauto with asmgen.
- simpl. rewrite B. reflexivity.
+ simpl. rewrite Bpos. reflexivity.
- (* Mcond false *)
exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC.
left; eapply exec_straight_steps; eauto. intros. simpl in TR.
destruct (transl_cond_correct tge tf cond args _ rs0 m' _ TR) as [rs' [A [B C]]].
- rewrite EC in B.
+ rewrite EC in B. destruct B as [Bpos Bneg].
econstructor; split.
eapply exec_straight_trans. eexact A.
- apply exec_straight_one. simpl. rewrite B. reflexivity. auto.
+ apply exec_straight_one. simpl. rewrite Bpos. reflexivity. auto.
split. eapply agree_undef_regs; eauto with asmgen.
intros; Simpl.
simpl. congruence.
@@ -849,7 +852,7 @@ Opaque loadind.
k rs2 m2'
/\ rs2#SP = parent_sp s
/\ rs2#RA = parent_ra s
- /\ forall r, r <> PC -> r <> SP -> r <> IR14 -> rs2#r = rs0#r).
+ /\ forall r, if_preg r = true -> r <> SP -> r <> IR14 -> rs2#r = rs0#r).
{
intros.
exploit loadind_int_correct. eexact C. intros [rs1 [P [Q R]]].
diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v
index a0d6752..c859434 100644
--- a/arm/Asmgenproof1.v
+++ b/arm/Asmgenproof1.v
@@ -24,6 +24,7 @@ Require Import Globalenvs.
Require Import Op.
Require Import Locations.
Require Import Mach.
+Require Import Compopts.
Require Import Asm.
Require Import Asmgen.
Require Import Conventions.
@@ -45,13 +46,55 @@ Proof.
Qed.
Hint Resolve ireg_of_not_R14': asmgen.
+(** [undef_flags] and [nextinstr_nf] *)
+
+Lemma nextinstr_nf_pc:
+ forall rs, (nextinstr_nf rs)#PC = Val.add rs#PC Vone.
+Proof.
+ intros. reflexivity.
+Qed.
+
+Definition if_preg (r: preg) : bool :=
+ match r with
+ | IR _ => true
+ | FR _ => true
+ | CR _ => false
+ | PC => false
+ end.
+
+Lemma data_if_preg: forall r, data_preg r = true -> if_preg r = true.
+Proof.
+ intros. destruct r; reflexivity || discriminate.
+Qed.
+
+Lemma if_preg_not_PC: forall r, if_preg r = true -> r <> PC.
+Proof.
+ intros; red; intros; subst; discriminate.
+Qed.
+
+Hint Resolve data_if_preg if_preg_not_PC: asmgen.
+
+Lemma nextinstr_nf_inv:
+ forall r rs, if_preg r = true -> (nextinstr_nf rs)#r = rs#r.
+Proof.
+ intros. destruct r; reflexivity || discriminate.
+Qed.
+
+Lemma nextinstr_nf_inv1:
+ forall r rs, data_preg r = true -> (nextinstr_nf rs)#r = rs#r.
+Proof.
+ intros. destruct r; reflexivity || discriminate.
+Qed.
+
(** Useful simplification tactic *)
Ltac Simplif :=
((rewrite nextinstr_inv by eauto with asmgen)
|| (rewrite nextinstr_inv1 by eauto with asmgen)
+ || (rewrite nextinstr_nf_inv by eauto with asmgen)
|| (rewrite Pregmap.gss)
|| (rewrite nextinstr_pc)
+ || (rewrite nextinstr_nf_pc)
|| (rewrite Pregmap.gso by eauto with asmgen)); auto with asmgen.
Ltac Simpl := repeat Simplif.
@@ -65,8 +108,8 @@ Variable fn: function.
(** Decomposition of an integer constant *)
-Lemma decompose_int_rec_or:
- forall N n p x, List.fold_left Int.or (decompose_int_rec N n p) x = Int.or x n.
+Lemma decompose_int_arm_or:
+ forall N n p x, List.fold_left Int.or (decompose_int_arm N n p) x = Int.or x n.
Proof.
induction N; intros; simpl.
predSpec Int.eq Int.eq_spec n Int.zero; simpl.
@@ -78,8 +121,8 @@ Proof.
rewrite Int.or_not_self. apply Int.and_mone.
Qed.
-Lemma decompose_int_rec_xor:
- forall N n p x, List.fold_left Int.xor (decompose_int_rec N n p) x = Int.xor x n.
+Lemma decompose_int_arm_xor:
+ forall N n p x, List.fold_left Int.xor (decompose_int_arm N n p) x = Int.xor x n.
Proof.
induction N; intros; simpl.
predSpec Int.eq Int.eq_spec n Int.zero; simpl.
@@ -91,8 +134,8 @@ Proof.
rewrite Int.xor_not_self. apply Int.and_mone.
Qed.
-Lemma decompose_int_rec_add:
- forall N n p x, List.fold_left Int.add (decompose_int_rec N n p) x = Int.add x n.
+Lemma decompose_int_arm_add:
+ forall N n p x, List.fold_left Int.add (decompose_int_arm N n p) x = Int.add x n.
Proof.
induction N; intros; simpl.
predSpec Int.eq Int.eq_spec n Int.zero; simpl.
@@ -104,10 +147,56 @@ Proof.
rewrite Int.or_not_self. apply Int.and_mone. apply Int.and_not_self.
Qed.
-Remark decompose_int_rec_nil:
- forall N n p, decompose_int_rec N n p = nil -> n = Int.zero.
+Remark decompose_int_arm_nil:
+ forall N n p, decompose_int_arm N n p = nil -> n = Int.zero.
Proof.
- intros. generalize (decompose_int_rec_or N n p Int.zero). rewrite H. simpl.
+ intros. generalize (decompose_int_arm_or N n p Int.zero). rewrite H. simpl.
+ rewrite Int.or_commut; rewrite Int.or_zero; auto.
+Qed.
+
+Lemma decompose_int_thumb_or:
+ forall N n p x, List.fold_left Int.or (decompose_int_thumb N n p) x = Int.or x n.
+Proof.
+ induction N; intros; simpl.
+ predSpec Int.eq Int.eq_spec n Int.zero; simpl.
+ subst n. rewrite Int.or_zero. auto.
+ auto.
+ predSpec Int.eq Int.eq_spec (Int.and n (Int.shl Int.one p)) Int.zero.
+ auto.
+ simpl. rewrite IHN. rewrite Int.or_assoc. decEq. rewrite <- Int.and_or_distrib.
+ rewrite Int.or_not_self. apply Int.and_mone.
+Qed.
+
+Lemma decompose_int_thumb_xor:
+ forall N n p x, List.fold_left Int.xor (decompose_int_thumb N n p) x = Int.xor x n.
+Proof.
+ induction N; intros; simpl.
+ predSpec Int.eq Int.eq_spec n Int.zero; simpl.
+ subst n. rewrite Int.xor_zero. auto.
+ auto.
+ predSpec Int.eq Int.eq_spec (Int.and n (Int.shl Int.one p)) Int.zero.
+ auto.
+ simpl. rewrite IHN. rewrite Int.xor_assoc. decEq. rewrite <- Int.and_xor_distrib.
+ rewrite Int.xor_not_self. apply Int.and_mone.
+Qed.
+
+Lemma decompose_int_thumb_add:
+ forall N n p x, List.fold_left Int.add (decompose_int_thumb N n p) x = Int.add x n.
+Proof.
+ induction N; intros; simpl.
+ predSpec Int.eq Int.eq_spec n Int.zero; simpl.
+ subst n. rewrite Int.add_zero. auto.
+ auto.
+ predSpec Int.eq Int.eq_spec (Int.and n (Int.shl Int.one p)) Int.zero.
+ auto.
+ simpl. rewrite IHN. rewrite Int.add_assoc. decEq. rewrite Int.add_and.
+ rewrite Int.or_not_self. apply Int.and_mone. apply Int.and_not_self.
+Qed.
+
+Remark decompose_int_thumb_nil:
+ forall N n p, decompose_int_thumb N n p = nil -> n = Int.zero.
+Proof.
+ intros. generalize (decompose_int_thumb_or N n p Int.zero). rewrite H. simpl.
rewrite Int.or_commut; rewrite Int.or_zero; auto.
Qed.
@@ -116,23 +205,31 @@ Lemma decompose_int_general:
(forall v1 n2 n3, f (f v1 n2) n3 = f v1 (g n2 n3)) ->
(forall n1 n2 n3, g (g n1 n2) n3 = g n1 (g n2 n3)) ->
(forall n, g Int.zero n = n) ->
- (forall N n p x, List.fold_left g (decompose_int_rec N n p) x = g x n) ->
+ (forall N n p x, List.fold_left g (decompose_int_arm N n p) x = g x n) ->
+ (forall N n p x, List.fold_left g (decompose_int_thumb N n p) x = g x n) ->
forall n v,
List.fold_left f (decompose_int n) v = f v n.
Proof.
- intros f g DISTR ASSOC ZERO DECOMP.
+ intros f g DISTR ASSOC ZERO DECOMP1 DECOMP2.
assert (A: forall l x y, g x (fold_left g l y) = fold_left g l (g x y)).
induction l; intros; simpl. auto. rewrite IHl. decEq. rewrite ASSOC; auto.
assert (B: forall l v n, fold_left f l (f v n) = f v (fold_left g l n)).
induction l; intros; simpl.
auto.
rewrite IHl. rewrite DISTR. decEq. decEq. auto.
- intros. unfold decompose_int.
- destruct (decompose_int_rec 12 n Int.zero) eqn:?.
- simpl. exploit decompose_int_rec_nil; eauto. congruence.
- simpl. rewrite B. decEq.
- generalize (DECOMP 12%nat n Int.zero Int.zero).
- rewrite Heql. simpl. repeat rewrite ZERO. auto.
+ intros. unfold decompose_int, decompose_int_base.
+ destruct (thumb tt); [destruct (is_immed_arith_thumb_special n)|].
+- reflexivity.
+- destruct (decompose_int_thumb 24%nat n Int.zero) eqn:DB.
+ + simpl. exploit decompose_int_thumb_nil; eauto. congruence.
+ + simpl. rewrite B. decEq.
+ generalize (DECOMP2 24%nat n Int.zero Int.zero).
+ rewrite DB; simpl. rewrite ! ZERO. auto.
+- destruct (decompose_int_arm 12%nat n Int.zero) eqn:DB.
+ + simpl. exploit decompose_int_arm_nil; eauto. congruence.
+ + simpl. rewrite B. decEq.
+ generalize (DECOMP1 12%nat n Int.zero Int.zero).
+ rewrite DB; simpl. rewrite ! ZERO. auto.
Qed.
Lemma decompose_int_or:
@@ -143,7 +240,7 @@ Proof.
intros. rewrite Val.or_assoc. auto.
apply Int.or_assoc.
intros. rewrite Int.or_commut. apply Int.or_zero.
- apply decompose_int_rec_or.
+ apply decompose_int_arm_or. apply decompose_int_thumb_or.
Qed.
Lemma decompose_int_bic:
@@ -154,7 +251,7 @@ Proof.
intros. rewrite Val.and_assoc. simpl. decEq. decEq. rewrite Int.not_or_and_not. auto.
apply Int.or_assoc.
intros. rewrite Int.or_commut. apply Int.or_zero.
- apply decompose_int_rec_or.
+ apply decompose_int_arm_or. apply decompose_int_thumb_or.
Qed.
Lemma decompose_int_xor:
@@ -165,7 +262,7 @@ Proof.
intros. rewrite Val.xor_assoc. auto.
apply Int.xor_assoc.
intros. rewrite Int.xor_commut. apply Int.xor_zero.
- apply decompose_int_rec_xor.
+ apply decompose_int_arm_xor. apply decompose_int_thumb_xor.
Qed.
Lemma decompose_int_add:
@@ -176,7 +273,7 @@ Proof.
intros. rewrite Val.add_assoc. auto.
apply Int.add_assoc.
intros. rewrite Int.add_commut. apply Int.add_zero.
- apply decompose_int_rec_add.
+ apply decompose_int_arm_add. apply decompose_int_thumb_add.
Qed.
Lemma decompose_int_sub:
@@ -188,26 +285,26 @@ Proof.
rewrite Int.neg_add_distr; auto.
apply Int.add_assoc.
intros. rewrite Int.add_commut. apply Int.add_zero.
- apply decompose_int_rec_add.
+ apply decompose_int_arm_add. apply decompose_int_thumb_add.
Qed.
Lemma iterate_op_correct:
forall op1 op2 (f: val -> int -> val) (rs: regset) (r: ireg) m v0 n k,
(forall (rs:regset) n,
exec_instr ge fn (op2 (SOimm n)) rs m =
- Next (nextinstr (rs#r <- (f (rs#r) n))) m) ->
+ Next (nextinstr_nf (rs#r <- (f (rs#r) n))) m) ->
(forall n,
exec_instr ge fn (op1 (SOimm n)) rs m =
- Next (nextinstr (rs#r <- (f v0 n))) m) ->
+ Next (nextinstr_nf (rs#r <- (f v0 n))) m) ->
exists rs',
exec_straight ge fn (iterate_op op1 op2 (decompose_int n) k) rs m k rs' m
/\ rs'#r = List.fold_left f (decompose_int n) v0
- /\ forall r': preg, r' <> r -> r' <> PC -> rs'#r' = rs#r'.
+ /\ forall r': preg, r' <> r -> if_preg r' = true -> rs'#r' = rs#r'.
Proof.
intros until k; intros SEM2 SEM1.
unfold iterate_op.
- destruct (decompose_int n) as [ | i tl] eqn:?.
- unfold decompose_int in Heql. destruct (decompose_int_rec 12%nat n Int.zero); congruence.
+ destruct (decompose_int n) as [ | i tl] eqn:DI.
+ unfold decompose_int in DI. destruct (decompose_int_base n); congruence.
revert k. pattern tl. apply List.rev_ind.
(* base case *)
intros; simpl. econstructor.
@@ -231,22 +328,52 @@ Lemma loadimm_correct:
exists rs',
exec_straight ge fn (loadimm r n k) rs m k rs' m
/\ rs'#r = Vint n
- /\ forall r': preg, r' <> r -> r' <> PC -> rs'#r' = rs#r'.
+ /\ forall r': preg, r' <> r -> if_preg r' = true -> rs'#r' = rs#r'.
Proof.
intros. unfold loadimm.
- destruct (NPeano.leb (length (decompose_int n)) (length (decompose_int (Int.not n)))).
- (* mov - orr* *)
+ set (l1 := length (decompose_int n)).
+ set (l2 := length (decompose_int (Int.not n))).
+ destruct (NPeano.leb l1 1%nat).
+{ (* single mov *)
+ econstructor; split. apply exec_straight_one. simpl; reflexivity. auto.
+ split; intros; Simpl. }
+ destruct (NPeano.leb l2 1%nat).
+{ (* single movn *)
+ econstructor; split. apply exec_straight_one.
+ simpl. rewrite Int.not_involutive. reflexivity. auto.
+ split; intros; Simpl. }
+ destruct (thumb tt).
+{ (* movw / movt *)
+ unfold loadimm_thumb. destruct (Int.eq (Int.shru n (Int.repr 16)) Int.zero).
+ econstructor; split.
+ apply exec_straight_one. simpl; eauto. auto. split; intros; Simpl.
+ econstructor; split.
+ eapply exec_straight_two. simpl; reflexivity. simpl; reflexivity. auto. auto.
+ split; intros; Simpl. simpl. f_equal. rewrite Int.zero_ext_and by omega.
+ rewrite Int.and_assoc. change 65535 with (two_p 16 - 1). rewrite Int.and_idem.
+ apply Int.same_bits_eq; intros.
+ rewrite Int.bits_or, Int.bits_and, Int.bits_shl, Int.testbit_repr by auto.
+ rewrite Int.Ztestbit_two_p_m1 by omega. change (Int.unsigned (Int.repr 16)) with 16.
+ destruct (zlt i 16).
+ rewrite andb_true_r, orb_false_r; auto.
+ rewrite andb_false_r; simpl. rewrite Int.bits_shru by omega.
+ change (Int.unsigned (Int.repr 16)) with 16. rewrite zlt_true by omega. f_equal; omega.
+}
+ destruct (NPeano.leb l1 l2).
+{ (* mov - orr* *)
replace (Vint n) with (List.fold_left (fun v i => Val.or v (Vint i)) (decompose_int n) Vzero).
apply iterate_op_correct.
auto.
intros; simpl. rewrite Int.or_commut; rewrite Int.or_zero; auto.
rewrite decompose_int_or. simpl. rewrite Int.or_commut; rewrite Int.or_zero; auto.
- (* mvn - bic* *)
+}
+{ (* mvn - bic* *)
replace (Vint n) with (List.fold_left (fun v i => Val.and v (Vint (Int.not i))) (decompose_int (Int.not n)) (Vint Int.mone)).
apply iterate_op_correct.
auto.
intros. simpl. rewrite Int.and_commut; rewrite Int.and_mone; auto.
rewrite decompose_int_bic. simpl. rewrite Int.not_involutive. rewrite Int.and_commut. rewrite Int.and_mone; auto.
+}
Qed.
(** Add integer immediate. *)
@@ -256,9 +383,13 @@ Lemma addimm_correct:
exists rs',
exec_straight ge fn (addimm r1 r2 n k) rs m k rs' m
/\ rs'#r1 = Val.add rs#r2 (Vint n)
- /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'.
+ /\ forall r': preg, r' <> r1 -> if_preg r' = true -> rs'#r' = rs#r'.
Proof.
intros. unfold addimm.
+ destruct (Int.ltu (Int.repr (-256)) n).
+ (* sub *)
+ econstructor; split. apply exec_straight_one; simpl; auto.
+ split; intros; Simpl. apply Val.sub_opp_add.
destruct (NPeano.leb (length (decompose_int n)) (length (decompose_int (Int.neg n)))).
(* add - add* *)
replace (Val.add (rs r2) (Vint n))
@@ -283,21 +414,17 @@ Lemma andimm_correct:
exists rs',
exec_straight ge fn (andimm r1 r2 n k) rs m k rs' m
/\ rs'#r1 = Val.and rs#r2 (Vint n)
- /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'.
+ /\ forall r': preg, r' <> r1 -> if_preg r' = true -> rs'#r' = rs#r'.
Proof.
- intros. unfold andimm.
+ intros. unfold andimm. destruct (is_immed_arith n).
(* andi *)
- case (is_immed_arith n).
- exists (nextinstr (rs#r1 <- (Val.and rs#r2 (Vint n)))).
- split. apply exec_straight_one; auto.
- split. rewrite nextinstr_inv; auto with asmgen. apply Pregmap.gss.
- intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto.
+ exists (nextinstr_nf (rs#r1 <- (Val.and rs#r2 (Vint n)))).
+ split. apply exec_straight_one; auto. split; intros; Simpl.
(* bic - bic* *)
replace (Val.and (rs r2) (Vint n))
with (List.fold_left (fun v i => Val.and v (Vint (Int.not i))) (decompose_int (Int.not n)) (rs r2)).
apply iterate_op_correct.
- auto.
- auto.
+ auto. auto.
rewrite decompose_int_bic. rewrite Int.not_involutive. auto.
Qed.
@@ -308,7 +435,7 @@ Lemma rsubimm_correct:
exists rs',
exec_straight ge fn (rsubimm r1 r2 n k) rs m k rs' m
/\ rs'#r1 = Val.sub (Vint n) rs#r2
- /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'.
+ /\ forall r': preg, r' <> r1 -> if_preg r' = true -> rs'#r' = rs#r'.
Proof.
intros. unfold rsubimm.
(* rsb - add* *)
@@ -329,7 +456,7 @@ Lemma orimm_correct:
exists rs',
exec_straight ge fn (orimm r1 r2 n k) rs m k rs' m
/\ rs'#r1 = Val.or rs#r2 (Vint n)
- /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'.
+ /\ forall r': preg, r' <> r1 -> if_preg r' = true -> rs'#r' = rs#r'.
Proof.
intros. unfold orimm.
(* ori - ori* *)
@@ -348,7 +475,7 @@ Lemma xorimm_correct:
exists rs',
exec_straight ge fn (xorimm r1 r2 n k) rs m k rs' m
/\ rs'#r1 = Val.xor rs#r2 (Vint n)
- /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'.
+ /\ forall r': preg, r' <> r1 -> if_preg r' = true -> rs'#r' = rs#r'.
Proof.
intros. unfold xorimm.
(* xori - xori* *)
@@ -367,7 +494,7 @@ Lemma indexed_memory_access_correct:
(mk_immed: int -> int) (base: ireg) n k (rs: regset) m m',
(forall (r1: ireg) (rs1: regset) n1 k,
Val.add rs1#r1 (Vint n1) = Val.add rs#base (Vint n) ->
- (forall (r: preg), r <> PC -> r <> IR14 -> rs1 r = rs r) ->
+ (forall (r: preg), if_preg r = true -> r <> IR14 -> rs1 r = rs r) ->
exists rs',
exec_straight ge fn (mk_instr r1 n1 :: k) rs1 m k rs' m' /\ P rs') ->
exists rs',
@@ -397,12 +524,12 @@ Lemma loadind_int_correct:
exists rs',
exec_straight ge fn (loadind_int base ofs dst k) rs m k rs' m
/\ rs'#dst = v
- /\ forall r, r <> PC -> r <> IR14 -> r <> dst -> rs'#r = rs#r.
+ /\ forall r, if_preg r = true -> r <> IR14 -> r <> dst -> rs'#r = rs#r.
Proof.
intros; unfold loadind_int. apply indexed_memory_access_correct; intros.
econstructor; split.
apply exec_straight_one. simpl. unfold exec_load. rewrite H0; rewrite H; eauto. auto.
- split. Simpl. intros; Simpl.
+ split; intros; Simpl.
Qed.
Lemma loadind_correct:
@@ -412,7 +539,7 @@ Lemma loadind_correct:
exists rs',
exec_straight ge fn c rs m k rs' m
/\ rs'#(preg_of dst) = v
- /\ forall r, r <> PC -> r <> IR14 -> r <> preg_of dst -> rs'#r = rs#r.
+ /\ forall r, if_preg r = true -> r <> IR14 -> r <> preg_of dst -> rs'#r = rs#r.
Proof.
unfold loadind; intros. destruct ty; destruct (preg_of dst); inv H; simpl in H0.
- (* int *)
@@ -421,22 +548,22 @@ Proof.
apply indexed_memory_access_correct; intros.
econstructor; split.
apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto.
- split. Simpl. intros; Simpl.
+ split; intros; Simpl.
- (* single *)
apply indexed_memory_access_correct; intros.
econstructor; split.
apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto.
- split. Simpl. intros; Simpl.
+ split; intros; Simpl.
- (* any32 *)
apply indexed_memory_access_correct; intros.
econstructor; split.
apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto.
- split. Simpl. intros; Simpl.
+ split; intros; Simpl.
- (* any64 *)
apply indexed_memory_access_correct; intros.
econstructor; split.
apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto.
- split. Simpl. intros; Simpl.
+ split; intros; Simpl.
Qed.
(** Indexed memory stores. *)
@@ -447,10 +574,10 @@ Lemma storeind_correct:
Mem.storev (chunk_of_type ty) m (Val.add rs#base (Vint ofs)) (rs#(preg_of src)) = Some m' ->
exists rs',
exec_straight ge fn c rs m k rs' m'
- /\ forall r, r <> PC -> r <> IR14 -> preg_notin r (destroyed_by_setstack ty) -> rs'#r = rs#r.
+ /\ forall r, if_preg r = true -> r <> IR14 -> rs'#r = rs#r.
Proof.
unfold storeind; intros.
- assert (NOT14: preg_of src <> IR IR14) by eauto with asmgen.
+ assert (DATA: data_preg (preg_of src) = true) by eauto with asmgen.
destruct ty; destruct (preg_of src); inv H; simpl in H0.
- (* int *)
apply indexed_memory_access_correct; intros.
@@ -493,13 +620,6 @@ Proof.
intros. destruct s; simpl; auto.
Qed.
-Lemma transl_shift_addr_correct:
- forall s (r: ireg) (rs: regset),
- eval_shift_addr (transl_shift_addr s r) rs = eval_shift s (rs#r).
-Proof.
- intros. destruct s; simpl; auto.
-Qed.
-
(** Translation of conditions *)
Lemma compare_int_spec:
@@ -871,6 +991,7 @@ Lemma transl_cond_correct:
exec_straight ge fn c rs m k rs' m
/\ match eval_condition cond (map rs (map preg_of args)) m with
| Some b => eval_testcond (cond_for_cond cond) rs' = Some b
+ /\ eval_testcond (cond_for_cond (negate_condition cond)) rs' = Some (negb b)
| None => True
end
/\ forall r, data_preg r = true -> rs'#r = rs r.
@@ -880,57 +1001,66 @@ Proof.
- (* Ccomp *)
econstructor.
split. apply exec_straight_one. simpl. eauto. auto.
- split. destruct (Val.cmp_bool c0 (rs x) (rs x0)) eqn:CMP; auto. apply cond_for_signed_cmp_correct; auto.
+ split. destruct (Val.cmp_bool c0 (rs x) (rs x0)) eqn:CMP; auto.
+ split; apply cond_for_signed_cmp_correct; auto. rewrite Val.negate_cmp_bool, CMP; auto.
apply compare_int_inv.
- (* Ccompu *)
econstructor.
split. apply exec_straight_one. simpl. eauto. auto.
- split. destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (rs x0)) eqn:CMP; auto. apply cond_for_unsigned_cmp_correct; auto.
+ split. destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (rs x0)) eqn:CMP; auto.
+ split; apply cond_for_unsigned_cmp_correct; auto. rewrite Val.negate_cmpu_bool, CMP; auto.
apply compare_int_inv.
- (* Ccompshift *)
econstructor.
split. apply exec_straight_one. simpl. eauto. auto.
split. rewrite transl_shift_correct.
- destruct (Val.cmp_bool c0 (rs x) (eval_shift s (rs x0))) eqn:CMP; auto. apply cond_for_signed_cmp_correct; auto.
+ destruct (Val.cmp_bool c0 (rs x) (eval_shift s (rs x0))) eqn:CMP; auto.
+ split; apply cond_for_signed_cmp_correct; auto. rewrite Val.negate_cmp_bool, CMP; auto.
apply compare_int_inv.
- (* Ccompushift *)
econstructor.
split. apply exec_straight_one. simpl. eauto. auto.
split. rewrite transl_shift_correct.
- destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (eval_shift s (rs x0))) eqn:CMP; auto. apply cond_for_unsigned_cmp_correct; auto.
+ destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (eval_shift s (rs x0))) eqn:CMP; auto.
+ split; apply cond_for_unsigned_cmp_correct; auto. rewrite Val.negate_cmpu_bool, CMP; auto.
apply compare_int_inv.
- (* Ccompimm *)
destruct (is_immed_arith i).
econstructor.
split. apply exec_straight_one. simpl. eauto. auto.
- split. destruct (Val.cmp_bool c0 (rs x) (Vint i)) eqn:CMP; auto. apply cond_for_signed_cmp_correct; auto.
+ split. destruct (Val.cmp_bool c0 (rs x) (Vint i)) eqn:CMP; auto.
+ split; apply cond_for_signed_cmp_correct; auto. rewrite Val.negate_cmp_bool, CMP; auto.
apply compare_int_inv.
exploit (loadimm_correct IR14). intros [rs' [P [Q R]]].
econstructor.
split. eapply exec_straight_trans. eexact P. apply exec_straight_one. simpl.
rewrite Q. rewrite R; eauto with asmgen. auto.
split. rewrite <- R by (eauto with asmgen).
- destruct (Val.cmp_bool c0 (rs' x) (Vint i)) eqn:CMP; auto. apply cond_for_signed_cmp_correct; auto.
+ destruct (Val.cmp_bool c0 (rs' x) (Vint i)) eqn:CMP; auto.
+ split; apply cond_for_signed_cmp_correct; auto. rewrite Val.negate_cmp_bool, CMP; auto.
intros. rewrite compare_int_inv by auto. auto with asmgen.
- (* Ccompuimm *)
destruct (is_immed_arith i).
econstructor.
split. apply exec_straight_one. simpl. eauto. auto.
- split. destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (Vint i)) eqn:CMP; auto. apply cond_for_unsigned_cmp_correct; auto.
+ split. destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (Vint i)) eqn:CMP; auto.
+ split; apply cond_for_unsigned_cmp_correct; auto. rewrite Val.negate_cmpu_bool, CMP; auto.
apply compare_int_inv.
exploit (loadimm_correct IR14). intros [rs' [P [Q R]]].
econstructor.
split. eapply exec_straight_trans. eexact P. apply exec_straight_one. simpl.
rewrite Q. rewrite R; eauto with asmgen. auto.
split. rewrite <- R by (eauto with asmgen).
- destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs' x) (Vint i)) eqn:CMP; auto. apply cond_for_unsigned_cmp_correct; auto.
+ destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs' x) (Vint i)) eqn:CMP; auto.
+ split; apply cond_for_unsigned_cmp_correct; auto. rewrite Val.negate_cmpu_bool, CMP; auto.
intros. rewrite compare_int_inv by auto. auto with asmgen.
- (* Ccompf *)
econstructor.
split. apply exec_straight_one. simpl. eauto. apply compare_float_nextpc.
split. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) eqn:CMP; auto.
destruct (rs x); try discriminate. destruct (rs x0); try discriminate.
- simpl in CMP. inv CMP. apply cond_for_float_cmp_correct.
+ simpl in CMP. inv CMP.
+ split. apply cond_for_float_cmp_correct. apply cond_for_float_not_cmp_correct.
apply compare_float_inv.
- (* Cnotcompf *)
econstructor.
@@ -938,7 +1068,8 @@ Proof.
split. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) eqn:CMP; auto.
destruct (rs x); try discriminate. destruct (rs x0); try discriminate.
simpl in CMP. inv CMP.
-Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct.
+Local Opaque compare_float. simpl.
+ split. apply cond_for_float_not_cmp_correct. rewrite negb_involutive. apply cond_for_float_cmp_correct.
exact I.
apply compare_float_inv.
- (* Ccompfzero *)
@@ -946,14 +1077,16 @@ Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct.
split. apply exec_straight_one. simpl. eauto. apply compare_float_nextpc.
split. destruct (Val.cmpf_bool c0 (rs x) (Vfloat Float.zero)) eqn:CMP; auto.
destruct (rs x); try discriminate.
- simpl in CMP. inv CMP. apply cond_for_float_cmp_correct.
+ simpl in CMP. inv CMP.
+ split. apply cond_for_float_cmp_correct. apply cond_for_float_not_cmp_correct.
apply compare_float_inv.
- (* Cnotcompfzero *)
econstructor.
split. apply exec_straight_one. simpl. eauto. apply compare_float_nextpc.
split. destruct (Val.cmpf_bool c0 (rs x) (Vfloat Float.zero)) eqn:CMP; auto.
destruct (rs x); try discriminate. simpl in CMP. inv CMP.
-Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct.
+Local Opaque compare_float. simpl.
+ split. apply cond_for_float_not_cmp_correct. rewrite negb_involutive. apply cond_for_float_cmp_correct.
exact I.
apply compare_float_inv.
- (* Ccompfs *)
@@ -961,7 +1094,8 @@ Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct.
split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
split. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) eqn:CMP; auto.
destruct (rs x); try discriminate. destruct (rs x0); try discriminate.
- simpl in CMP. inv CMP. apply cond_for_float32_cmp_correct.
+ simpl in CMP. inv CMP.
+ split. apply cond_for_float32_cmp_correct. apply cond_for_float32_not_cmp_correct.
apply compare_float32_inv.
- (* Cnotcompfs *)
econstructor.
@@ -969,7 +1103,8 @@ Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct.
split. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) eqn:CMP; auto.
destruct (rs x); try discriminate. destruct (rs x0); try discriminate.
simpl in CMP. inv CMP.
-Local Opaque compare_float32. simpl. apply cond_for_float32_not_cmp_correct.
+Local Opaque compare_float32. simpl.
+ split. apply cond_for_float32_not_cmp_correct. rewrite negb_involutive. apply cond_for_float32_cmp_correct.
exact I.
apply compare_float32_inv.
- (* Ccompfszero *)
@@ -977,14 +1112,15 @@ Local Opaque compare_float32. simpl. apply cond_for_float32_not_cmp_correct.
split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
split. destruct (Val.cmpfs_bool c0 (rs x) (Vsingle Float32.zero)) eqn:CMP; auto.
destruct (rs x); try discriminate.
- simpl in CMP. inv CMP. apply cond_for_float32_cmp_correct.
+ simpl in CMP. inv CMP.
+ split. apply cond_for_float32_cmp_correct. apply cond_for_float32_not_cmp_correct.
apply compare_float32_inv.
- (* Cnotcompfzero *)
econstructor.
split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc.
split. destruct (Val.cmpfs_bool c0 (rs x) (Vsingle Float32.zero)) eqn:CMP; auto.
destruct (rs x); try discriminate. simpl in CMP. inv CMP.
- simpl. apply cond_for_float32_not_cmp_correct.
+ simpl. split. apply cond_for_float32_not_cmp_correct. rewrite negb_involutive. apply cond_for_float32_cmp_correct.
exact I.
apply compare_float32_inv.
Qed.
@@ -1009,13 +1145,10 @@ Proof.
intros until v; intros TR EV NOCMP.
unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; inv EV; try (TranslOpSimpl; fail).
(* Omove *)
- exists (nextinstr (rs#(preg_of res) <- (rs#(preg_of m0)))).
- split.
destruct (preg_of res) eqn:RES; try discriminate;
destruct (preg_of m0) eqn:ARG; inv TR.
- apply exec_straight_one; auto.
- apply exec_straight_one; auto.
- intuition Simpl.
+ econstructor; split. apply exec_straight_one; simpl; eauto. intuition Simpl.
+ econstructor; split. apply exec_straight_one; simpl; eauto. intuition Simpl.
(* Ointconst *)
generalize (loadimm_correct x i k rs m). intros [rs' [A [B C]]].
exists rs'; auto with asmgen.
@@ -1024,8 +1157,11 @@ Proof.
intros [rs' [EX [RES OTH]]].
exists rs'; auto with asmgen.
(* Ocast8signed *)
- set (rs1 := nextinstr (rs#x <- (Val.shl rs#x0 (Vint (Int.repr 24))))).
- set (rs2 := nextinstr (rs1#x <- (Val.shr rs1#x (Vint (Int.repr 24))))).
+ destruct (thumb tt).
+ econstructor; split. apply exec_straight_one; simpl; eauto. intuition Simpl.
+ destruct (rs x0); auto; simpl. rewrite Int.shru_zero. reflexivity.
+ set (rs1 := nextinstr_nf (rs#x <- (Val.shl rs#x0 (Vint (Int.repr 24))))).
+ set (rs2 := nextinstr_nf (rs1#x <- (Val.shr rs1#x (Vint (Int.repr 24))))).
exists rs2.
split. apply exec_straight_two with rs1 m; auto.
split. unfold rs2; Simpl. unfold rs1; Simpl.
@@ -1034,8 +1170,11 @@ Proof.
f_equal. symmetry. apply (Int.sign_ext_shr_shl 8). compute; auto.
intros. unfold rs2, rs1; Simpl.
(* Ocast16signed *)
- set (rs1 := nextinstr (rs#x <- (Val.shl rs#x0 (Vint (Int.repr 16))))).
- set (rs2 := nextinstr (rs1#x <- (Val.shr rs1#x (Vint (Int.repr 16))))).
+ destruct (thumb tt).
+ econstructor; split. apply exec_straight_one; simpl; eauto. intuition Simpl.
+ destruct (rs x0); auto; simpl. rewrite Int.shru_zero. reflexivity.
+ set (rs1 := nextinstr_nf (rs#x <- (Val.shl rs#x0 (Vint (Int.repr 16))))).
+ set (rs2 := nextinstr_nf (rs1#x <- (Val.shr rs1#x (Vint (Int.repr 16))))).
exists rs2.
split. apply exec_straight_two with rs1 m; auto.
split. unfold rs2; Simpl. unfold rs1; Simpl.
@@ -1051,22 +1190,6 @@ Proof.
generalize (rsubimm_correct x x0 i k rs m).
intros [rs' [A [B C]]].
exists rs'; auto with asmgen.
- (* Omul *)
- destruct (negb (ireg_eq x x0)).
- TranslOpSimpl.
- destruct (negb (ireg_eq x x1)).
- rewrite Val.mul_commut. TranslOpSimpl.
- econstructor; split.
- eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
- intuition Simpl.
- (* Omla *)
- destruct (negb (ireg_eq x x0)).
- TranslOpSimpl.
- destruct (negb (ireg_eq x x1)).
- rewrite Val.mul_commut. TranslOpSimpl.
- econstructor; split.
- eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto.
- intuition Simpl.
(* divs *)
Local Transparent destroyed_by_op.
econstructor. split. apply exec_straight_one. simpl. rewrite H0. reflexivity. auto.
@@ -1117,9 +1240,9 @@ Local Transparent destroyed_by_op.
omega.
}
set (j := Int.sub Int.iwordsize i) in *.
- set (rs1 := nextinstr (rs#IR14 <- (Val.shr (Vint i0) (Vint (Int.repr 31))))).
- set (rs2 := nextinstr (rs1#IR14 <- (Val.add (Vint i0) (Val.shru rs1#IR14 (Vint j))))).
- set (rs3 := nextinstr (rs2#x <- (Val.shr rs2#IR14 (Vint i)))).
+ set (rs1 := nextinstr_nf (rs#IR14 <- (Val.shr (Vint i0) (Vint (Int.repr 31))))).
+ set (rs2 := nextinstr_nf (rs1#IR14 <- (Val.add (Vint i0) (Val.shru rs1#IR14 (Vint j))))).
+ set (rs3 := nextinstr_nf (rs2#x <- (Val.shr rs2#IR14 (Vint i)))).
exists rs3; split.
apply exec_straight_three with rs1 m rs2 m.
simpl. rewrite X0; reflexivity.
@@ -1180,23 +1303,11 @@ Proof.
subst op. simpl in H. monadInv H. simpl in H0. inv H0.
rewrite (ireg_of_eq _ _ EQ).
exploit transl_cond_correct; eauto. instantiate (1 := rs). instantiate (1 := m). intros [rs1 [A [B C]]].
- set (rs2 := nextinstr (rs1#x <- (Vint Int.zero))).
- set (rs3 := nextinstr (match eval_testcond (cond_for_cond cmp) rs2 with
- | Some true => rs2 # x <- (Vint Int.one)
- | Some false => rs2
- | None => rs2 # x <- Vundef end)).
- exists rs3; split.
- eapply exec_straight_trans. eexact A. apply exec_straight_two with rs2 m.
- auto.
- simpl. unfold rs3. destruct (eval_testcond (cond_for_cond cmp) rs2) as [[]|]; auto.
- auto. unfold rs3. destruct (eval_testcond (cond_for_cond cmp) rs2) as [[]|]; auto.
- split. destruct (eval_condition cmp rs ## (preg_of ## args) m) as [b|]; simpl; auto.
- unfold rs3.
- change (eval_testcond (cond_for_cond cmp) rs2) with (eval_testcond (cond_for_cond cmp) rs1). rewrite B.
- Simpl. destruct b. Simpl. unfold rs2. Simpl.
- intros. transitivity (rs2 r).
- unfold rs3. Simpl. destruct (eval_testcond (cond_for_cond cmp) rs2) as [[]|]; Simpl; auto.
- unfold rs2. Simpl.
+ econstructor; split.
+ eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto.
+ split; intros; Simpl.
+ destruct (eval_condition cmp rs ## (preg_of ## args) m) as [b|]; simpl; auto.
+ destruct B as [B1 B2]; rewrite B1. destruct b; auto.
Qed.
(** Translation of loads and stores. *)
@@ -1209,21 +1320,21 @@ Qed.
Lemma transl_memory_access_correct:
forall (P: regset -> Prop) (mk_instr_imm: ireg -> int -> instruction)
- (mk_instr_gen: option (ireg -> shift_addr -> instruction))
+ (mk_instr_gen: option (ireg -> shift_op -> instruction))
(mk_immed: int -> int)
addr args k c (rs: regset) a m m',
transl_memory_access mk_instr_imm mk_instr_gen mk_immed addr args k = OK c ->
eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a ->
(forall (r1: ireg) (rs1: regset) n k,
Val.add rs1#r1 (Vint n) = a ->
- (forall (r: preg), r <> PC -> r <> IR14 -> rs1 r = rs r) ->
+ (forall (r: preg), if_preg r = true -> r <> IR14 -> rs1 r = rs r) ->
exists rs',
exec_straight ge fn (mk_instr_imm r1 n :: k) rs1 m k rs' m' /\ P rs') ->
match mk_instr_gen with
| None => True
| Some mk =>
- (forall (r1: ireg) (sa: shift_addr) k,
- Val.add rs#r1 (eval_shift_addr sa rs) = a ->
+ (forall (r1: ireg) (sa: shift_op) k,
+ Val.add rs#r1 (eval_shift_op sa rs) = a ->
exists rs',
exec_straight ge fn (mk r1 sa :: k) rs m k rs' m' /\ P rs')
end ->
@@ -1239,7 +1350,7 @@ Proof.
simpl. erewrite ! ireg_of_eq; eauto.
(* Aindexed2shift *)
destruct mk_instr_gen as [mk | ]; monadInv TR. apply MK2.
- erewrite ! ireg_of_eq; eauto. rewrite transl_shift_addr_correct. auto.
+ erewrite ! ireg_of_eq; eauto. rewrite transl_shift_correct. auto.
(* Ainstack *)
inv TR. apply indexed_memory_access_correct. exact MK1.
Qed.
@@ -1249,9 +1360,9 @@ Lemma transl_load_int_correct:
transl_memory_access_int mk_instr is_immed dst addr args k = OK c ->
eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a ->
Mem.loadv chunk m a = Some v ->
- (forall (r1 r2: ireg) (sa: shift_addr) (rs1: regset),
+ (forall (r1 r2: ireg) (sa: shift_op) (rs1: regset),
exec_instr ge fn (mk_instr r1 r2 sa) rs1 m =
- exec_load chunk (Val.add rs1#r2 (eval_shift_addr sa rs1)) r1 rs1 m) ->
+ exec_load chunk (Val.add rs1#r2 (eval_shift_op sa rs1)) r1 rs1 m) ->
exists rs',
exec_straight ge fn c rs m k rs' m
/\ rs'#(preg_of dst) = v
@@ -1260,7 +1371,7 @@ Proof.
intros. monadInv H. erewrite ireg_of_eq by eauto.
eapply transl_memory_access_correct; eauto.
intros; simpl. econstructor; split. apply exec_straight_one.
- rewrite H2. unfold exec_load. simpl eval_shift_addr. rewrite H. rewrite H1. eauto. auto.
+ rewrite H2. unfold exec_load. simpl eval_shift_op. rewrite H. rewrite H1. eauto. auto.
split. Simpl. intros; Simpl.
simpl; intros.
econstructor; split. apply exec_straight_one.
@@ -1294,17 +1405,18 @@ Lemma transl_store_int_correct:
transl_memory_access_int mk_instr is_immed src addr args k = OK c ->
eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a ->
Mem.storev chunk m a rs#(preg_of src) = Some m' ->
- (forall (r1 r2: ireg) (sa: shift_addr) (rs1: regset),
+ (forall (r1 r2: ireg) (sa: shift_op) (rs1: regset),
exec_instr ge fn (mk_instr r1 r2 sa) rs1 m =
- exec_store chunk (Val.add rs1#r2 (eval_shift_addr sa rs1)) r1 rs1 m) ->
+ exec_store chunk (Val.add rs1#r2 (eval_shift_op sa rs1)) r1 rs1 m) ->
exists rs',
exec_straight ge fn c rs m k rs' m'
/\ forall r, data_preg r = true -> preg_notin r mr -> rs'#r = rs#r.
Proof.
- intros. monadInv H. erewrite ireg_of_eq in * by eauto.
+ intros. assert (DR: data_preg (preg_of src) = true) by eauto with asmgen.
+ monadInv H. erewrite ireg_of_eq in * by eauto.
eapply transl_memory_access_correct; eauto.
intros; simpl. econstructor; split. apply exec_straight_one.
- rewrite H2. unfold exec_store. simpl eval_shift_addr. rewrite H. rewrite H3; eauto with asmgen.
+ rewrite H2. unfold exec_store. simpl eval_shift_op. rewrite H. rewrite H3; eauto with asmgen.
rewrite H1. eauto. auto.
intros; Simpl.
simpl; intros.
@@ -1325,7 +1437,8 @@ Lemma transl_store_float_correct:
exec_straight ge fn c rs m k rs' m'
/\ forall r, data_preg r = true -> preg_notin r mr -> rs'#r = rs#r.
Proof.
- intros. monadInv H. erewrite freg_of_eq in * by eauto.
+ intros. assert (DR: data_preg (preg_of src) = true) by eauto with asmgen.
+ monadInv H. erewrite freg_of_eq in * by eauto.
eapply transl_memory_access_correct; eauto.
intros; simpl. econstructor; split. apply exec_straight_one.
rewrite H2. unfold exec_store. rewrite H. rewrite H3; auto with asmgen. rewrite H1. eauto. auto.
diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml
index 2ad9114..bb738a1 100644
--- a/arm/PrintAsm.ml
+++ b/arm/PrintAsm.ml
@@ -20,6 +20,11 @@ open AST
open Memdata
open Asm
+(* Code generation options. To be turned into proper options. *)
+
+let vfpv3 = ref true
+let literals_in_code = ref true
+
(* On-the-fly label renaming *)
let next_label = ref 100
@@ -102,35 +107,41 @@ let condition_name = function
| TCgt -> "gt"
| TCle -> "le"
-let movimm oc dst n =
- match Asmgen.decompose_int n with
- | [] -> assert false
- | hd::tl as l ->
- fprintf oc " mov %s, #%a\n" dst coqint hd;
- List.iter
- (fun n -> fprintf oc " orr %s, %s, #%a\n" dst dst coqint n)
- tl;
- List.length l
-
-let addimm oc dst src n =
- match Asmgen.decompose_int n with
- | [] -> assert false
- | hd::tl as l ->
- fprintf oc " add %s, %s, #%a\n" dst src coqint hd;
- List.iter
- (fun n -> fprintf oc " add %s, %s, #%a\n" dst dst coqint n)
- tl;
- List.length l
-
-let subimm oc dst src n =
- match Asmgen.decompose_int n with
- | [] -> assert false
- | hd::tl as l ->
- fprintf oc " sub %s, %s, #%a\n" dst src coqint hd;
- List.iter
- (fun n -> fprintf oc " sub %s, %s, #%a\n" dst dst coqint n)
- tl;
- List.length l
+let neg_condition_name = function
+ | TCeq -> "ne"
+ | TCne -> "eq"
+ | TChs -> "lo"
+ | TClo -> "hs"
+ | TCmi -> "pl"
+ | TCpl -> "mi"
+ | TChi -> "ls"
+ | TCls -> "hi"
+ | TCge -> "lt"
+ | TClt -> "ge"
+ | TCgt -> "le"
+ | TCle -> "gt"
+
+(* In Thumb2 mode, some arithmetic instructions have shorter encodings
+ if they carry the "S" flag (update condition flags):
+ add (but not sp + imm)
+ and
+ asr
+ bic
+ eor
+ lsl
+ lsr
+ mov
+ mvn
+ orr
+ rsb
+ sub (but not sp - imm)
+ The proof of Asmgen shows that CompCert-generated code behaves the
+ same whether flags are updated or not by those instructions. The
+ following printing function adds a "S" suffix if we are in Thumb2
+ mode. *)
+
+let thumbS oc =
+ if !Clflags.option_fthumb then output_char oc 's'
(* Names of sections *)
@@ -156,15 +167,16 @@ let size_constants = ref 0
let max_pos_constants = ref max_int
let distance_to_emit_constants () =
- !max_pos_constants - (!currpos + !size_constants)
+ if !literals_in_code
+ then !max_pos_constants - (!currpos + !size_constants)
+ else max_int
(* Associate labels to floating-point constants and to symbols *)
let float_labels = (Hashtbl.create 39 : (int64, int) Hashtbl.t)
let float32_labels = (Hashtbl.create 39 : (int32, int) Hashtbl.t)
-let label_float f =
- let bf = camlint64_of_coqint(Floats.Float.to_bits f) in
+let label_float bf =
try
Hashtbl.find float_labels bf
with Not_found ->
@@ -174,8 +186,7 @@ let label_float f =
max_pos_constants := min !max_pos_constants (!currpos + 1024);
lbl'
-let label_float32 f =
- let bf = camlint_of_coqint(Floats.Float32.to_bits f) in
+let label_float32 bf =
try
Hashtbl.find float32_labels bf
with Not_found ->
@@ -206,6 +217,7 @@ let reset_constants () =
max_pos_constants := max_int
let emit_constants oc =
+ fprintf oc " .balign 4\n";
Hashtbl.iter
(fun bf lbl ->
(* Little-endian floats *)
@@ -224,6 +236,67 @@ let emit_constants oc =
symbol_labels;
reset_constants ()
+(* Generate code to load the address of id + ofs in register r *)
+
+let loadsymbol oc r id ofs =
+ if !Clflags.option_fthumb then begin
+ fprintf oc " movw %a, #:lower16:%a\n"
+ ireg r print_symb_ofs (id, ofs);
+ fprintf oc " movt %a, #:upper16:%a\n"
+ ireg r print_symb_ofs (id, ofs); 2
+ end else begin
+ let lbl = label_symbol id ofs in
+ fprintf oc " ldr %a, .L%d @ %a\n"
+ ireg r lbl print_symb_ofs (id, ofs); 1
+ end
+
+(* Emit instruction sequences that set or offset a register by a constant. *)
+(* No S suffix because they are applied to SP most of the time. *)
+
+let movimm oc dst n =
+ match Asmgen.decompose_int n with
+ | [] -> assert false
+ | hd::tl as l ->
+ fprintf oc " mov %s, #%a\n" dst coqint hd;
+ List.iter
+ (fun n -> fprintf oc " orr %s, %s, #%a\n" dst dst coqint n)
+ tl;
+ List.length l
+
+let addimm oc dst src n =
+ match Asmgen.decompose_int n with
+ | [] -> assert false
+ | hd::tl as l ->
+ fprintf oc " add %s, %s, #%a\n" dst src coqint hd;
+ List.iter
+ (fun n -> fprintf oc " add %s, %s, #%a\n" dst dst coqint n)
+ tl;
+ List.length l
+
+let subimm oc dst src n =
+ match Asmgen.decompose_int n with
+ | [] -> assert false
+ | hd::tl as l ->
+ fprintf oc " sub %s, %s, #%a\n" dst src coqint hd;
+ List.iter
+ (fun n -> fprintf oc " sub %s, %s, #%a\n" dst dst coqint n)
+ tl;
+ List.length l
+
+(* Recognition of float constants appropriate for VMOV.
+ "a normalized binary floating point encoding with 1 sign bit, 4
+ bits of fraction and a 3-bit exponent" *)
+
+let is_immediate_float64 bits =
+ let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in
+ let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in
+ exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant
+
+let is_immediate_float32 bits =
+ let exp = (Int32.(to_int (shift_right_logical bits 23)) land 0xFF) - 127 in
+ let mant = Int32.logand bits 0x7F_FFFFl in
+ exp >= -3 && exp <= 4 && Int32.logand mant 0x78_0000l = mant
+
(* Emit .file / .loc debugging directives *)
let filename_num : (string, int) Hashtbl.t = Hashtbl.create 7
@@ -267,7 +340,7 @@ let cfi_rel_offset oc reg ofs =
- annotation statements: take their arguments in registers or stack
locations; generate no code;
- inlined by the compiler: take their arguments in arbitrary
- registers; preserve all registers except IR2, IR3, IR12 and FP6.
+ registers.
*)
(* Handling of annotations *)
@@ -365,9 +438,9 @@ let print_builtin_vload_common oc chunk args res =
fprintf oc " ldr %a, [%a, #0]\n" ireg res2 ireg addr
end; 2
| Mfloat32, [IR addr], [FR res] ->
- fprintf oc " flds %a, [%a, #0]\n" freg_single res ireg addr; 1
+ fprintf oc " vldr %a, [%a, #0]\n" freg_single res ireg addr; 1
| Mfloat64, [IR addr], [FR res] ->
- fprintf oc " fldd %a, [%a, #0]\n" freg res ireg addr; 1
+ fprintf oc " vldr %a, [%a, #0]\n" freg res ireg addr; 1
| _ ->
assert false
@@ -378,10 +451,9 @@ let print_builtin_vload oc chunk args res =
let print_builtin_vload_global oc chunk id ofs args res =
fprintf oc "%s begin builtin __builtin_volatile_read\n" comment;
- let lbl = label_symbol id ofs in
- fprintf oc " ldr %a, .L%d @ %a\n" ireg IR14 lbl print_symb_ofs (id, ofs);
- let n = print_builtin_vload_common oc chunk [IR IR14] res in
- fprintf oc "%s end builtin __builtin_volatile_read\n" comment; n + 1
+ let n1 = loadsymbol oc IR14 id ofs in
+ let n2 = print_builtin_vload_common oc chunk [IR IR14] res in
+ fprintf oc "%s end builtin __builtin_volatile_read\n" comment; n1 + n2
let print_builtin_vstore_common oc chunk args =
match chunk, args with
@@ -395,9 +467,9 @@ let print_builtin_vstore_common oc chunk args =
fprintf oc " str %a, [%a, #0]\n" ireg src2 ireg addr;
fprintf oc " str %a, [%a, #4]\n" ireg src1 ireg addr; 2
| Mfloat32, [IR addr; FR src] ->
- fprintf oc " fsts %a, [%a, #0]\n" freg_single src ireg addr; 1
+ fprintf oc " vstr %a, [%a, #0]\n" freg_single src ireg addr; 1
| Mfloat64, [IR addr; FR src] ->
- fprintf oc " fstd %a, [%a, #0]\n" freg src ireg addr; 1
+ fprintf oc " vstr %a, [%a, #0]\n" freg src ireg addr; 1
| _ ->
assert false
@@ -408,10 +480,9 @@ let print_builtin_vstore oc chunk args =
let print_builtin_vstore_global oc chunk id ofs args =
fprintf oc "%s begin builtin __builtin_volatile_write\n" comment;
- let lbl = label_symbol id ofs in
- fprintf oc " ldr %a, .L%d @ %a\n" ireg IR14 lbl print_symb_ofs (id, ofs);
- let n = print_builtin_vstore_common oc chunk (IR IR14 :: args) in
- fprintf oc "%s end builtin __builtin_volatile_write\n" comment; n + 1
+ let n1 = loadsymbol oc IR14 id ofs in
+ let n2 = print_builtin_vstore_common oc chunk (IR IR14 :: args) in
+ fprintf oc "%s end builtin __builtin_volatile_write\n" comment; n1 + n2
(* Handling of varargs *)
@@ -458,24 +529,30 @@ let print_builtin_inline oc name args res =
fprintf oc " clz %a, %a\n" ireg res ireg a1; 1
(* Float arithmetic *)
| "__builtin_fabs", [FR a1], [FR res] ->
- fprintf oc " fabsd %a, %a\n" freg res freg a1; 1
+ fprintf oc " vabs.f64 %a, %a\n" freg res freg a1; 1
| "__builtin_fsqrt", [FR a1], [FR res] ->
- fprintf oc " fsqrtd %a, %a\n" freg res freg a1; 1
+ fprintf oc " vsqrt.f64 %a, %a\n" freg res freg a1; 1
(* 64-bit integer arithmetic *)
| "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] ->
- if rl = ah then begin
- fprintf oc " rsbs %a, %a, #0\n" ireg IR14 ireg al;
- fprintf oc " rsc %a, %a, #0\n" ireg rh ireg ah;
- fprintf oc " mov %a, %a\n" ireg rl ireg IR14; 3
- end else begin
- fprintf oc " rsbs %a, %a, #0\n" ireg rl ireg al;
- fprintf oc " rsc %a, %a, #0\n" ireg rh ireg ah; 2
- end
+ fprintf oc " rsbs %a, %a, #0\n"
+ ireg (if rl = ah then IR14 else rl) ireg al;
+ (* No "rsc" instruction in Thumb2. Emulate based on
+ rsc a, b, #0 == a <- AddWithCarry(~b, 0, carry)
+ == mvn a, b; adc a, a, #0 *)
+ if !Clflags.option_fthumb then begin
+ fprintf oc " mvn %a, %a\n" ireg rh ireg ah;
+ fprintf oc " adc %a, %a, #0\n" ireg rh ireg rh
+ end else begin
+ fprintf oc " rsc %a, %a, #0\n" ireg rh ireg ah
+ end;
+ if rl = ah then
+ fprintf oc " mov%t %a, %a\n" thumbS ireg rl ireg IR14;
+ 4
| "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] ->
if rl = ah || rl = bh then begin
fprintf oc " adds %a, %a, %a\n" ireg IR14 ireg al ireg bl;
fprintf oc " adc %a, %a, %a\n" ireg rh ireg ah ireg bh;
- fprintf oc " mov %a, %a\n" ireg rl ireg IR14; 3
+ fprintf oc " mov%t %a, %a\n" thumbS ireg rl ireg IR14; 3
end else begin
fprintf oc " adds %a, %a, %a\n" ireg rl ireg al ireg bl;
fprintf oc " adc %a, %a, %a\n" ireg rh ireg ah ireg bh; 2
@@ -484,18 +561,13 @@ let print_builtin_inline oc name args res =
if rl = ah || rl = bh then begin
fprintf oc " subs %a, %a, %a\n" ireg IR14 ireg al ireg bl;
fprintf oc " sbc %a, %a, %a\n" ireg rh ireg ah ireg bh;
- fprintf oc " mov %a, %a\n" ireg rl ireg IR14; 3
+ fprintf oc " mov%t %a, %a\n" thumbS ireg rl ireg IR14; 3
end else begin
fprintf oc " subs %a, %a, %a\n" ireg rl ireg al ireg bl;
fprintf oc " sbc %a, %a, %a\n" ireg rh ireg ah ireg bh; 2
end
| "__builtin_mull", [IR a; IR b], [IR rh; IR rl] ->
- if rl = a || rh = a then begin
- fprintf oc " mov %a, %a\n" ireg IR14 ireg a;
- fprintf oc " umull %a, %a, %a, %a\n" ireg rl ireg rh ireg IR14 ireg b; 2
- end else begin
- fprintf oc " umull %a, %a, %a, %a\n" ireg rl ireg rh ireg a ireg b; 1
- end
+ fprintf oc " umull %a, %a, %a, %a\n" ireg rl ireg rh ireg a ireg b; 1
(* Memory accesses *)
| "__builtin_read16_reversed", [IR a1], [IR res] ->
fprintf oc " ldrh %a, [%a, #0]\n" ireg res ireg a1;
@@ -534,16 +606,16 @@ module FixupEABI = struct
let fixup_double oc dir f i1 i2 =
match dir with
| Incoming -> (* f <- (i1, i2) *)
- fprintf oc " fmdrr %a, %a, %a\n" freg f ireg i1 ireg i2
+ fprintf oc " vmov %a, %a, %a\n" freg f ireg i1 ireg i2
| Outgoing -> (* (i1, i2) <- f *)
- fprintf oc " fmrrd %a, %a, %a\n" ireg i1 ireg i2 freg f
+ fprintf oc " vmov %a, %a, %a\n" ireg i1 ireg i2 freg f
let fixup_single oc dir f i =
match dir with
| Incoming -> (* f <- i *)
- fprintf oc " fmsr %a, %a\n" freg_single f ireg i
+ fprintf oc " vmov %a, %a\n" freg_single f ireg i
| Outgoing -> (* i <- f *)
- fprintf oc " fmrs %a, %a\n" ireg i freg_single f
+ fprintf oc " vmov %a, %a\n" ireg i freg_single f
let fixup_conventions oc dir tyl =
let rec fixup i tyl =
@@ -608,11 +680,11 @@ module FixupHF = struct
| [] -> 0
| (fr, Double, dr) :: act ->
if fr = dr then fixup_outgoing oc act else begin
- fprintf oc " fcpyd d%d, d%d\n" dr fr;
+ fprintf oc " vmov.f64 d%d, d%d\n" dr fr;
1 + fixup_outgoing oc act
end
| (fr, Single, sr) :: act ->
- fprintf oc " fcpys s%d, s%d\n" sr (2*fr);
+ fprintf oc " vmov.f32 s%d, s%d\n" sr (2*fr);
1 + fixup_outgoing oc act
let rec fixup_incoming oc = function
@@ -620,13 +692,13 @@ module FixupHF = struct
| (fr, Double, dr) :: act ->
let n = fixup_incoming oc act in
if fr = dr then n else begin
- fprintf oc " fcpyd d%d, d%d\n" fr dr;
+ fprintf oc " vmov.f64 d%d, d%d\n" fr dr;
1 + n
end
| (fr, Single, sr) :: act ->
let n = fixup_incoming oc act in
if fr = sr then n else begin
- fprintf oc " fcpys s%d, s%d\n" (2*fr) sr;
+ fprintf oc " vmov.f32 s%d, s%d\n" (2*fr) sr;
1 + n
end
@@ -658,29 +730,23 @@ let (fixup_arguments, fixup_result) =
let shift_op oc = function
| SOimm n -> fprintf oc "#%a" coqint n
| SOreg r -> ireg oc r
- | SOlslimm(r, n) -> fprintf oc "%a, lsl #%a" ireg r coqint n
- | SOlslreg(r, r') -> fprintf oc "%a, lsl %a" ireg r ireg r'
- | SOlsrimm(r, n) -> fprintf oc "%a, lsr #%a" ireg r coqint n
- | SOlsrreg(r, r') -> fprintf oc "%a, lsr %a" ireg r ireg r'
- | SOasrimm(r, n) -> fprintf oc "%a, asr #%a" ireg r coqint n
- | SOasrreg(r, r') -> fprintf oc "%a, asr %a" ireg r ireg r'
- | SOrorimm(r, n) -> fprintf oc "%a, ror #%a" ireg r coqint n
- | SOrorreg(r, r') -> fprintf oc "%a, ror %a" ireg r ireg r'
-
-let shift_addr oc = function
- | SAimm n -> fprintf oc "#%a" coqint n
- | SAreg r -> ireg oc r
- | SAlsl(r, n) -> fprintf oc "%a, lsl #%a" ireg r coqint n
- | SAlsr(r, n) -> fprintf oc "%a, lsr #%a" ireg r coqint n
- | SAasr(r, n) -> fprintf oc "%a, asr #%a" ireg r coqint n
- | SAror(r, n) -> fprintf oc "%a, ror #%a" ireg r coqint n
+ | SOlsl(r, n) -> fprintf oc "%a, lsl #%a" ireg r coqint n
+ | SOlsr(r, n) -> fprintf oc "%a, lsr #%a" ireg r coqint n
+ | SOasr(r, n) -> fprintf oc "%a, asr #%a" ireg r coqint n
+ | SOror(r, n) -> fprintf oc "%a, ror #%a" ireg r coqint n
let print_instruction oc = function
(* Core instructions *)
| Padd(r1, r2, so) ->
- fprintf oc " add %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
+ fprintf oc " add%s %a, %a, %a\n"
+ (if !Clflags.option_fthumb && r2 <> IR14 then "s" else "")
+ ireg r1 ireg r2 shift_op so; 1
| Pand(r1, r2, so) ->
- fprintf oc " and %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
+ fprintf oc " and%t %a, %a, %a\n"
+ thumbS ireg r1 ireg r2 shift_op so; 1
+ | Pasr(r1, r2, r3) ->
+ fprintf oc " asr%t %a, %a, %a\n"
+ thumbS ireg r1 ireg r2 ireg r3; 1
| Pb lbl ->
fprintf oc " b %a\n" print_label lbl; 1
| Pbc(bit, lbl) ->
@@ -707,141 +773,181 @@ let print_instruction oc = function
let n2 = fixup_result oc Incoming sg in
n1 + 1 + n2
| Pbic(r1, r2, so) ->
- fprintf oc " bic %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
+ fprintf oc " bic%t %a, %a, %a\n"
+ thumbS ireg r1 ireg r2 shift_op so; 1
| Pcmp(r1, so) ->
fprintf oc " cmp %a, %a\n" ireg r1 shift_op so; 1
| Peor(r1, r2, so) ->
- fprintf oc " eor %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
+ fprintf oc " eor%t %a, %a, %a\n"
+ thumbS ireg r1 ireg r2 shift_op so; 1
| Pldr(r1, r2, sa) | Pldr_a(r1, r2, sa) ->
- fprintf oc " ldr %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
+ fprintf oc " ldr %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1
| Pldrb(r1, r2, sa) ->
- fprintf oc " ldrb %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
+ fprintf oc " ldrb %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1
| Pldrh(r1, r2, sa) ->
- fprintf oc " ldrh %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
+ fprintf oc " ldrh %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1
| Pldrsb(r1, r2, sa) ->
- fprintf oc " ldrsb %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
+ fprintf oc " ldrsb %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1
| Pldrsh(r1, r2, sa) ->
- fprintf oc " ldrsh %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
+ fprintf oc " ldrsh %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1
+ | Plsl(r1, r2, r3) ->
+ fprintf oc " lsl%t %a, %a, %a\n"
+ thumbS ireg r1 ireg r2 ireg r3; 1
+ | Plsr(r1, r2, r3) ->
+ fprintf oc " lsr%t %a, %a, %a\n"
+ thumbS ireg r1 ireg r2 ireg r3; 1
| Pmla(r1, r2, r3, r4) ->
fprintf oc " mla %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1
| Pmov(r1, so) ->
- fprintf oc " mov %a, %a\n" ireg r1 shift_op so; 1
- | Pmovc(bit, r1, so) ->
- fprintf oc " mov%s %a, %a\n" (condition_name bit) ireg r1 shift_op so; 1
+ fprintf oc " mov%t %a, %a\n" thumbS ireg r1 shift_op so; 1
+ | Pmovw(r1, n) ->
+ fprintf oc " movw %a, #%a\n" ireg r1 coqint n; 1
+ | Pmovt(r1, n) ->
+ fprintf oc " movt %a, #%a\n" ireg r1 coqint n; 1
| Pmul(r1, r2, r3) ->
fprintf oc " mul %a, %a, %a\n" ireg r1 ireg r2 ireg r3; 1
| Pmvn(r1, so) ->
- fprintf oc " mvn %a, %a\n" ireg r1 shift_op so; 1
+ fprintf oc " mvn%t %a, %a\n" thumbS ireg r1 shift_op so; 1
| Porr(r1, r2, so) ->
- fprintf oc " orr %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
+ fprintf oc " orr%t %a, %a, %a\n"
+ thumbS ireg r1 ireg r2 shift_op so; 1
| Prsb(r1, r2, so) ->
- fprintf oc " rsb %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
+ fprintf oc " rsb%t %a, %a, %a\n"
+ thumbS ireg r1 ireg r2 shift_op so; 1
| Pstr(r1, r2, sa) | Pstr_a(r1, r2, sa) ->
- fprintf oc " str %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa;
+ fprintf oc " str %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa;
begin match r1, r2, sa with
- | IR14, IR13, SAimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n)
+ | IR14, IR13, SOimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n)
| _ -> ()
end;
1
| Pstrb(r1, r2, sa) ->
- fprintf oc " strb %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
+ fprintf oc " strb %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1
| Pstrh(r1, r2, sa) ->
- fprintf oc " strh %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1
+ fprintf oc " strh %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1
| Psdiv ->
fprintf oc " bl __aeabi_idiv\n"; 1
+ | Psbfx(r1, r2, lsb, sz) ->
+ fprintf oc " sbfx %a, %a, #%a, #%a\n" ireg r1 ireg r2 coqint lsb coqint sz; 1
| Psmull(r1, r2, r3, r4) ->
fprintf oc " smull %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1
| Psub(r1, r2, so) ->
- fprintf oc " sub %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1
+ fprintf oc " sub%t %a, %a, %a\n"
+ thumbS ireg r1 ireg r2 shift_op so; 1
| Pudiv ->
fprintf oc " bl __aeabi_uidiv\n"; 1
| Pumull(r1, r2, r3, r4) ->
fprintf oc " umull %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1
- (* Floating-point coprocessor instructions *)
+ (* Floating-point VFP instructions *)
| Pfcpyd(r1, r2) ->
- fprintf oc " fcpyd %a, %a\n" freg r1 freg r2; 1
+ fprintf oc " vmov.f64 %a, %a\n" freg r1 freg r2; 1
| Pfabsd(r1, r2) ->
- fprintf oc " fabsd %a, %a\n" freg r1 freg r2; 1
+ fprintf oc " vabs.f64 %a, %a\n" freg r1 freg r2; 1
| Pfnegd(r1, r2) ->
- fprintf oc " fnegd %a, %a\n" freg r1 freg r2; 1
+ fprintf oc " vneg.f64 %a, %a\n" freg r1 freg r2; 1
| Pfaddd(r1, r2, r3) ->
- fprintf oc " faddd %a, %a, %a\n" freg r1 freg r2 freg r3; 1
+ fprintf oc " vadd.f64 %a, %a, %a\n" freg r1 freg r2 freg r3; 1
| Pfdivd(r1, r2, r3) ->
- fprintf oc " fdivd %a, %a, %a\n" freg r1 freg r2 freg r3; 1
+ fprintf oc " vdiv.f64 %a, %a, %a\n" freg r1 freg r2 freg r3; 1
| Pfmuld(r1, r2, r3) ->
- fprintf oc " fmuld %a, %a, %a\n" freg r1 freg r2 freg r3; 1
+ fprintf oc " vmul.f64 %a, %a, %a\n" freg r1 freg r2 freg r3; 1
| Pfsubd(r1, r2, r3) ->
- fprintf oc " fsubd %a, %a, %a\n" freg r1 freg r2 freg r3; 1
+ fprintf oc " vsub.f64 %a, %a, %a\n" freg r1 freg r2 freg r3; 1
| Pflid(r1, f) ->
- (* We could make good use of the fconstd instruction, but it's available
- in VFD v3 and up, not in v1 nor v2 *)
- let lbl = label_float f in
- fprintf oc " fldd %a, .L%d @ %.12g\n" freg r1 lbl (camlfloat_of_coqfloat f); 1
+ let f = camlint64_of_coqint(Floats.Float.to_bits f) in
+ if !vfpv3 && is_immediate_float64 f then begin
+ fprintf oc " vmov.f64 %a, #%F\n"
+ freg r1 (Int64.float_of_bits f); 1
+ (* immediate floats have at most 4 bits of fraction, so they
+ should print exactly with OCaml's F decimal format. *)
+ end else if !literals_in_code then begin
+ let lbl = label_float f in
+ fprintf oc " vldr %a, .L%d @ %.12g\n"
+ freg r1 lbl (Int64.float_of_bits f); 1
+ end else begin
+ let lbl = label_float f in
+ fprintf oc " movw r14, #:lower16:.L%d\n" lbl;
+ fprintf oc " movt r14, #:upper16:.L%d\n" lbl;
+ fprintf oc " vldr %a, [r14, #0] @ %.12g\n"
+ freg r1 (Int64.float_of_bits f); 3
+ end
| Pfcmpd(r1, r2) ->
- fprintf oc " fcmpd %a, %a\n" freg r1 freg r2;
- fprintf oc " fmstat\n"; 2
+ fprintf oc " vcmp.f64 %a, %a\n" freg r1 freg r2;
+ fprintf oc " vmrs APSR_nzcv, FPSCR\n"; 2
| Pfcmpzd(r1) ->
- fprintf oc " fcmpzd %a\n" freg r1;
- fprintf oc " fmstat\n"; 2
+ fprintf oc " vcmp.f64 %a, #0\n" freg r1;
+ fprintf oc " vmrs APSR_nzcv, FPSCR\n"; 2
| Pfsitod(r1, r2) ->
- fprintf oc " fmsr %a, %a\n" freg_single r1 ireg r2;
- fprintf oc " fsitod %a, %a\n" freg r1 freg_single r1; 2
+ fprintf oc " vmov %a, %a\n" freg_single r1 ireg r2;
+ fprintf oc " vcvt.f64.s32 %a, %a\n" freg r1 freg_single r1; 2
| Pfuitod(r1, r2) ->
- fprintf oc " fmsr %a, %a\n" freg_single r1 ireg r2;
- fprintf oc " fuitod %a, %a\n" freg r1 freg_single r1; 2
+ fprintf oc " vmov %a, %a\n" freg_single r1 ireg r2;
+ fprintf oc " vcvt.f64.u32 %a, %a\n" freg r1 freg_single r1; 2
| Pftosizd(r1, r2) ->
- fprintf oc " ftosizd %a, %a\n" freg_single FR6 freg r2;
- fprintf oc " fmrs %a, %a\n" ireg r1 freg_single FR6; 2
+ fprintf oc " vcvt.s32.f64 %a, %a\n" freg_single FR6 freg r2;
+ fprintf oc " vmov %a, %a\n" ireg r1 freg_single FR6; 2
| Pftouizd(r1, r2) ->
- fprintf oc " ftouizd %a, %a\n" freg_single FR6 freg r2;
- fprintf oc " fmrs %a, %a\n" ireg r1 freg_single FR6; 2
+ fprintf oc " vcvt.u32.f64 %a, %a\n" freg_single FR6 freg r2;
+ fprintf oc " vmov %a, %a\n" ireg r1 freg_single FR6; 2
| Pfabss(r1, r2) ->
- fprintf oc " fabss %a, %a\n" freg_single r1 freg_single r2; 1
+ fprintf oc " vabs.f32 %a, %a\n" freg_single r1 freg_single r2; 1
| Pfnegs(r1, r2) ->
- fprintf oc " fnegs %a, %a\n" freg_single r1 freg_single r2; 1
+ fprintf oc " vneg.f32 %a, %a\n" freg_single r1 freg_single r2; 1
| Pfadds(r1, r2, r3) ->
- fprintf oc " fadds %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+ fprintf oc " vadd.f32 %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
| Pfdivs(r1, r2, r3) ->
- fprintf oc " fdivs %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+ fprintf oc " vdiv.f32 %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
| Pfmuls(r1, r2, r3) ->
- fprintf oc " fmuls %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+ fprintf oc " vmul.f32 %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
| Pfsubs(r1, r2, r3) ->
- fprintf oc " fsubs %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
+ fprintf oc " vsub.f32 %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1
| Pflis(r1, f) ->
- (* We could make good use of the fconsts instruction, but it's available
- in VFD v3 and up, not in v1 nor v2 *)
- let lbl = label_float32 f in
- fprintf oc " flds %a, .L%d @ %.12g\n" freg_single r1 lbl (camlfloat_of_coqfloat32 f); 1
+ let f = camlint_of_coqint(Floats.Float32.to_bits f) in
+ if !vfpv3 && is_immediate_float32 f then begin
+ fprintf oc " vmov.f32 %a, #%F\n"
+ freg_single r1 (Int32.float_of_bits f); 1
+ (* immediate floats have at most 4 bits of fraction, so they
+ should print exactly with OCaml's F decimal format. *)
+ end else if !literals_in_code then begin
+ let lbl = label_float32 f in
+ fprintf oc " vldr %a, .L%d @ %.12g\n"
+ freg_single r1 lbl (Int32.float_of_bits f); 1
+ end else begin
+ fprintf oc " movw r14, #%ld\n" (Int32.logand f 0xFFFFl);
+ fprintf oc " movt r14, #%ld\n" (Int32.shift_right_logical f 16);
+ fprintf oc " vmov %a, r14 @ %.12g\n"
+ freg_single r1 (Int32.float_of_bits f); 3
+ end
| Pfcmps(r1, r2) ->
- fprintf oc " fcmps %a, %a\n" freg_single r1 freg_single r2;
- fprintf oc " fmstat\n"; 2
+ fprintf oc " vcmp.f32 %a, %a\n" freg_single r1 freg_single r2;
+ fprintf oc " vmrs APSR_nzcv, FPSCR\n"; 2
| Pfcmpzs(r1) ->
- fprintf oc " fcmpzs %a\n" freg_single r1;
- fprintf oc " fmstat\n"; 2
+ fprintf oc " vcmp.f32 %a, #0\n" freg_single r1;
+ fprintf oc " vmrs APSR_nzcv, FPSCR\n"; 2
| Pfsitos(r1, r2) ->
- fprintf oc " fmsr %a, %a\n" freg_single r1 ireg r2;
- fprintf oc " fsitos %a, %a\n" freg_single r1 freg_single r1; 2
+ fprintf oc " vmov %a, %a\n" freg_single r1 ireg r2;
+ fprintf oc " vcvt.f32.s32 %a, %a\n" freg_single r1 freg_single r1; 2
| Pfuitos(r1, r2) ->
- fprintf oc " fmsr %a, %a\n" freg_single r1 ireg r2;
- fprintf oc " fuitos %a, %a\n" freg_single r1 freg_single r1; 2
+ fprintf oc " vmov %a, %a\n" freg_single r1 ireg r2;
+ fprintf oc " vcvt.f32.u32 %a, %a\n" freg_single r1 freg_single r1; 2
| Pftosizs(r1, r2) ->
- fprintf oc " ftosizs %a, %a\n" freg_single FR6 freg_single r2;
- fprintf oc " fmrs %a, %a\n" ireg r1 freg_single FR6; 2
+ fprintf oc " vcvt.s32.f32 %a, %a\n" freg_single FR6 freg_single r2;
+ fprintf oc " vmov %a, %a\n" ireg r1 freg_single FR6; 2
| Pftouizs(r1, r2) ->
- fprintf oc " ftouizs %a, %a\n" freg_single FR6 freg_single r2;
- fprintf oc " fmrs %a, %a\n" ireg r1 freg_single FR6; 2
+ fprintf oc " vcvt.u32.f32 %a, %a\n" freg_single FR6 freg_single r2;
+ fprintf oc " vmov %a, %a\n" ireg r1 freg_single FR6; 2
| Pfcvtsd(r1, r2) ->
- fprintf oc " fcvtsd %a, %a\n" freg_single r1 freg r2; 1
+ fprintf oc " vcvt.f32.f64 %a, %a\n" freg_single r1 freg r2; 1
| Pfcvtds(r1, r2) ->
- fprintf oc " fcvtds %a, %a\n" freg r1 freg_single r2; 1
+ fprintf oc " vcvt.f64.f32 %a, %a\n" freg r1 freg_single r2; 1
| Pfldd(r1, r2, n) | Pfldd_a(r1, r2, n) ->
- fprintf oc " fldd %a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
+ fprintf oc " vldr %a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
| Pflds(r1, r2, n) ->
- fprintf oc " flds %a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1
+ fprintf oc " vldr %a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1
| Pfstd(r1, r2, n) | Pfstd_a(r1, r2, n) ->
- fprintf oc " fstd %a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
+ fprintf oc " vstr %a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1
| Pfsts(r1, r2, n) ->
- fprintf oc " fsts %a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1
+ fprintf oc " vstr %a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1
(* Pseudo-instructions *)
| Pallocframe(sz, ofs) ->
fprintf oc " mov r12, sp\n";
@@ -866,17 +972,32 @@ let print_instruction oc = function
| Plabel lbl ->
fprintf oc "%a:\n" print_label lbl; 0
| Ploadsymbol(r1, id, ofs) ->
- let lbl = label_symbol id ofs in
- fprintf oc " ldr %a, .L%d @ %a\n"
- ireg r1 lbl print_symb_ofs (id, ofs); 1
+ loadsymbol oc r1 id ofs
+ | Pmovite(cond, r1, ifso, ifnot) ->
+ fprintf oc " ite %s\n" (condition_name cond);
+ fprintf oc " mov%s %a, %a\n"
+ (condition_name cond) ireg r1 shift_op ifso;
+ fprintf oc " mov%s %a, %a\n"
+ (neg_condition_name cond) ireg r1 shift_op ifnot; 2
| Pbtbl(r, tbl) ->
- fprintf oc " mov r14, %a, lsl #2\n" ireg r;
- fprintf oc " ldr pc, [pc, r14]\n";
- fprintf oc " mov r0, r0\n"; (* no-op *)
- List.iter
- (fun l -> fprintf oc " .word %a\n" print_label l)
- tbl;
- 3 + List.length tbl
+ if !Clflags.option_fthumb then begin
+ let lbl = new_label() in
+ fprintf oc " adr r14, .L%d\n" lbl;
+ fprintf oc " add r14, r14, %a, lsl #2\n" ireg r;
+ fprintf oc " mov pc, r14\n";
+ fprintf oc ".L%d:\n" lbl;
+ List.iter
+ (fun l -> fprintf oc " b.w %a\n" print_label l)
+ tbl;
+ 3 + List.length tbl
+ end else begin
+ fprintf oc " add pc, pc, %a, lsl #2\n" ireg r;
+ fprintf oc " nop\n";
+ List.iter
+ (fun l -> fprintf oc " b %a\n" print_label l)
+ tbl;
+ 2 + List.length tbl
+ end
| Pbuiltin(ef, args, res) ->
begin match ef with
| EF_builtin(name, sg) ->
@@ -938,25 +1059,31 @@ let print_function oc name fn =
reset_constants();
current_function_sig := fn.fn_sig;
currpos := 0;
- let text =
+ let (text, lit) =
match C2C.atom_sections name with
- | t :: _ -> t
- | _ -> Section_text in
+ | t :: l :: _ -> (t, l)
+ | _ -> (Section_text, Section_literal) in
section oc text;
let alignment =
match !Clflags.option_falignfunctions with Some n -> n | None -> 4 in
fprintf oc " .balign %d\n" alignment;
if not (C2C.atom_is_static name) then
fprintf oc " .global %a\n" print_symb name;
+ if !Clflags.option_fthumb then
+ fprintf oc " .thumb_func\n";
fprintf oc "%a:\n" print_symb name;
print_location oc (C2C.atom_location name);
cfi_startproc oc;
ignore (fixup_arguments oc Incoming fn.fn_sig);
print_instructions oc fn.fn_code;
- emit_constants oc;
+ if !literals_in_code then emit_constants oc;
cfi_endproc oc;
fprintf oc " .type %a, %%function\n" print_symb name;
- fprintf oc " .size %a, . - %a\n" print_symb name print_symb name
+ fprintf oc " .size %a, . - %a\n" print_symb name print_symb name;
+ if not !literals_in_code && !size_constants > 0 then begin
+ section oc lit;
+ emit_constants oc
+ end
(* Data *)
@@ -1031,9 +1158,14 @@ let print_globdef oc (name, gdef) =
| Gvar v -> print_var oc name v
let print_program oc p =
-(* fprintf oc " .fpu vfp\n"; *)
PrintAnnot.print_version_and_options oc comment;
Hashtbl.clear filename_num;
+ fprintf oc " .syntax unified\n";
+ fprintf oc " .arch %s\n"
+ (if !Clflags.option_fthumb then "armv7" else "armv6");
+ fprintf oc " .fpu %s\n"
+ (if !vfpv3 then "vfpv3-d16" else "vfpv2");
+ fprintf oc " .%s\n" (if !Clflags.option_fthumb then "thumb" else "arm");
List.iter (print_globdef oc) p.prog_defs
diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp
index ad8a945..6102d82 100644
--- a/arm/SelectOp.vp
+++ b/arm/SelectOp.vp
@@ -454,7 +454,7 @@ Definition can_use_Aindexed2 (chunk: memory_chunk): bool :=
| Many64 => false
end.
-Definition can_use_Aindexed2shift (chunk: memory_chunk): bool :=
+Definition can_use_Aindexed2shift (chunk: memory_chunk) (s: shift): bool :=
match chunk with
| Mint8signed => false
| Mint8unsigned => true
@@ -466,14 +466,20 @@ Definition can_use_Aindexed2shift (chunk: memory_chunk): bool :=
| Mfloat64 => false
| Many32 => true
| Many64 => false
- end.
+ end &&
+ (if thumb tt then
+ match s with
+ | Slsl s => Int.ltu s (Int.repr 4)
+ | _ => false
+ end
+ else true).
Nondetfunction addressing (chunk: memory_chunk) (e: expr) :=
match e with
| Eop (Oaddrstack n) Enil => (Ainstack n, Enil)
| Eop (Oaddimm n) (e1:::Enil) => (Aindexed n, e1:::Enil)
| Eop (Oaddshift s) (e1:::e2:::Enil) =>
- if can_use_Aindexed2shift chunk
+ if can_use_Aindexed2shift chunk s
then (Aindexed2shift s, e1:::e2:::Enil)
else (Aindexed Int.zero, Eop (Oaddshift s) (e1:::e2:::Enil) ::: Enil)
| Eop Oadd (e1:::e2:::Enil) =>
diff --git a/driver/Clflags.ml b/driver/Clflags.ml
index 610d807..7104c32 100644
--- a/driver/Clflags.ml
+++ b/driver/Clflags.ml
@@ -28,6 +28,7 @@ let option_falignfunctions = ref (None: int option)
let option_falignbranchtargets = ref 0
let option_faligncondbranchs = ref 0
let option_finline_asm = ref false
+let option_fthumb = ref false
let option_Osize = ref false
let option_dparse = ref false
let option_dcmedium = ref false
diff --git a/driver/Compopts.v b/driver/Compopts.v
index 205f768..01109f5 100644
--- a/driver/Compopts.v
+++ b/driver/Compopts.v
@@ -30,4 +30,5 @@ Parameter va_strict: unit -> bool.
(** Flag -ftailcalls. For tail call optimization. *)
Parameter eliminate_tailcalls: unit -> bool.
-
+(** Flag -fthumb. For the ARM back-end. *)
+Parameter thumb: unit -> bool.
diff --git a/driver/Driver.ml b/driver/Driver.ml
index 88871f7..556a476 100644
--- a/driver/Driver.ml
+++ b/driver/Driver.ml
@@ -429,6 +429,7 @@ Code generation options: (use -fno-<opt> to turn off -f<opt>) :
-O Optimize for speed [on by default]
-Os Optimize for code size
-ffpu Use FP registers for some integer operations [on]
+ -fthumb (ARM only) Use Thumb2 instruction encoding
-fsmall-data <n> Set maximal size <n> for allocation in small data area
-fsmall-const <n> Set maximal size <n> for allocation in small constant area
-ffloat-const-prop <n> Control constant propagation of floats
@@ -552,6 +553,7 @@ let cmdline_actions =
@ f_opt "inline-asm" option_finline_asm
@ f_opt "fpu" option_ffpu
@ f_opt "sse" option_ffpu (* backward compatibility *)
+ @ f_opt "thumb" option_fthumb
let _ =
Gc.set { (Gc.get()) with
diff --git a/extraction/extraction.v b/extraction/extraction.v
index 0f23264..f0033af 100644
--- a/extraction/extraction.v
+++ b/extraction/extraction.v
@@ -93,6 +93,8 @@ Extract Constant Compopts.generate_float_constants =>
"fun _ -> !Clflags.option_ffloatconstprop >= 2".
Extract Constant Compopts.eliminate_tailcalls =>
"fun _ -> !Clflags.option_ftailcalls".
+Extract Constant Compopts.thumb =>
+ "fun _ -> !Clflags.option_fthumb".
(* Compiler *)
Extract Constant Compiler.print_Clight => "PrintClight.print_if".
diff --git a/runtime/arm/i64_dtos.S b/runtime/arm/i64_dtos.S
index e454fd4..638a5ff 100644
--- a/runtime/arm/i64_dtos.S
+++ b/runtime/arm/i64_dtos.S
@@ -34,66 +34,67 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Conversion from double float to signed 64-bit integer
- .global __i64_dtos
-__i64_dtos:
-#ifdef VARIANT_hardfloat
- fmrrd r0, r1, d0
+FUNCTION(__i64_dtos)
+#ifndef VARIANT_eabi
+ vmov r0, r1, d0
#endif
- mov r12, r1, asr #31 @ save sign of result in r12
+ ASR r12, r1, #31 @ save sign of result in r12
@ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
@ note: 1023 + 52 = 1075 = 1024 + 51
@ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
- mov r2, r1, lsl #1
- mov r2, r2, lsr #21
- sub r2, r2, #51
- sub r2, r2, #1024
+ LSL r2, r1, #1
+ LSR r2, r2, #21
+ SUB r2, r2, #51
+ SUB r2, r2, #1024
@ check range of exponent
cmn r2, #52 @ if EXP < -52, |double| is < 1.0
blt 1f
cmp r2, #11 @ if EXP >= 63 - 52, |double| is >= 2^63
bge 2f
@ extract true mantissa
- bic r1, r1, #0xFF000000
- bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
- orr r1, r1, #0x00100000 @ HI |= 0x00100000
+ BIC r1, r1, #0xFF000000
+ BIC r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+ ORR r1, r1, #0x00100000 @ HI |= 0x00100000
@ shift it appropriately
cmp r2, #0
blt 3f
@ EXP >= 0: shift left by EXP. Note that EXP < 12
rsb r3, r2, #32 @ r3 = 32 - amount
- mov r1, r1, lsl r2
- orr r1, r1, r0, lsr r3
- mov r0, r0, lsl r2
+ LSL r1, r1, r2
+ LSR r3, r0, r3
+ ORR r1, r1, r3
+ LSL r0, r0, r2
b 4f
@ EXP < 0: shift right by -EXP. Note that -EXP <= 52 but can be >= 32
-3: rsb r2, r2, #0 @ r2 = -EXP - shift amount
- rsb r3, r2, #32 @ r3 = 32 - amount
- mov r0, r0, lsr r2
- orr r0, r0, r1, lsl r3
- sub r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s)
- orr r0, r0, r1, lsr r3
- mov r1, r1, lsr r2
+3: RSB r2, r2, #0 @ r2 = -EXP = shift amount
+ RSB r3, r2, #32 @ r3 = 32 - amount
+ LSR r0, r0, r2
+ LSL r3, r1, r3
+ ORR r0, r0, r3
+ SUB r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s)
+ LSR r3, r1, r3
+ ORR r0, r0, r3
+ LSR r1, r1, r2
@ apply sign to result
-4: eor r0, r0, r12
- eor r1, r1, r12
+4: EOR r0, r0, r12
+ EOR r1, r1, r12
subs r0, r0, r12
sbc r1, r1, r12
bx lr
@ special cases
-1: mov r0, #0 @ result is 0
- mov r1, #0
+1: MOV r0, #0 @ result is 0
+ MOV r1, #0
bx lr
-2: cmp r4, #0
+2: cmp r12, #0
blt 6f
mvn r0, #0 @ result is 0x7F....FF (MAX_SINT)
- mov r1, r0, lsr #1
+ LSR r1, r0, #1
bx lr
-6: mov r0, #0 @ result is 0x80....00 (MIN_SINT)
- mov r1, #0x80000000
+6: MOV r0, #0 @ result is 0x80....00 (MIN_SINT)
+ MOV r1, #0x80000000
bx lr
- .type __i64_dtos, %function
- .size __i64_dtos, . - __i64_dtos
+ENDFUNCTION(__i64_dtos)
diff --git a/runtime/arm/i64_dtou.S b/runtime/arm/i64_dtou.S
index a9e7b65..1c632d2 100644
--- a/runtime/arm/i64_dtou.S
+++ b/runtime/arm/i64_dtou.S
@@ -34,58 +34,58 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Conversion from double float to unsigned 64-bit integer
- .global __i64_dtou
-__i64_dtou:
-#ifdef VARIANT_hardfloat
- fmrrd r0, r1, d0
+FUNCTION(__i64_dtou)
+#ifndef VARIANT_eabi
+ vmov r0, r1, d0
#endif
cmp r1, #0 @ is double < 0 ?
blt 1f @ then it converts to 0
@ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
@ note: 1023 + 52 = 1075 = 1024 + 51
@ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
- mov r2, r1, lsl #1
- mov r2, r2, lsr #21
- sub r2, r2, #51
- sub r2, r2, #1024
+ LSL r2, r1, #1
+ LSR r2, r2, #21
+ SUB r2, r2, #51
+ SUB r2, r2, #1024
@ check range of exponent
cmn r2, #52 @ if EXP < -52, double is < 1.0
blt 1f
cmp r2, #12 @ if EXP >= 64 - 52, double is >= 2^64
bge 2f
@ extract true mantissa
- bic r1, r1, #0xFF000000
- bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
- orr r1, r1, #0x00100000 @ HI |= 0x00100000
+ BIC r1, r1, #0xFF000000
+ BIC r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+ ORR r1, r1, #0x00100000 @ HI |= 0x00100000
@ shift it appropriately
cmp r2, #0
blt 3f
@ EXP >= 0: shift left by EXP. Note that EXP < 12
rsb r3, r2, #32 @ r3 = 32 - amount
- mov r1, r1, lsl r2
- orr r1, r1, r0, lsr r3
- mov r0, r0, lsl r2
+ LSL r1, r1, r2
+ LSR r3, r0, r3
+ ORR r1, r1, r3
+ LSL r0, r0, r2
bx lr
@ EXP < 0: shift right by -EXP. Note that -EXP <= 52 but can be >= 32
-3: rsb r2, r2, #0 @ r2 = -EXP - shift amount
- rsb r3, r2, #32 @ r3 = 32 - amount
- mov r0, r0, lsr r2
- orr r0, r0, r1, lsl r3
- sub r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s)
- orr r0, r0, r1, lsr r3
- mov r1, r1, lsr r2
+3: RSB r2, r2, #0 @ r2 = -EXP = shift amount
+ RSB r3, r2, #32 @ r3 = 32 - amount
+ LSR r0, r0, r2
+ LSL r3, r1, r3
+ ORR r0, r0, r3
+ SUB r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s)
+ LSR r3, r1, r3
+ ORR r0, r0, r3
+ LSR r1, r1, r2
bx lr
@ special cases
-1: mov r0, #0 @ result is 0
- mov r1, #0
+1: MOV r0, #0 @ result is 0
+ MOV r1, #0
bx lr
2: mvn r0, #0 @ result is 0xFF....FF (MAX_UINT)
- mvn r1, #0
+ MOV r1, r0
bx lr
- .type __i64_dtou, %function
- .size __i64_dtou, . - __i64_dtou
-
+ENDFUNCTION(__i64_dtou)
diff --git a/runtime/arm/i64_sar.s b/runtime/arm/i64_sar.S
index a96d092..1bbd8a7 100644
--- a/runtime/arm/i64_sar.s
+++ b/runtime/arm/i64_sar.S
@@ -34,24 +34,24 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Shift right signed
- .global __i64_sar
-__i64_sar:
- and r2, r2, #63 @ normalize amount to 0...63
+FUNCTION(__i64_sar)
+ AND r2, r2, #63 @ normalize amount to 0...63
rsbs r3, r2, #32 @ r3 = 32 - amount
ble 1f @ branch if <= 0, namely if amount >= 32
- mov r0, r0, lsr r2
- orr r0, r1, lsl r3
- mov r1, r1, asr r2
+ LSR r0, r0, r2
+ LSL r3, r1, r3
+ ORR r0, r1, r3
+ ASR r1, r1, r2
bx lr
1:
- sub r2, r2, #32
- mov r0, r1, asr r2
- mov r1, r1, asr #31
+ SUB r2, r2, #32
+ ASR r0, r1, r2
+ ASR r1, r1, #31
bx lr
- .type __i64_sar, %function
- .size __i64_sar, . - __i64_sar
+ENDFUNCTION(__i64_sar)
+
diff --git a/runtime/arm/i64_smod.s b/runtime/arm/i64_sdiv.S
index 8e766a0..dd88c12 100644
--- a/runtime/arm/i64_smod.s
+++ b/runtime/arm/i64_sdiv.S
@@ -34,31 +34,28 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
-@@@ Signed modulus
-
- .global __i64_smod
-__i64_smod:
+@@@ Signed division
+
+FUNCTION(__i64_sdiv)
push {r4, r5, r6, r7, r8, r10, lr}
- mov r10, r1 @ r10 = sign of result
- mov r4, r1, asr#31 @ take absolute value of N
- eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31)
- eor r1, r1, r4
+ ASR r4, r1, #31 @ r4 = sign of N
+ ASR r5, r3, #31 @ r5 = sign of D
+ EOR r10, r4, r5 @ r10 = sign of result
+ EOR r0, r0, r4 @ take absolute value of N
+ EOR r1, r1, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31)
subs r0, r0, r4
sbc r1, r1, r4
- mov r4, r3, asr #31 @ take absolute value of D
- eor r2, r2, r4
- eor r3, r3, r4
- subs r2, r2, r4
- sbc r3, r3, r4
+ EOR r2, r2, r5 @ take absolute value of D
+ EOR r3, r3, r5
+ subs r2, r2, r5
+ sbc r3, r3, r5
bl __i64_udivmod @ do unsigned division
- eor r0, r0, r10, asr#31 @ apply expected sign
- eor r1, r1, r10, asr#31
- subs r0, r0, r10, asr#31
- sbc r1, r1, r10, asr#31
+ EOR r0, r4, r10 @ apply expected sign
+ EOR r1, r5, r10
+ subs r0, r0, r10
+ sbc r1, r1, r10
pop {r4, r5, r6, r7, r8, r10, lr}
bx lr
- .type __i64_smod, %function
- .size __i64_smod, . - __i64_smod
-
+ENDFUNCTION(__i64_sdiv)
diff --git a/runtime/arm/i64_shl.s b/runtime/arm/i64_shl.S
index afd92db..66569d3 100644
--- a/runtime/arm/i64_shl.s
+++ b/runtime/arm/i64_shl.S
@@ -34,7 +34,7 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Shift left
@@ -47,26 +47,25 @@
@ If N = 0:
@ RH = XH | 0 | 0
@ RL = XL
-@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 255 <= N-32 mod 256 <= 255
+@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 2s5 <= N-32 mod 256 <= 255
@ RH = (XH << N) | (XL >> (32-N) | 0
@ RL = XL << N
@ If N = 32:
@ RH = 0 | XL | 0
@ RL = 0
-@ If 33 <= N <= 63: 255 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31
+@ If 33 <= N <= 63: 225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31
@ RH = 0 | 0 | (XL << (N-32))
@ RL = 0
- .global __i64_shl
-__i64_shl:
- and r2, r2, #63 @ normalize amount to 0...63
- rsb r3, r2, #32 @ r3 = 32 - amount
- mov r1, r1, lsl r2
- orr r1, r1, r0, lsr r3
- sub r3, r2, #32 @ r3 = amount - 32
- orr r1, r1, r0, lsl r3
- mov r0, r0, lsl r2
+FUNCTION(__i64_shl)
+ AND r2, r2, #63 @ normalize amount to 0...63
+ RSB r3, r2, #32 @ r3 = 32 - amount
+ LSL r1, r1, r2
+ LSR r3, r0, r3
+ ORR r1, r1, r3
+ SUB r3, r2, #32 @ r3 = amount - 32
+ LSL r3, r0, r3
+ ORR r1, r1, r3
+ LSL r0, r0, r2
bx lr
- .type __i64_shl, %function
- .size __i64_shl, . - __i64_shl
-
+ENDFUNCTION(__i64_shl)
diff --git a/runtime/arm/i64_shr.s b/runtime/arm/i64_shr.S
index 9d60441..a5418f4 100644
--- a/runtime/arm/i64_shr.s
+++ b/runtime/arm/i64_shr.S
@@ -34,7 +34,7 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Shift right unsigned
@@ -57,16 +57,15 @@
@ RL = 0 | 0 | (XH >> (N-32))
@ RH = 0
- .global __i64_shr
-__i64_shr:
- and r2, r2, #63 @ normalize amount to 0...63
- rsb r3, r2, #32 @ r3 = 32 - amount
- mov r0, r0, lsr r2
- orr r0, r0, r1, lsl r3
- sub r3, r2, #32 @ r3 = amount - 32
- orr r0, r0, r1, lsr r3
- mov r1, r1, lsr r2
+FUNCTION(__i64_shr)
+ AND r2, r2, #63 @ normalize amount to 0...63
+ RSB r3, r2, #32 @ r3 = 32 - amount
+ LSR r0, r0, r2
+ LSL r3, r1, r3
+ ORR r0, r0, r3
+ SUB r3, r2, #32 @ r3 = amount - 32
+ LSR r3, r1, r3
+ ORR r0, r0, r3
+ LSR r1, r1, r2
bx lr
- .type __i64_shr, %function
- .size __i64_shr, . - __i64_shr
-
+ENDFUNCTION(__i64_shr)
diff --git a/runtime/arm/i64_sdiv.s b/runtime/arm/i64_smod.S
index e96008d..b109ecc 100644
--- a/runtime/arm/i64_sdiv.s
+++ b/runtime/arm/i64_smod.S
@@ -34,33 +34,28 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
-@@@ Signed division
+@@@ Signed modulus
- .global __i64_sdiv
-__i64_sdiv:
+FUNCTION(__i64_smod)
push {r4, r5, r6, r7, r8, r10, lr}
- eor r10, r1, r3 @ r10 = sign of result
- mov r4, r1, asr #31 @ take absolute value of N
- eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31)
- eor r1, r1, r4
+ ASR r4, r1, #31 @ r4 = sign of N
+ ASR r5, r3, #31 @ r5 = sign of D
+ MOV r10, r4 @ r10 = sign of result
+ EOR r0, r0, r4 @ take absolute value of N
+ EOR r1, r1, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31)
subs r0, r0, r4
sbc r1, r1, r4
- mov r4, r3, asr #31 @ take absolute value of D
- eor r2, r2, r4
- eor r3, r3, r4
- subs r2, r2, r4
- sbc r3, r3, r4
+ EOR r2, r2, r5 @ take absolute value of D
+ EOR r3, r3, r5
+ subs r2, r2, r5
+ sbc r3, r3, r5
bl __i64_udivmod @ do unsigned division
- mov r0, r4
- mov r1, r5
- eor r0, r0, r10, asr#31 @ apply expected sign
- eor r1, r1, r10, asr#31
- subs r0, r0, r10, asr#31
- sbc r1, r1, r10, asr#31
+ EOR r0, r0, r10 @ apply expected sign
+ EOR r1, r1, r10
+ subs r0, r0, r10
+ sbc r1, r1, r10
pop {r4, r5, r6, r7, r8, r10, lr}
bx lr
- .type __i64_sdiv, %function
- .size __i64_sdiv, . - __i64_sdiv
-
+ENDFUNCTION(__i64_smod)
diff --git a/runtime/arm/i64_stod.S b/runtime/arm/i64_stod.S
index 7e5a06f..81e43e2 100644
--- a/runtime/arm/i64_stod.S
+++ b/runtime/arm/i64_stod.S
@@ -34,24 +34,23 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Conversion from signed 64-bit integer to double float
- .global __i64_stod
+FUNCTION(__i64_stod)
__i64_stod:
- fmsr s0, r0
- fuitod d0, s0 @ convert low half to double (unsigned)
- fmsr s2, r1
- fsitod d1, s2 @ convert high half to double (signed)
- fldd d2, .LC1 @ d2 = 2^32
- fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
+ vmov s0, r0
+ vcvt.f64.u32 d0, s0 @ convert low half to double (unsigned)
+ vmov s2, r1
+ vcvt.f64.s32 d1, s2 @ convert high half to double (signed)
+ vldr d2, .LC1 @ d2 = 2^32
+ vmla.f64 d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
#ifdef VARIANT_eabi
- fmrrd r0, r1, d0 @ return result in r0, r1
+ vmov r0, r1, d0 @ return result in r0, r1
#endif
bx lr
- .type __i64_stod, %function
- .size __i64_stod, . - __i64_stod
+ENDFUNCTION(__i64_stod)
.balign 8
.LC1: .quad 0x41f0000000000000 @ 2^32 in double precision
diff --git a/runtime/arm/i64_stof.S b/runtime/arm/i64_stof.S
index 3f33f04..f1051f5 100644
--- a/runtime/arm/i64_stof.S
+++ b/runtime/arm/i64_stof.S
@@ -34,15 +34,14 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Conversion from signed 64-bit integer to single float
- .global __i64_stof
-__i64_stof:
+FUNCTION(__i64_stof)
@ Check whether -2^53 <= X < 2^53
- mov r2, r1, asr #21
- mov r3, r1, asr #31 @ (r2,r3) = X >> 53
+ ASR r2, r1, #21
+ ASR r3, r1, #31 @ (r2,r3) = X >> 53
adds r2, r2, #1
adc r3, r3, #0 @ (r2,r3) = X >> 53 + 1
cmp r3, #2
@@ -50,30 +49,29 @@ __i64_stof:
@ X is large enough that double rounding can occur.
@ Avoid it by nudging X away from the points where double rounding
@ occurs (the "round to odd" technique)
- mov r2, #0x700
- orr r2, r2, #0xFF @ r2 = 0x7FF
- and r3, r0, r2 @ extract bits 0 to 11 of X
- add r3, r3, r2 @ r3 = (X & 0x7FF) + 0x7FF
+ MOV r2, #0x700
+ ORR r2, r2, #0xFF @ r2 = 0x7FF
+ AND r3, r0, r2 @ extract bits 0 to 11 of X
+ ADD r3, r3, r2 @ r3 = (X & 0x7FF) + 0x7FF
@ bit 12 of r3 is 0 if all low 12 bits of X are 0, 1 otherwise
@ bits 13-31 of r3 are 0
- orr r0, r0, r3 @ correct bit number 12 of X
- bic r0, r0, r2 @ set to 0 bits 0 to 11 of X
+ ORR r0, r0, r3 @ correct bit number 12 of X
+ BIC r0, r0, r2 @ set to 0 bits 0 to 11 of X
@ Convert to double
-1: fmsr s0, r0
- fuitod d0, s0 @ convert low half to double (unsigned)
- fmsr s2, r1
- fsitod d1, s2 @ convert high half to double (signed)
- fldd d2, .LC1 @ d2 = 2^32
- fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
+1: vmov s0, r0
+ vcvt.f64.u32 d0, s0 @ convert low half to double (unsigned)
+ vmov s2, r1
+ vcvt.f64.s32 d1, s2 @ convert high half to double (signed)
+ vldr d2, .LC1 @ d2 = 2^32
+ vmla.f64 d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
@ Round to single
- fcvtsd s0, d0
+ vcvt.f32.f64 s0, d0
#ifdef VARIANT_eabi
@ Return result in r0
- fmrs r0, s0
+ vmov r0, s0
#endif
bx lr
- .type __i64_stof, %function
- .size __i64_stof, . - __i64_stof
-
+ENDFUNCTION(__i64_stof)
+
.balign 8
.LC1: .quad 0x41f0000000000000 @ 2^32 in double precision
diff --git a/runtime/arm/i64_udiv.s b/runtime/arm/i64_udiv.S
index de41e00..3b59944 100644
--- a/runtime/arm/i64_udiv.s
+++ b/runtime/arm/i64_udiv.S
@@ -34,18 +34,15 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Unsigned division
- .global __i64_udiv
-__i64_udiv:
+FUNCTION(__i64_udiv)
push {r4, r5, r6, r7, r8, lr}
bl __i64_udivmod
- mov r0, r4
- mov r1, r5
+ MOV r0, r4
+ MOV r1, r5
pop {r4, r5, r6, r7, r8, lr}
bx lr
- .type __i64_udiv, %function
- .size __i64_udiv, . - __i64_udiv
-
+ENDFUNCTION(__i64_udiv)
diff --git a/runtime/arm/i64_udivmod.s b/runtime/arm/i64_udivmod.S
index e3d9c87..e5373ad 100644
--- a/runtime/arm/i64_udivmod.s
+++ b/runtime/arm/i64_udivmod.S
@@ -34,7 +34,7 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Auxiliary function for division and modulus. Don't call from C
@@ -42,14 +42,14 @@
@ On exit: Q = (r4, r5) quotient R = (r0, r1) remainder
@ Locals: M = (r6, r7) mask TMP = r8 temporary
- .global __i64_udivmod
-__i64_udivmod:
+FUNCTION(__i64_udivmod)
orrs r8, r2, r3 @ is D == 0?
+ it eq
bxeq lr @ if so, return with unspecified results
- mov r4, #0 @ Q = 0
- mov r5, #0
- mov r6, #1 @ M = 1
- mov r7, #0
+ MOV r4, #0 @ Q = 0
+ MOV r5, #0
+ MOV r6, #1 @ M = 1
+ MOV r7, #0
1: cmp r3, #0 @ while ((signed) D >= 0) ...
blt 2f
subs r8, r0, r2 @ ... and N >= D ...
@@ -69,12 +69,11 @@ __i64_udivmod:
adc r1, r1, r3 @ N = N + D
bic r4, r4, r6 @ Q = Q & ~M
bic r5, r5, r7
-3: movs r7, r7, lsr #1 @ M = M >> 1
- mov r6, r6, rrx
- movs r3, r3, lsr #1 @ D = D >> 1
- mov r2, r2, rrx
+3: lsrs r7, r7, #1 @ M = M >> 1
+ rrx r6, r6
+ lsrs r3, r3, #1 @ D = D >> 1
+ rrx r2, r2
orrs r8, r6, r7 @ repeat while (M != 0) ...
bne 2b
bx lr
- .type __i64_udivmod, %function
- .size __i64_udivmod, . - __i64_udivmod
+ENDFUNCTION(__i64_udivmod)
diff --git a/runtime/arm/i64_umod.s b/runtime/arm/i64_umod.S
index 80af56e..e59fd20 100644
--- a/runtime/arm/i64_umod.s
+++ b/runtime/arm/i64_umod.S
@@ -34,16 +34,13 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
-@@@ Unsigned modulus
+@@@ Unsigned remainder
- .global __i64_umod
-__i64_umod:
+FUNCTION(__i64_umod)
push {r4, r5, r6, r7, r8, lr}
bl __i64_udivmod @ remainder is already in r0,r1
pop {r4, r5, r6, r7, r8, lr}
bx lr
- .type __i64_umod, %function
- .size __i64_umod, . - __i64_umod
-
+ENDFUNCTION(__i64_umod)
diff --git a/runtime/arm/i64_utod.S b/runtime/arm/i64_utod.S
index 1110874..b12d7c2 100644
--- a/runtime/arm/i64_utod.S
+++ b/runtime/arm/i64_utod.S
@@ -34,25 +34,23 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Conversion from unsigned 64-bit integer to double float
- .global __i64_utod
-__i64_utod:
- fmsr s0, r0
- fuitod d0, s0 @ convert low half to double (unsigned)
- fmsr s2, r1
- fuitod d1, s2 @ convert high half to double (unsigned)
- fldd d2, .LC1 @ d2 = 2^32
- fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
+FUNCTION(__i64_utod)
+__i64_stod:
+ vmov s0, r0
+ vcvt.f64.u32 d0, s0 @ convert low half to double (unsigned)
+ vmov s2, r1
+ vcvt.f64.u32 d1, s2 @ convert high half to double (unsigned)
+ vldr d2, .LC1 @ d2 = 2^32
+ vmla.f64 d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
#ifdef VARIANT_eabi
- fmrrd r0, r1, d0 @ return result in r0, r1
-#endif
+ vmov r0, r1, d0 @ return result in r0, r1
+#endif
bx lr
- .type __i64_utod, %function
- .size __i64_utod, . - __i64_utod
-
+ENDFUNCTION(__i64_utod)
+
.balign 8
.LC1: .quad 0x41f0000000000000 @ 2^32 in double precision
-
diff --git a/runtime/arm/i64_utof.S b/runtime/arm/i64_utof.S
index a959076..711cda0 100644
--- a/runtime/arm/i64_utof.S
+++ b/runtime/arm/i64_utof.S
@@ -34,42 +34,40 @@
@ Helper functions for 64-bit integer arithmetic. ARM version.
- .text
+#include "sysdeps.h"
@@@ Conversion from unsigned 64-bit integer to single float
- .global __i64_utof
-__i64_utof:
+FUNCTION(__i64_utof)
@ Check whether X < 2^53
- movs r2, r1, lsr #21 @ test if X >> 53 == 0
+ lsrs r2, r1, #21 @ test if X >> 53 == 0
beq 1f
@ X is large enough that double rounding can occur.
@ Avoid it by nudging X away from the points where double rounding
@ occurs (the "round to odd" technique)
- mov r2, #0x700
- orr r2, r2, #0xFF @ r2 = 0x7FF
- and r3, r0, r2 @ extract bits 0 to 11 of X
- add r3, r3, r2 @ r3 = (X & 0x7FF) + 0x7FF
+ MOV r2, #0x700
+ ORR r2, r2, #0xFF @ r2 = 0x7FF
+ AND r3, r0, r2 @ extract bits 0 to 11 of X
+ ADD r3, r3, r2 @ r3 = (X & 0x7FF) + 0x7FF
@ bit 12 of r3 is 0 if all low 12 bits of X are 0, 1 otherwise
@ bits 13-31 of r3 are 0
- orr r0, r0, r3 @ correct bit number 12 of X
- bic r0, r0, r2 @ set to 0 bits 0 to 11 of X
+ ORR r0, r0, r3 @ correct bit number 12 of X
+ BIC r0, r0, r2 @ set to 0 bits 0 to 11 of X
@ Convert to double
-1: fmsr s0, r0
- fuitod d0, s0 @ convert low half to double (unsigned)
- fmsr s2, r1
- fuitod d1, s2 @ convert high half to double (unsigned)
- fldd d2, .LC1 @ d2 = 2^32
- fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
+1: vmov s0, r0
+ vcvt.f64.u32 d0, s0 @ convert low half to double (unsigned)
+ vmov s2, r1
+ vcvt.f64.u32 d1, s2 @ convert high half to double (unsigned)
+ vldr d2, .LC1 @ d2 = 2^32
+ vmla.f64 d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
@ Round to single
- fcvtsd s0, d0
+ vcvt.f32.f64 s0, d0
#ifdef VARIANT_eabi
@ Return result in r0
- fmrs r0, s0
+ vmov r0, s0
#endif
bx lr
- .type __i64_utof, %function
- .size __i64_utof, . - __i64_utof
+ENDFUNCTION(__i64_utof)
.balign 8
.LC1: .quad 0x41f0000000000000 @ 2^32 in double precision
diff --git a/runtime/arm/sysdeps.h b/runtime/arm/sysdeps.h
new file mode 100644
index 0000000..b72af0f
--- /dev/null
+++ b/runtime/arm/sysdeps.h
@@ -0,0 +1,84 @@
+// *****************************************************************
+//
+// The Compcert verified compiler
+//
+// Xavier Leroy, INRIA Paris-Rocquencourt
+//
+// Copyright (c) 2013 Institut National de Recherche en Informatique et
+// en Automatique.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of the <organization> nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+// HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// *********************************************************************
+
+// System dependencies
+
+#ifdef THUMB
+#define FUNCTION(f) \
+ .text; \
+ .thumb; \
+ .thumb_func; \
+ .global f; \
+f:
+#else
+#define FUNCTION(f) \
+ .text; \
+ .arm; \
+ .global f; \
+f:
+#endif
+
+#define ENDFUNCTION(f) \
+ .type f, %function; .size f, . - f
+
+// In Thumb2 mode, some instructions have shorter encodings in their "S" form
+// (update condition flags). For cases where the condition flags do not
+// matter, we use the following macros for these instructions.
+// In classic ARM mode, we prefer not to use the "S" form unless necessary.
+
+#ifdef THUMB
+#define THUMB_S(x) x##s
+#else
+#define THUMB_S(x) x
+#endif
+
+#define ADD THUMB_S(add)
+#define AND THUMB_S(and)
+#define ASR THUMB_S(asr)
+#define BIC THUMB_S(bic)
+#define EOR THUMB_S(eor)
+#define LSL THUMB_S(lsl)
+#define LSR THUMB_S(lsr)
+#define MOV THUMB_S(mov)
+#define ORR THUMB_S(orr)
+#define RSB THUMB_S(rsb)
+#define SUB THUMB_S(sub)
+
+ .syntax unified
+#ifdef THUMB
+ .arch armv7
+#else
+ .arch armv6
+#endif
+ .fpu vfpv2
diff --git a/runtime/arm/vararg.S b/runtime/arm/vararg.S
index 8d431d3..e352582 100644
--- a/runtime/arm/vararg.S
+++ b/runtime/arm/vararg.S
@@ -34,50 +34,44 @@
@ Helper functions for variadic functions <stdarg.h>. ARM version
- .text
+#include "sysdeps.h"
@ typedef void * va_list;
@ unsigned int __compcert_va_int32(va_list * ap);
@ unsigned long long __compcert_va_int64(va_list * ap);
@ double __compcert_va_float64(va_list * ap);
- .global __compcert_va_int32
-__compcert_va_int32:
+FUNCTION(__compcert_va_int32)
@ r0 = ap parameter
ldr r1, [r0, #0] @ r1 = pointer to next argument
- add r1, r1, #4 @ advance ap by 4
+ ADD r1, r1, #4 @ advance ap by 4
str r1, [r0, #0]
ldr r0, [r1, #-4] @ load next argument and return it in r0
bx lr
- .type __compcert_va_int32, %function
- .size __compcert_va_int32, . - __compcert_va_int32
+ENDFUNCTION(__compcert_va_int32)
- .global __compcert_va_int64
-__compcert_va_int64:
+FUNCTION(__compcert_va_int64)
@ r0 = ap parameter
ldr r1, [r0, #0] @ r1 = pointer to next argument
- add r1, r1, #15 @ 8-align and advance by 8
- bic r1, r1, #7
+ ADD r1, r1, #15 @ 8-align and advance by 8
+ BIC r1, r1, #7
str r1, [r0, #0] @ update ap
ldr r0, [r1, #-8] @ load next argument and return it in r0,r1
ldr r1, [r1, #-4]
bx lr
- .type __compcert_va_int64, %function
- .size __compcert_va_int64, . - __compcert_va_int64
+ENDFUNCTION(__compcert_va_int64)
- .global __compcert_va_float64
-__compcert_va_float64:
+FUNCTION(__compcert_va_float64)
@ r0 = ap parameter
ldr r1, [r0, #0] @ r1 = pointer to next argument
- add r1, r1, #15 @ 8-align and advance by 8
- bic r1, r1, #7
+ ADD r1, r1, #15 @ 8-align and advance by 8
+ BIC r1, r1, #7
str r1, [r0, #0] @ update ap
-#ifdef VARIANT_hardfloat
- fldd d0, [r1, #-8] @ load next argument and return it in d0
-#else
+#ifdef VARIANT_eabi
ldr r0, [r1, #-8] @ load next argument and return it in r0,r1
ldr r1, [r1, #-4]
+#else
+ vldr d0, [r1, #-8] @ load next argument and return it in d0
#endif
bx lr
- .type __compcert_va_float64, %function
- .size __compcert_va_float64, . - __compcert_va_float64
+ENDFUNCTION(__compcert_va_float64)
diff --git a/runtime/ia32/sysdeps.h b/runtime/ia32/sysdeps.h
index 1aa54d3..9d957a8 100644
--- a/runtime/ia32/sysdeps.h
+++ b/runtime/ia32/sysdeps.h
@@ -1,38 +1,38 @@
-# *****************************************************************
-#
-# The Compcert verified compiler
-#
-# Xavier Leroy, INRIA Paris-Rocquencourt
-#
-# Copyright (c) 2013 Institut National de Recherche en Informatique et
-# en Automatique.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of the <organization> nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
-# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *********************************************************************
+// *****************************************************************
+//
+// The Compcert verified compiler
+//
+// Xavier Leroy, INRIA Paris-Rocquencourt
+//
+// Copyright (c) 2013 Institut National de Recherche en Informatique et
+// en Automatique.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// * Neither the name of the <organization> nor the
+// names of its contributors may be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+// HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// *********************************************************************
-# System dependencies
+// System dependencies
#if defined(SYS_linux) || defined(SYS_bsd)