From f4e106d4fc1cce484678b5cdd86ab57d7a43076a Mon Sep 17 00:00:00 2001 From: xleroy Date: Sun, 27 Jul 2014 07:35:49 +0000 Subject: ARM port: add support for Thumb2. To be tested. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2549 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- arm/Archi.v | 4 + arm/Asm.v | 169 ++++++++-------- arm/Asmgen.v | 251 +++++++++++++++--------- arm/Asmgenproof.v | 35 ++-- arm/Asmgenproof1.v | 383 +++++++++++++++++++++++------------- arm/PrintAsm.ml | 486 +++++++++++++++++++++++++++++----------------- arm/SelectOp.vp | 12 +- driver/Clflags.ml | 1 + driver/Compopts.v | 3 +- driver/Driver.ml | 2 + extraction/extraction.v | 2 + runtime/arm/i64_dtos.S | 67 +++---- runtime/arm/i64_dtou.S | 56 +++--- runtime/arm/i64_sar.S | 57 ++++++ runtime/arm/i64_sar.s | 57 ------ runtime/arm/i64_sdiv.S | 61 ++++++ runtime/arm/i64_sdiv.s | 66 ------- runtime/arm/i64_shl.S | 71 +++++++ runtime/arm/i64_shl.s | 72 ------- runtime/arm/i64_shr.S | 71 +++++++ runtime/arm/i64_shr.s | 72 ------- runtime/arm/i64_smod.S | 61 ++++++ runtime/arm/i64_smod.s | 64 ------ runtime/arm/i64_stod.S | 21 +- runtime/arm/i64_stof.S | 42 ++-- runtime/arm/i64_udiv.S | 48 +++++ runtime/arm/i64_udiv.s | 51 ----- runtime/arm/i64_udivmod.S | 79 ++++++++ runtime/arm/i64_udivmod.s | 80 -------- runtime/arm/i64_umod.S | 46 +++++ runtime/arm/i64_umod.s | 49 ----- runtime/arm/i64_utod.S | 28 ++- runtime/arm/i64_utof.S | 38 ++-- runtime/arm/sysdeps.h | 84 ++++++++ runtime/arm/vararg.S | 36 ++-- runtime/ia32/sysdeps.h | 68 +++---- 36 files changed, 1594 insertions(+), 1199 deletions(-) create mode 100644 runtime/arm/i64_sar.S delete mode 100644 runtime/arm/i64_sar.s create mode 100644 runtime/arm/i64_sdiv.S delete mode 100644 runtime/arm/i64_sdiv.s create mode 100644 runtime/arm/i64_shl.S delete mode 100644 runtime/arm/i64_shl.s create mode 100644 runtime/arm/i64_shr.S delete mode 100644 runtime/arm/i64_shr.s create mode 100644 runtime/arm/i64_smod.S delete mode 100644 runtime/arm/i64_smod.s create mode 100644 runtime/arm/i64_udiv.S delete mode 100644 runtime/arm/i64_udiv.s create mode 100644 runtime/arm/i64_udivmod.S delete mode 100644 runtime/arm/i64_udivmod.s create mode 100644 runtime/arm/i64_umod.S delete mode 100644 runtime/arm/i64_umod.s create mode 100644 runtime/arm/sysdeps.h diff --git a/arm/Archi.v b/arm/Archi.v index 1306459..00d9895 100644 --- a/arm/Archi.v +++ b/arm/Archi.v @@ -47,5 +47,9 @@ Global Opaque big_endian default_pl_64 choose_binop_pl_64 default_pl_32 choose_binop_pl_32. +(** Which ABI to use: either the standard ARM EABI with floats passed + in integer registers, or the "hardfloat" variant of the EABI + that uses FP registers instead. *) + Inductive abi_kind := Softfloat | Hardfloat. Parameter abi: abi_kind. diff --git a/arm/Asm.v b/arm/Asm.v index f054db0..0790c6f 100644 --- a/arm/Asm.v +++ b/arm/Asm.v @@ -91,29 +91,19 @@ Notation "'RA'" := IR14 (only parsing). reference manuals for more details. Some instructions, described below, are pseudo-instructions: they expand to canned instruction sequences during the printing of the assembly - code. *) + code. Most instructions are common to Thumb2 and ARM classic. + We use a few Thumb2-specific instructions when available, and avoid + to use ARM classic features that are not in Thumb2. *) Definition label := positive. 
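+(** Operands of data-processing instructions: an immediate, a register,
+    or a register shifted by a constant amount.  Shifts by a register
+    amount do not occur as operands; they are performed by the dedicated
+    [Plsl], [Plsr] and [Pasr] instructions below, in keeping with the
+    Thumb2 restrictions mentioned above. *)
+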
Inductive shift_op : Type := | SOimm: int -> shift_op | SOreg: ireg -> shift_op - | SOlslimm: ireg -> int -> shift_op - | SOlslreg: ireg -> ireg -> shift_op - | SOlsrimm: ireg -> int -> shift_op - | SOlsrreg: ireg -> ireg -> shift_op - | SOasrimm: ireg -> int -> shift_op - | SOasrreg: ireg -> ireg -> shift_op - | SOrorimm: ireg -> int -> shift_op - | SOrorreg: ireg -> ireg -> shift_op. - -Inductive shift_addr : Type := - | SAimm: int -> shift_addr - | SAreg: ireg -> shift_addr - | SAlsl: ireg -> int -> shift_addr - | SAlsr: ireg -> int -> shift_addr - | SAasr: ireg -> int -> shift_addr - | SAror: ireg -> int -> shift_addr. + | SOlsl: ireg -> int -> shift_op + | SOlsr: ireg -> int -> shift_op + | SOasr: ireg -> int -> shift_op + | SOror: ireg -> int -> shift_op. Inductive testcond : Type := | TCeq: testcond (**r equal *) @@ -133,6 +123,7 @@ Inductive instruction : Type := (* Core instructions *) | Padd: ireg -> ireg -> shift_op -> instruction (**r integer addition *) | Pand: ireg -> ireg -> shift_op -> instruction (**r bitwise and *) + | Pasr: ireg -> ireg -> ireg -> instruction (**r arithmetic shift right *) | Pb: label -> instruction (**r branch to label *) | Pbc: testcond -> label -> instruction (**r conditional branch to label *) | Pbsymb: ident -> signature -> instruction (**r branch to symbol *) @@ -142,23 +133,27 @@ Inductive instruction : Type := | Pbic: ireg -> ireg -> shift_op -> instruction (**r bitwise bit-clear *) | Pcmp: ireg -> shift_op -> instruction (**r integer comparison *) | Peor: ireg -> ireg -> shift_op -> instruction (**r bitwise exclusive or *) - | Pldr: ireg -> ireg -> shift_addr -> instruction (**r int32 load *) - | Pldr_a: ireg -> ireg -> shift_addr -> instruction (**r any32 load to int register *) - | Pldrb: ireg -> ireg -> shift_addr -> instruction (**r unsigned int8 load *) - | Pldrh: ireg -> ireg -> shift_addr -> instruction (**r unsigned int16 load *) - | Pldrsb: ireg -> ireg -> shift_addr -> instruction (**r signed int8 load *) - | Pldrsh: ireg -> ireg -> shift_addr -> instruction (**r unsigned int16 load *) + | Pldr: ireg -> ireg -> shift_op -> instruction (**r int32 load *) + | Pldr_a: ireg -> ireg -> shift_op -> instruction (**r any32 load to int register *) + | Pldrb: ireg -> ireg -> shift_op -> instruction (**r unsigned int8 load *) + | Pldrh: ireg -> ireg -> shift_op -> instruction (**r unsigned int16 load *) + | Pldrsb: ireg -> ireg -> shift_op -> instruction (**r signed int8 load *) + | Pldrsh: ireg -> ireg -> shift_op -> instruction (**r unsigned int16 load *) + | Plsl: ireg -> ireg -> ireg -> instruction (**r shift left *) + | Plsr: ireg -> ireg -> ireg -> instruction (**r logical shift right *) | Pmla: ireg -> ireg -> ireg -> ireg -> instruction (**r integer multiply-add *) | Pmov: ireg -> shift_op -> instruction (**r integer move *) - | Pmovc: testcond -> ireg -> shift_op -> instruction (**r integer conditional move *) + | Pmovw: ireg -> int -> instruction (**r move 16-bit immediate *) + | Pmovt: ireg -> int -> instruction (**r set high 16 bits *) | Pmul: ireg -> ireg -> ireg -> instruction (**r integer multiplication *) | Pmvn: ireg -> shift_op -> instruction (**r integer complement *) | Porr: ireg -> ireg -> shift_op -> instruction (**r bitwise or *) | Prsb: ireg -> ireg -> shift_op -> instruction (**r integer reverse subtraction *) - | Pstr: ireg -> ireg -> shift_addr -> instruction (**r int32 store *) - | Pstr_a: ireg -> ireg -> shift_addr -> instruction (**r any32 store from int register *) - | Pstrb: ireg -> ireg -> shift_addr -> 
instruction (**r int8 store *) - | Pstrh: ireg -> ireg -> shift_addr -> instruction (**r int16 store *) + | Psbfx: ireg -> ireg -> int -> int -> instruction (**r signed bitfield extract *) + | Pstr: ireg -> ireg -> shift_op -> instruction (**r int32 store *) + | Pstr_a: ireg -> ireg -> shift_op -> instruction (**r any32 store from int register *) + | Pstrb: ireg -> ireg -> shift_op -> instruction (**r int8 store *) + | Pstrh: ireg -> ireg -> shift_op -> instruction (**r int16 store *) | Psdiv: instruction (**r signed division *) | Psmull: ireg -> ireg -> ireg -> ireg -> instruction (**r signed multiply long *) | Psub: ireg -> ireg -> shift_op -> instruction (**r integer subtraction *) @@ -206,6 +201,7 @@ Inductive instruction : Type := | Pfreeframe: Z -> int -> instruction (**r deallocate stack frame and restore previous frame *) | Plabel: label -> instruction (**r define a code label *) | Ploadsymbol: ireg -> ident -> int -> instruction (**r load the address of a symbol *) + | Pmovite: testcond -> ireg -> shift_op -> shift_op -> instruction (**r integer conditional move *) | Pbtbl: ireg -> list label -> instruction (**r N-way branch through a jump table *) | Pbuiltin: external_function -> list preg -> list preg -> instruction (**r built-in function *) | Pannot: external_function -> list annot_param -> instruction (**r annotation statement *) @@ -288,6 +284,11 @@ Fixpoint undef_regs (l: list preg) (rs: regset) : regset := | r :: l' => undef_regs l' (rs#r <- Vundef) end. +(** Undefining the condition codes *) + +Definition undef_flags (rs: regset) : regset := + fun r => match r with CR _ => Vundef | _ => rs r end. + (** Assigning multiple registers *) Fixpoint set_regs (rl: list preg) (vl: list val) (rs: regset) : regset := @@ -347,6 +348,9 @@ Inductive outcome: Type := Definition nextinstr (rs: regset) := rs#PC <- (Val.add rs#PC Vone). +Definition nextinstr_nf (rs: regset) := + nextinstr (undef_flags rs). + Definition goto_label (f: function) (lbl: label) (rs: regset) (m: mem) := match label_pos lbl 0 (fn_code f) with | None => Stuck @@ -363,29 +367,12 @@ Definition eval_shift_op (so: shift_op) (rs: regset) := match so with | SOimm n => Vint n | SOreg r => rs r - | SOlslimm r n => Val.shl (rs r) (Vint n) - | SOlslreg r r' => Val.shl (rs r) (rs r') - | SOlsrimm r n => Val.shru (rs r) (Vint n) - | SOlsrreg r r' => Val.shru (rs r) (rs r') - | SOasrimm r n => Val.shr (rs r) (Vint n) - | SOasrreg r r' => Val.shr (rs r) (rs r') - | SOrorimm r n => Val.ror (rs r) (Vint n) - | SOrorreg r r' => Val.ror (rs r) (rs r') - end. - -(** Evaluation of [shift_addr] operands *) - -Definition eval_shift_addr (sa: shift_addr) (rs: regset) := - match sa with - | SAimm n => Vint n - | SAreg r => rs r - | SAlsl r n => Val.shl (rs r) (Vint n) - | SAlsr r n => Val.shru (rs r) (Vint n) - | SAasr r n => Val.shr (rs r) (Vint n) - | SAror r n => Val.ror (rs r) (Vint n) + | SOlsl r n => Val.shl (rs r) (Vint n) + | SOlsr r n => Val.shru (rs r) (Vint n) + | SOasr r n => Val.shr (rs r) (Vint n) + | SOror r n => Val.ror (rs r) (Vint n) end. - (** Auxiliaries for memory accesses *) Definition exec_load (chunk: memory_chunk) (addr: val) (r: preg) @@ -515,22 +502,29 @@ Definition eval_testcond (c: testcond) (rs: regset) : option bool := (** Execution of a single instruction [i] in initial state [rs] and [m]. Return updated state. 
For instructions - that correspond to actual PowerPC instructions, the cases are + that correspond to actual ARM instructions, the cases are straightforward transliterations of the informal descriptions - given in the PowerPC reference manuals. For pseudo-instructions, - refer to the informal descriptions given above. Note that - we set to [Vundef] the registers used as temporaries by the - expansions of the pseudo-instructions, so that the PPC code - we generate cannot use those registers to hold values that - must survive the execution of the pseudo-instruction. + given in the ARM reference manuals. For pseudo-instructions, + refer to the informal descriptions given above. + + Note that we set to [Vundef] the registers used as temporaries by + the expansions of the pseudo-instructions, so that the ARM code we + generate cannot use those registers to hold values that must + survive the execution of the pseudo-instruction. + + Likewise, for several instructions we set the condition flags + to [Vundef], so that we can expand them later to the S form + or to the non-S form, whichever is more compact in Thumb2. *) Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : outcome := match i with | Padd r1 r2 so => - Next (nextinstr (rs#r1 <- (Val.add rs#r2 (eval_shift_op so rs)))) m + Next (nextinstr_nf (rs#r1 <- (Val.add rs#r2 (eval_shift_op so rs)))) m | Pand r1 r2 so => - Next (nextinstr (rs#r1 <- (Val.and rs#r2 (eval_shift_op so rs)))) m + Next (nextinstr_nf (rs#r1 <- (Val.and rs#r2 (eval_shift_op so rs)))) m + | Pasr r1 r2 r3 => + Next (nextinstr_nf (rs#r1 <- (Val.shr rs#r2 rs#r3))) m | Pb lbl => goto_label f lbl rs m | Pbc cond lbl => @@ -548,49 +542,52 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out | Pblreg r sg => Next (rs#IR14 <- (Val.add rs#PC Vone) #PC <- (rs#r)) m | Pbic r1 r2 so => - Next (nextinstr (rs#r1 <- (Val.and rs#r2 (Val.notint (eval_shift_op so rs))))) m + Next (nextinstr_nf (rs#r1 <- (Val.and rs#r2 (Val.notint (eval_shift_op so rs))))) m | Pcmp r1 so => Next (nextinstr (compare_int rs rs#r1 (eval_shift_op so rs) m)) m | Peor r1 r2 so => - Next (nextinstr (rs#r1 <- (Val.xor rs#r2 (eval_shift_op so rs)))) m + Next (nextinstr_nf (rs#r1 <- (Val.xor rs#r2 (eval_shift_op so rs)))) m | Pldr r1 r2 sa => - exec_load Mint32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + exec_load Mint32 (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m | Pldr_a r1 r2 sa => - exec_load Many32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + exec_load Many32 (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m | Pldrb r1 r2 sa => - exec_load Mint8unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + exec_load Mint8unsigned (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m | Pldrh r1 r2 sa => - exec_load Mint16unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + exec_load Mint16unsigned (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m | Pldrsb r1 r2 sa => - exec_load Mint8signed (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + exec_load Mint8signed (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m | Pldrsh r1 r2 sa => - exec_load Mint16signed (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + exec_load Mint16signed (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m + | Plsl r1 r2 r3 => + Next (nextinstr_nf (rs#r1 <- (Val.shl rs#r2 rs#r3))) m + | Plsr r1 r2 r3 => + Next (nextinstr_nf (rs#r1 <- (Val.shru rs#r2 rs#r3))) m | Pmla r1 r2 r3 r4 => Next (nextinstr (rs#r1 <- (Val.add (Val.mul rs#r2 rs#r3) rs#r4))) m | Pmov r1 so => - Next (nextinstr (rs#r1 <- 
(eval_shift_op so rs))) m - | Pmovc cond r1 so => - match eval_testcond cond rs with - | Some true => Next (nextinstr (rs#r1 <- (eval_shift_op so rs))) m - | Some false => Next (nextinstr rs) m - | None => Next (nextinstr (rs#r1 <- Vundef)) m - end + Next (nextinstr_nf (rs#r1 <- (eval_shift_op so rs))) m + | Pmovw r n => + Next (nextinstr (rs#r <- (Vint n))) m + | Pmovt r n => + Next (nextinstr (rs#r <- (Val.or (Val.and rs#r (Vint (Int.repr 65535))) + (Vint (Int.shl n (Int.repr 16)))))) m | Pmul r1 r2 r3 => Next (nextinstr (rs#r1 <- (Val.mul rs#r2 rs#r3))) m | Pmvn r1 so => - Next (nextinstr (rs#r1 <- (Val.notint (eval_shift_op so rs)))) m + Next (nextinstr_nf (rs#r1 <- (Val.notint (eval_shift_op so rs)))) m | Porr r1 r2 so => - Next (nextinstr (rs#r1 <- (Val.or rs#r2 (eval_shift_op so rs)))) m + Next (nextinstr_nf (rs#r1 <- (Val.or rs#r2 (eval_shift_op so rs)))) m | Prsb r1 r2 so => - Next (nextinstr (rs#r1 <- (Val.sub (eval_shift_op so rs) rs#r2))) m + Next (nextinstr_nf (rs#r1 <- (Val.sub (eval_shift_op so rs) rs#r2))) m | Pstr r1 r2 sa => - exec_store Mint32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + exec_store Mint32 (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m | Pstr_a r1 r2 sa => - exec_store Many32 (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + exec_store Many32 (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m | Pstrb r1 r2 sa => - exec_store Mint8unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + exec_store Mint8unsigned (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m | Pstrh r1 r2 sa => - exec_store Mint16unsigned (Val.add rs#r2 (eval_shift_addr sa rs)) r1 rs m + exec_store Mint16unsigned (Val.add rs#r2 (eval_shift_op sa rs)) r1 rs m | Psdiv => match Val.divs rs#IR0 rs#IR1 with | Some v => Next (nextinstr (rs#IR0 <- v @@ -598,11 +595,13 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out #IR3 <- Vundef #IR12 <- Vundef)) m | None => Stuck end + | Psbfx r1 r2 lsb sz => + Next (nextinstr (rs#r1 <- (Val.sign_ext (Int.unsigned sz) (Val.shru rs#r2 (Vint lsb))))) m | Psmull rdl rdh r1 r2 => Next (nextinstr (rs#rdl <- (Val.mul rs#r1 rs#r2) #rdh <- (Val.mulhs rs#r1 rs#r2))) m | Psub r1 r2 so => - Next (nextinstr (rs#r1 <- (Val.sub rs#r2 (eval_shift_op so rs)))) m + Next (nextinstr_nf (rs#r1 <- (Val.sub rs#r2 (eval_shift_op so rs)))) m | Pudiv => match Val.divu rs#IR0 rs#IR1 with | Some v => Next (nextinstr (rs#IR0 <- v @@ -709,6 +708,14 @@ Definition exec_instr (f: function) (i: instruction) (rs: regset) (m: mem) : out Next (nextinstr rs) m | Ploadsymbol r1 lbl ofs => Next (nextinstr (rs#r1 <- (Genv.symbol_address ge lbl ofs))) m + | Pmovite cond r1 ifso ifnot => + let v := + match eval_testcond cond rs with + | Some true => eval_shift_op ifso rs + | Some false => eval_shift_op ifnot rs + | None => Vundef + end in + Next (nextinstr (rs#r1 <- v)) m | Pbtbl r tbl => match rs#r with | Vint n => diff --git a/arm/Asmgen.v b/arm/Asmgen.v index fa4faa6..de4b87f 100644 --- a/arm/Asmgen.v +++ b/arm/Asmgen.v @@ -21,6 +21,7 @@ Require Import Op. Require Import Locations. Require Import Mach. Require Import Asm. +Require Import Compopts. Open Local Scope string_scope. Open Local Scope error_monad_scope. @@ -33,62 +34,120 @@ Definition ireg_of (r: mreg) : res ireg := Definition freg_of (r: mreg) : res freg := match preg_of r with FR mr => OK mr | _ => Error(msg "Asmgen.freg_of") end. -(** Recognition of integer immediate arguments. -- For arithmetic operations, immediates are - 8-bit quantities zero-extended and rotated right by 0, 2, 4, ... 30 bits. 
-- For memory accesses of type [Mint32], immediate offsets are
-  12-bit quantities plus a sign bit.
-- For other memory accesses, immediate offsets are
-  8-bit quantities plus a sign bit. *)
-
-Fixpoint is_immed_arith_aux (n: nat) (x msk: int) {struct n}: bool :=
+(** Recognition of integer immediate arguments for arithmetic operations.
+- ARM: immediates are 8-bit quantities zero-extended and rotated right
+  by 0, 2, 4, ... 30 bits. In other words, [n] is an immediate iff
+  [rotate-left(n, p)] is between 0 and 255 for some [p = 0, 2, 4, ..., 30].
+- Thumb: immediates are 8-bit quantities zero-extended and shifted left
+  by 0, 1, ..., 31 bits. In other words, [n] is an immediate iff
+  all its bits are 0 except for a run of at most 8 adjacent bits. In addition,
+  [00XY00XY] and [XY00XY00] and [XYXYXYXY] are immediates for
+  a given [XY] 8-bit constant.
+*)
+
+Fixpoint is_immed_arith_arm (n: nat) (x: int) {struct n}: bool :=
   match n with
   | Datatypes.O => false
-  | Datatypes.S n' =>
-      Int.eq (Int.and x (Int.not msk)) Int.zero ||
-      is_immed_arith_aux n' x (Int.ror msk (Int.repr 2))
+  | Datatypes.S n =>
+      Int.eq x (Int.and x (Int.repr 255)) ||
+      is_immed_arith_arm n (Int.rol x (Int.repr 2))
+  end.
+
+Fixpoint is_immed_arith_thumb (n: nat) (x: int) {struct n}: bool :=
+  match n with
+  | Datatypes.O => true
+  | Datatypes.S n =>
+      Int.eq x (Int.and x (Int.repr 255)) ||
+      (Int.eq (Int.and x Int.one) Int.zero
+       && is_immed_arith_thumb n (Int.shru x Int.one))
   end.
 
-Definition is_immed_arith (x: int) : bool :=
-  is_immed_arith_aux 16%nat x (Int.repr 255).
-
-Definition is_immed_mem_word (x: int) : bool :=
-  Int.lt x (Int.repr 4096) && Int.lt (Int.repr (-4096)) x.
-
-Definition mk_immed_mem_word (x: int) : int :=
-  Int.sign_ext 13 x.
-
-Definition is_immed_mem_small (x: int) : bool :=
-  Int.lt x (Int.repr 256) && Int.lt (Int.repr (-256)) x.
-
-Definition mk_immed_mem_small (x: int) : int :=
-  Int.sign_ext 9 x.
-
-Definition is_immed_mem_float (x: int) : bool :=
-  Int.eq (Int.and x (Int.repr 3)) Int.zero
-  && Int.lt x (Int.repr 1024) && Int.lt (Int.repr (-1024)) x.
-
-Definition mk_immed_mem_float (x: int) : int :=
-  Int.and (Int.sign_ext 11 x) (Int.repr 4294967288). (**r 0xfffffff8 *)
-
+Definition is_immed_arith_thumb_special (x: int): bool :=
+  let l1 := Int.and x (Int.repr 255) in
+  let l2 := Int.shl l1 (Int.repr 8) in
+  let l3 := Int.shl l2 (Int.repr 8) in
+  let l4 := Int.shl l3 (Int.repr 8) in
+  let l13 := Int.or l1 l3 in
+  let l24 := Int.or l2 l4 in
+  Int.eq x l13 || Int.eq x l24 || Int.eq x (Int.or l13 l24).
+
+Definition is_immed_arith (x: int): bool :=
+  if thumb tt
+  then is_immed_arith_thumb 24%nat x || is_immed_arith_thumb_special x
+  else is_immed_arith_arm 16%nat x.
+
+(** Recognition of integer immediate arguments for indexed memory accesses.
+- For 32-bit integers, immediate offsets are [(-2^12,2^12)] for ARM classic
+  and [(-2^8,2^12)] for Thumb2.
+- For 8- and 16-bit integers, immediate offsets are [(-2^8,2^8)].
+- For 32- and 64-bit floats, immediate offsets are multiples of 4
+  in [(-2^10,2^10)].
+
+For all 3 kinds of accesses, we provide not a recognizer but a synthesizer:
+a function taking an arbitrary offset [n] and returning a valid offset [n']
+that contains as many useful bits of [n] as possible, so that the
+computation of the remainder [n - n'] is as simple as possible.
+In particular, if [n] is a representable immediate argument, we should have
+[n' = n].
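+
+As a purely illustrative example: for a 32-bit access at offset [n] = 4100
+(0x1004), [mk_immed_mem_word] below returns [n'] = 4, and the remainder
+[n - n'] = 4096 (0x1000) is itself a valid arithmetic immediate, so [addimm]
+can materialize it with a single add.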
+*) + +Definition mk_immed_mem_word (x: int): int := + if Int.ltu x Int.zero then + Int.neg (Int.zero_ext (if thumb tt then 8 else 12) (Int.neg x)) + else + Int.zero_ext 12 x. + +Definition mk_immed_mem_small (x: int): int := + if Int.ltu x Int.zero then + Int.neg (Int.zero_ext 8 (Int.neg x)) + else + Int.zero_ext 8 x. + +Definition mk_immed_mem_float (x: int): int := + let x := Int.and x (Int.repr (-4)) in (**r mask low 2 bits off *) + if Int.ltu x Int.zero then + Int.neg (Int.zero_ext 10 (Int.neg x)) + else + Int.zero_ext 10 x. + (** Decomposition of a 32-bit integer into a list of immediate arguments, whose sum or "or" or "xor" equals the integer. *) -Fixpoint decompose_int_rec (N: nat) (n p: int) : list int := +Fixpoint decompose_int_arm (N: nat) (n p: int) : list int := match N with | Datatypes.O => if Int.eq n Int.zero then nil else n :: nil | Datatypes.S M => if Int.eq (Int.and n (Int.shl (Int.repr 3) p)) Int.zero then - decompose_int_rec M n (Int.add p (Int.repr 2)) + decompose_int_arm M n (Int.add p (Int.repr 2)) + else + let m := Int.shl (Int.repr 255) p in + Int.and n m :: + decompose_int_arm M (Int.and n (Int.not m)) (Int.add p (Int.repr 2)) + end. + +Fixpoint decompose_int_thumb (N: nat) (n p: int) : list int := + match N with + | Datatypes.O => + if Int.eq n Int.zero then nil else n :: nil + | Datatypes.S M => + if Int.eq (Int.and n (Int.shl Int.one p)) Int.zero then + decompose_int_thumb M n (Int.add p Int.one) else let m := Int.shl (Int.repr 255) p in Int.and n m :: - decompose_int_rec M (Int.and n (Int.not m)) (Int.add p (Int.repr 2)) + decompose_int_thumb M (Int.and n (Int.not m)) (Int.add p Int.one) end. +Definition decompose_int_base (n: int): list int := + if thumb tt + then if is_immed_arith_thumb_special n + then n :: nil + else decompose_int_thumb 24%nat n Int.zero + else decompose_int_arm 12%nat n Int.zero. + Definition decompose_int (n: int) : list int := - match decompose_int_rec 12%nat n Int.zero with + match decompose_int_base n with | nil => Int.zero :: nil | l => l end. @@ -103,28 +162,46 @@ Definition iterate_op (op1 op2: shift_op -> instruction) (l: list int) (k: code) (** Smart constructors for integer immediate arguments. *) +Definition loadimm_thumb (r: ireg) (n: int) (k: code) := + let hi := Int.shru n (Int.repr 16) in + if Int.eq hi Int.zero + then Pmovw r n :: k + else Pmovw r (Int.zero_ext 16 n) :: Pmovt r hi :: k. + Definition loadimm (r: ireg) (n: int) (k: code) := let d1 := decompose_int n in let d2 := decompose_int (Int.not n) in - if NPeano.leb (List.length d1) (List.length d2) - then iterate_op (Pmov r) (Porr r r) d1 k - else iterate_op (Pmvn r) (Pbic r r) d2 k. + let l1 := List.length d1 in + let l2 := List.length d2 in + if NPeano.leb l1 1%nat then + Pmov r (SOimm n) :: k + else if NPeano.leb l2 1%nat then + Pmvn r (SOimm (Int.not n)) :: k + else if thumb tt then + loadimm_thumb r n k + else if NPeano.leb l1 l2 then + iterate_op (Pmov r) (Porr r r) d1 k + else + iterate_op (Pmvn r) (Pbic r r) d2 k. Definition addimm (r1 r2: ireg) (n: int) (k: code) := - let d1 := decompose_int n in - let d2 := decompose_int (Int.neg n) in - if NPeano.leb (List.length d1) (List.length d2) - then iterate_op (Padd r1 r2) (Padd r1 r1) d1 k - else iterate_op (Psub r1 r2) (Psub r1 r1) d2 k. 
+ if Int.ltu (Int.repr (-256)) n then + Psub r1 r2 (SOimm (Int.neg n)) :: k + else + (let d1 := decompose_int n in + let d2 := decompose_int (Int.neg n) in + if NPeano.leb (List.length d1) (List.length d2) + then iterate_op (Padd r1 r2) (Padd r1 r1) d1 k + else iterate_op (Psub r1 r2) (Psub r1 r1) d2 k). + +Definition rsubimm (r1 r2: ireg) (n: int) (k: code) := + iterate_op (Prsb r1 r2) (Padd r1 r1) (decompose_int n) k. Definition andimm (r1 r2: ireg) (n: int) (k: code) := if is_immed_arith n then Pand r1 r2 (SOimm n) :: k else iterate_op (Pbic r1 r2) (Pbic r1 r1) (decompose_int (Int.not n)) k. -Definition rsubimm (r1 r2: ireg) (n: int) (k: code) := - iterate_op (Prsb r1 r2) (Padd r1 r1) (decompose_int n) k. - Definition orimm (r1 r2: ireg) (n: int) (k: code) := iterate_op (Porr r1 r2) (Porr r1 r1) (decompose_int n) k. @@ -135,10 +212,10 @@ Definition xorimm (r1 r2: ireg) (n: int) (k: code) := Definition transl_shift (s: shift) (r: ireg) : shift_op := match s with - | Slsl n => SOlslimm r (s_amount n) - | Slsr n => SOlsrimm r (s_amount n) - | Sasr n => SOasrimm r (s_amount n) - | Sror n => SOrorimm r (s_amount n) + | Slsl n => SOlsl r (s_amount n) + | Slsr n => SOlsr r (s_amount n) + | Sasr n => SOasr r (s_amount n) + | Sror n => SOror r (s_amount n) end. (** Translation of a condition. Prepends to [k] the instructions @@ -288,12 +365,18 @@ Definition transl_op OK (addimm r IR13 n k) | Ocast8signed, a1 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; - OK (Pmov r (SOlslimm r1 (Int.repr 24)) :: - Pmov r (SOasrimm r (Int.repr 24)) :: k) + OK (if thumb tt then + Psbfx r r1 Int.zero (Int.repr 8) :: k + else + Pmov r (SOlsl r1 (Int.repr 24)) :: + Pmov r (SOasr r (Int.repr 24)) :: k) | Ocast16signed, a1 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; - OK (Pmov r (SOlslimm r1 (Int.repr 16)) :: - Pmov r (SOasrimm r (Int.repr 16)) :: k) + OK (if thumb tt then + Psbfx r r1 Int.zero (Int.repr 16) :: k + else + Pmov r (SOlsl r1 (Int.repr 16)) :: + Pmov r (SOasr r (Int.repr 16)) :: k) | Oadd, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; OK (Padd r r1 (SOreg r2) :: k) @@ -317,15 +400,11 @@ Definition transl_op OK (rsubimm r r1 n k) | Omul, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (if negb (ireg_eq r r1) then Pmul r r1 r2 :: k - else if negb (ireg_eq r r2) then Pmul r r2 r1 :: k - else Pmul IR14 r1 r2 :: Pmov r (SOreg IR14) :: k) + OK (Pmul r r1 r2 :: k) | Omla, a1 :: a2 :: a3 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; do r3 <- ireg_of a3; - OK (if negb (ireg_eq r r1) then Pmla r r1 r2 r3 :: k - else if negb (ireg_eq r r2) then Pmla r r2 r1 r3 :: k - else Pmla IR14 r1 r2 r3 :: Pmov r (SOreg IR14) :: k) + OK (Pmla r r1 r2 r3 :: k) | Omulhs, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; OK (Psmull IR14 r r1 r2 :: k) @@ -383,13 +462,13 @@ Definition transl_op OK (Pmvn r (transl_shift s r1) :: k) | Oshl, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pmov r (SOlslreg r1 r2) :: k) + OK (Plsl r r1 r2 :: k) | Oshr, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pmov r (SOasrreg r1 r2) :: k) + OK (Pasr r r1 r2 :: k) | Oshru, a1 :: a2 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (Pmov r (SOlsrreg r1 r2) :: k) + OK (Plsr r r1 r2 :: k) | Oshift s, a1 :: nil => do r <- ireg_of res; do r1 <- ireg_of a1; OK (Pmov r (transl_shift s r1) :: k) @@ -398,9 
+477,9 @@ Definition transl_op if Int.eq n Int.zero then OK (Pmov r (SOreg r1) :: k) else - OK (Pmov IR14 (SOasrimm r1 (Int.repr 31)) :: - Padd IR14 r1 (SOlsrimm IR14 (Int.sub Int.iwordsize n)) :: - Pmov r (SOasrimm IR14 n) :: k) + OK (Pmov IR14 (SOasr r1 (Int.repr 31)) :: + Padd IR14 r1 (SOlsr IR14 (Int.sub Int.iwordsize n)) :: + Pmov r (SOasr IR14 n) :: k) | Onegf, a1 :: nil => do r <- freg_of res; do r1 <- freg_of a1; OK (Pfnegd r r1 :: k) @@ -470,9 +549,7 @@ Definition transl_op | Ocmp cmp, _ => do r <- ireg_of res; transl_cond cmp args - (Pmov r (SOimm Int.zero) :: - Pmovc (cond_for_cond cmp) r (SOimm Int.one) :: - k) + (Pmovite (cond_for_cond cmp) r (SOimm Int.one) (SOimm Int.zero) :: k) | _, _ => Error(msg "Asmgen.transl_op") end. @@ -489,14 +566,14 @@ Definition indexed_memory_access else addimm IR14 base (Int.sub n n1) (mk_instr IR14 n1 :: k). Definition loadind_int (base: ireg) (ofs: int) (dst: ireg) (k: code) := - indexed_memory_access (fun base n => Pldr dst base (SAimm n)) mk_immed_mem_word base ofs k. + indexed_memory_access (fun base n => Pldr dst base (SOimm n)) mk_immed_mem_word base ofs k. Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) := match ty, preg_of dst with | Tint, IR r => - OK (indexed_memory_access (fun base n => Pldr r base (SAimm n)) mk_immed_mem_word base ofs k) + OK (indexed_memory_access (fun base n => Pldr r base (SOimm n)) mk_immed_mem_word base ofs k) | Tany32, IR r => - OK (indexed_memory_access (fun base n => Pldr_a r base (SAimm n)) mk_immed_mem_word base ofs k) + OK (indexed_memory_access (fun base n => Pldr_a r base (SOimm n)) mk_immed_mem_word base ofs k) | Tsingle, FR r => OK (indexed_memory_access (Pflds r) mk_immed_mem_float base ofs k) | Tfloat, FR r => @@ -510,9 +587,9 @@ Definition loadind (base: ireg) (ofs: int) (ty: typ) (dst: mreg) (k: code) := Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) := match ty, preg_of src with | Tint, IR r => - OK (indexed_memory_access (fun base n => Pstr r base (SAimm n)) mk_immed_mem_word base ofs k) + OK (indexed_memory_access (fun base n => Pstr r base (SOimm n)) mk_immed_mem_word base ofs k) | Tany32, IR r => - OK (indexed_memory_access (fun base n => Pstr_a r base (SAimm n)) mk_immed_mem_word base ofs k) + OK (indexed_memory_access (fun base n => Pstr_a r base (SOimm n)) mk_immed_mem_word base ofs k) | Tsingle, FR r => OK (indexed_memory_access (Pfsts r) mk_immed_mem_float base ofs k) | Tfloat, FR r => @@ -525,17 +602,9 @@ Definition storeind (src: mreg) (base: ireg) (ofs: int) (ty: typ) (k: code) := (** Translation of memory accesses *) -Definition transl_shift_addr (s: shift) (r: ireg) : shift_addr := - match s with - | Slsl n => SAlsl r (s_amount n) - | Slsr n => SAlsr r (s_amount n) - | Sasr n => SAasr r (s_amount n) - | Sror n => SAror r (s_amount n) - end. 
- Definition transl_memory_access (mk_instr_imm: ireg -> int -> instruction) - (mk_instr_gen: option (ireg -> shift_addr -> instruction)) + (mk_instr_gen: option (ireg -> shift_op -> instruction)) (mk_immed: int -> int) (addr: addressing) (args: list mreg) (k: code) := match addr, args with @@ -546,7 +615,7 @@ Definition transl_memory_access match mk_instr_gen with | Some f => do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (f r1 (SAreg r2) :: k) + OK (f r1 (SOreg r2) :: k) | None => Error (msg "Asmgen.Aindexed2") end @@ -554,7 +623,7 @@ Definition transl_memory_access match mk_instr_gen with | Some f => do r1 <- ireg_of a1; do r2 <- ireg_of a2; - OK (f r1 (transl_shift_addr s r2) :: k) + OK (f r1 (transl_shift s r2) :: k) | None => Error (msg "Asmgen.Aindexed2shift") end @@ -565,12 +634,12 @@ Definition transl_memory_access end. Definition transl_memory_access_int - (mk_instr: ireg -> ireg -> shift_addr -> instruction) + (mk_instr: ireg -> ireg -> shift_op -> instruction) (mk_immed: int -> int) (dst: mreg) (addr: addressing) (args: list mreg) (k: code) := do rd <- ireg_of dst; transl_memory_access - (fun r n => mk_instr rd r (SAimm n)) + (fun r n => mk_instr rd r (SOimm n)) (Some (mk_instr rd)) mk_immed addr args k. @@ -729,7 +798,7 @@ Definition transl_function (f: Mach.function) := do c <- transl_code f f.(Mach.fn_code) true; OK (mkfunction f.(Mach.fn_sig) (Pallocframe f.(fn_stacksize) f.(fn_link_ofs) :: - Pstr IR14 IR13 (SAimm f.(fn_retaddr_ofs)) :: c)). + Pstr IR14 IR13 (SOimm f.(fn_retaddr_ofs)) :: c)). Definition transf_function (f: Mach.function) : res Asm.function := do tf <- transl_function f; diff --git a/arm/Asmgenproof.v b/arm/Asmgenproof.v index 341f6a0..713e012 100644 --- a/arm/Asmgenproof.v +++ b/arm/Asmgenproof.v @@ -27,6 +27,7 @@ Require Import Op. Require Import Locations. Require Import Conventions. Require Import Mach. +Require Import Compopts. Require Import Asm. Require Import Asmgen. Require Import Asmgenproof0. @@ -179,8 +180,13 @@ Remark loadimm_label: forall r n k, tail_nolabel k (loadimm r n k). Proof. intros. unfold loadimm. - destruct (NPeano.leb (length (decompose_int n)) (length (decompose_int (Int.not n)))); - auto with labels. + set (l1 := length (decompose_int n)). + set (l2 := length (decompose_int (Int.not n))). + destruct (NPeano.leb l1 1%nat). TailNoLabel. + destruct (NPeano.leb l2 1%nat). TailNoLabel. + destruct (thumb tt). unfold loadimm_thumb. + destruct (Int.eq (Int.shru n (Int.repr 16)) Int.zero); TailNoLabel. + destruct (NPeano.leb l1 l2); auto with labels. Qed. Hint Resolve loadimm_label: labels. @@ -188,6 +194,7 @@ Remark addimm_label: forall r1 r2 n k, tail_nolabel k (addimm r1 r2 n k). Proof. intros; unfold addimm. + destruct (Int.ltu (Int.repr (-256)) n). TailNoLabel. destruct (NPeano.leb (length (decompose_int n)) (length (decompose_int (Int.neg n)))); auto with labels. Qed. @@ -262,15 +269,15 @@ Proof. Opaque Int.eq. unfold transl_op; intros; destruct op; TailNoLabel. destruct (preg_of r); try discriminate; destruct (preg_of m); inv H; TailNoLabel. - destruct (negb (ireg_eq x x0)). TailNoLabel. destruct (negb (ireg_eq x x1)); TailNoLabel. - destruct (negb (ireg_eq x x0)). TailNoLabel. destruct (negb (ireg_eq x x1)); TailNoLabel. + destruct (thumb tt); TailNoLabel. + destruct (thumb tt); TailNoLabel. eapply tail_nolabel_trans; TailNoLabel. eapply tail_nolabel_trans. eapply transl_cond_label; eauto. TailNoLabel. Qed. 
Remark transl_memory_access_label: forall (mk_instr_imm: ireg -> int -> instruction) - (mk_instr_gen: option (ireg -> shift_addr -> instruction)) + (mk_instr_gen: option (ireg -> shift_op -> instruction)) (mk_immed: int -> int) (addr: addressing) (args: list mreg) c k, transl_memory_access mk_instr_imm mk_instr_gen mk_immed addr args k = OK c -> @@ -556,8 +563,6 @@ Proof. exists rs'; split. eauto. split. eapply agree_undef_regs; eauto with asmgen. simpl; intros. rewrite Q; auto with asmgen. -Local Transparent destroyed_by_setstack. - destruct ty; simpl; auto with asmgen. - (* Mgetparam *) assert (f0 = f) by congruence; subst f0. @@ -687,7 +692,7 @@ Opaque loadind. k rs2 m2' /\ rs2#SP = parent_sp s /\ rs2#RA = parent_ra s - /\ forall r, r <> PC -> r <> SP -> r <> IR14 -> rs2#r = rs0#r). + /\ forall r, if_preg r = true -> r <> SP -> r <> IR14 -> rs2#r = rs0#r). { intros. exploit loadind_int_correct. eexact C. intros [rs1 [P [Q R]]]. @@ -695,9 +700,7 @@ Opaque loadind. eapply exec_straight_trans. eexact P. apply exec_straight_one. simpl. rewrite R; auto with asmgen. unfold chunk_of_type in A. rewrite A. rewrite <- (sp_val _ _ _ AG). rewrite E. eauto. auto. - split. Simpl. - split. Simpl. - intros. Simpl. + split. Simpl. split. Simpl. intros. Simpl. } destruct ros as [rf|fid]; simpl in H; monadInv H7. + (* Indirect call *) @@ -794,19 +797,19 @@ Opaque loadind. left; eapply exec_straight_steps_goto; eauto. intros. simpl in TR. destruct (transl_cond_correct tge tf cond args _ rs0 m' _ TR) as [rs' [A [B C]]]. - rewrite EC in B. + rewrite EC in B. destruct B as [Bpos Bneg]. econstructor; econstructor; econstructor; split. eexact A. split. eapply agree_undef_regs; eauto with asmgen. - simpl. rewrite B. reflexivity. + simpl. rewrite Bpos. reflexivity. - (* Mcond false *) exploit eval_condition_lessdef. eapply preg_vals; eauto. eauto. eauto. intros EC. left; eapply exec_straight_steps; eauto. intros. simpl in TR. destruct (transl_cond_correct tge tf cond args _ rs0 m' _ TR) as [rs' [A [B C]]]. - rewrite EC in B. + rewrite EC in B. destruct B as [Bpos Bneg]. econstructor; split. eapply exec_straight_trans. eexact A. - apply exec_straight_one. simpl. rewrite B. reflexivity. auto. + apply exec_straight_one. simpl. rewrite Bpos. reflexivity. auto. split. eapply agree_undef_regs; eauto with asmgen. intros; Simpl. simpl. congruence. @@ -849,7 +852,7 @@ Opaque loadind. k rs2 m2' /\ rs2#SP = parent_sp s /\ rs2#RA = parent_ra s - /\ forall r, r <> PC -> r <> SP -> r <> IR14 -> rs2#r = rs0#r). + /\ forall r, if_preg r = true -> r <> SP -> r <> IR14 -> rs2#r = rs0#r). { intros. exploit loadind_int_correct. eexact C. intros [rs1 [P [Q R]]]. diff --git a/arm/Asmgenproof1.v b/arm/Asmgenproof1.v index a0d6752..c859434 100644 --- a/arm/Asmgenproof1.v +++ b/arm/Asmgenproof1.v @@ -24,6 +24,7 @@ Require Import Globalenvs. Require Import Op. Require Import Locations. Require Import Mach. +Require Import Compopts. Require Import Asm. Require Import Asmgen. Require Import Conventions. @@ -45,13 +46,55 @@ Proof. Qed. Hint Resolve ireg_of_not_R14': asmgen. +(** [undef_flags] and [nextinstr_nf] *) + +Lemma nextinstr_nf_pc: + forall rs, (nextinstr_nf rs)#PC = Val.add rs#PC Vone. +Proof. + intros. reflexivity. +Qed. + +Definition if_preg (r: preg) : bool := + match r with + | IR _ => true + | FR _ => true + | CR _ => false + | PC => false + end. + +Lemma data_if_preg: forall r, data_preg r = true -> if_preg r = true. +Proof. + intros. destruct r; reflexivity || discriminate. +Qed. 
+ +Lemma if_preg_not_PC: forall r, if_preg r = true -> r <> PC. +Proof. + intros; red; intros; subst; discriminate. +Qed. + +Hint Resolve data_if_preg if_preg_not_PC: asmgen. + +Lemma nextinstr_nf_inv: + forall r rs, if_preg r = true -> (nextinstr_nf rs)#r = rs#r. +Proof. + intros. destruct r; reflexivity || discriminate. +Qed. + +Lemma nextinstr_nf_inv1: + forall r rs, data_preg r = true -> (nextinstr_nf rs)#r = rs#r. +Proof. + intros. destruct r; reflexivity || discriminate. +Qed. + (** Useful simplification tactic *) Ltac Simplif := ((rewrite nextinstr_inv by eauto with asmgen) || (rewrite nextinstr_inv1 by eauto with asmgen) + || (rewrite nextinstr_nf_inv by eauto with asmgen) || (rewrite Pregmap.gss) || (rewrite nextinstr_pc) + || (rewrite nextinstr_nf_pc) || (rewrite Pregmap.gso by eauto with asmgen)); auto with asmgen. Ltac Simpl := repeat Simplif. @@ -65,8 +108,8 @@ Variable fn: function. (** Decomposition of an integer constant *) -Lemma decompose_int_rec_or: - forall N n p x, List.fold_left Int.or (decompose_int_rec N n p) x = Int.or x n. +Lemma decompose_int_arm_or: + forall N n p x, List.fold_left Int.or (decompose_int_arm N n p) x = Int.or x n. Proof. induction N; intros; simpl. predSpec Int.eq Int.eq_spec n Int.zero; simpl. @@ -78,8 +121,8 @@ Proof. rewrite Int.or_not_self. apply Int.and_mone. Qed. -Lemma decompose_int_rec_xor: - forall N n p x, List.fold_left Int.xor (decompose_int_rec N n p) x = Int.xor x n. +Lemma decompose_int_arm_xor: + forall N n p x, List.fold_left Int.xor (decompose_int_arm N n p) x = Int.xor x n. Proof. induction N; intros; simpl. predSpec Int.eq Int.eq_spec n Int.zero; simpl. @@ -91,8 +134,8 @@ Proof. rewrite Int.xor_not_self. apply Int.and_mone. Qed. -Lemma decompose_int_rec_add: - forall N n p x, List.fold_left Int.add (decompose_int_rec N n p) x = Int.add x n. +Lemma decompose_int_arm_add: + forall N n p x, List.fold_left Int.add (decompose_int_arm N n p) x = Int.add x n. Proof. induction N; intros; simpl. predSpec Int.eq Int.eq_spec n Int.zero; simpl. @@ -104,10 +147,56 @@ Proof. rewrite Int.or_not_self. apply Int.and_mone. apply Int.and_not_self. Qed. -Remark decompose_int_rec_nil: - forall N n p, decompose_int_rec N n p = nil -> n = Int.zero. +Remark decompose_int_arm_nil: + forall N n p, decompose_int_arm N n p = nil -> n = Int.zero. Proof. - intros. generalize (decompose_int_rec_or N n p Int.zero). rewrite H. simpl. + intros. generalize (decompose_int_arm_or N n p Int.zero). rewrite H. simpl. + rewrite Int.or_commut; rewrite Int.or_zero; auto. +Qed. + +Lemma decompose_int_thumb_or: + forall N n p x, List.fold_left Int.or (decompose_int_thumb N n p) x = Int.or x n. +Proof. + induction N; intros; simpl. + predSpec Int.eq Int.eq_spec n Int.zero; simpl. + subst n. rewrite Int.or_zero. auto. + auto. + predSpec Int.eq Int.eq_spec (Int.and n (Int.shl Int.one p)) Int.zero. + auto. + simpl. rewrite IHN. rewrite Int.or_assoc. decEq. rewrite <- Int.and_or_distrib. + rewrite Int.or_not_self. apply Int.and_mone. +Qed. + +Lemma decompose_int_thumb_xor: + forall N n p x, List.fold_left Int.xor (decompose_int_thumb N n p) x = Int.xor x n. +Proof. + induction N; intros; simpl. + predSpec Int.eq Int.eq_spec n Int.zero; simpl. + subst n. rewrite Int.xor_zero. auto. + auto. + predSpec Int.eq Int.eq_spec (Int.and n (Int.shl Int.one p)) Int.zero. + auto. + simpl. rewrite IHN. rewrite Int.xor_assoc. decEq. rewrite <- Int.and_xor_distrib. + rewrite Int.xor_not_self. apply Int.and_mone. +Qed. 
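+
+(* Purely as an illustration of the decomposition characterized by these
+   lemmas: with the fuel [24%nat] used in [decompose_int_base],
+   [decompose_int_thumb] maps [0x12345] to the list [0x45; 0x2300; 0x10000].
+   Each element is an 8-bit constant shifted left, the elements have
+   pairwise disjoint bits, and their "or", "xor" and sum all equal the
+   original integer, consistently with the [fold_left] equations proved
+   here. *)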
+ +Lemma decompose_int_thumb_add: + forall N n p x, List.fold_left Int.add (decompose_int_thumb N n p) x = Int.add x n. +Proof. + induction N; intros; simpl. + predSpec Int.eq Int.eq_spec n Int.zero; simpl. + subst n. rewrite Int.add_zero. auto. + auto. + predSpec Int.eq Int.eq_spec (Int.and n (Int.shl Int.one p)) Int.zero. + auto. + simpl. rewrite IHN. rewrite Int.add_assoc. decEq. rewrite Int.add_and. + rewrite Int.or_not_self. apply Int.and_mone. apply Int.and_not_self. +Qed. + +Remark decompose_int_thumb_nil: + forall N n p, decompose_int_thumb N n p = nil -> n = Int.zero. +Proof. + intros. generalize (decompose_int_thumb_or N n p Int.zero). rewrite H. simpl. rewrite Int.or_commut; rewrite Int.or_zero; auto. Qed. @@ -116,23 +205,31 @@ Lemma decompose_int_general: (forall v1 n2 n3, f (f v1 n2) n3 = f v1 (g n2 n3)) -> (forall n1 n2 n3, g (g n1 n2) n3 = g n1 (g n2 n3)) -> (forall n, g Int.zero n = n) -> - (forall N n p x, List.fold_left g (decompose_int_rec N n p) x = g x n) -> + (forall N n p x, List.fold_left g (decompose_int_arm N n p) x = g x n) -> + (forall N n p x, List.fold_left g (decompose_int_thumb N n p) x = g x n) -> forall n v, List.fold_left f (decompose_int n) v = f v n. Proof. - intros f g DISTR ASSOC ZERO DECOMP. + intros f g DISTR ASSOC ZERO DECOMP1 DECOMP2. assert (A: forall l x y, g x (fold_left g l y) = fold_left g l (g x y)). induction l; intros; simpl. auto. rewrite IHl. decEq. rewrite ASSOC; auto. assert (B: forall l v n, fold_left f l (f v n) = f v (fold_left g l n)). induction l; intros; simpl. auto. rewrite IHl. rewrite DISTR. decEq. decEq. auto. - intros. unfold decompose_int. - destruct (decompose_int_rec 12 n Int.zero) eqn:?. - simpl. exploit decompose_int_rec_nil; eauto. congruence. - simpl. rewrite B. decEq. - generalize (DECOMP 12%nat n Int.zero Int.zero). - rewrite Heql. simpl. repeat rewrite ZERO. auto. + intros. unfold decompose_int, decompose_int_base. + destruct (thumb tt); [destruct (is_immed_arith_thumb_special n)|]. +- reflexivity. +- destruct (decompose_int_thumb 24%nat n Int.zero) eqn:DB. + + simpl. exploit decompose_int_thumb_nil; eauto. congruence. + + simpl. rewrite B. decEq. + generalize (DECOMP2 24%nat n Int.zero Int.zero). + rewrite DB; simpl. rewrite ! ZERO. auto. +- destruct (decompose_int_arm 12%nat n Int.zero) eqn:DB. + + simpl. exploit decompose_int_arm_nil; eauto. congruence. + + simpl. rewrite B. decEq. + generalize (DECOMP1 12%nat n Int.zero Int.zero). + rewrite DB; simpl. rewrite ! ZERO. auto. Qed. Lemma decompose_int_or: @@ -143,7 +240,7 @@ Proof. intros. rewrite Val.or_assoc. auto. apply Int.or_assoc. intros. rewrite Int.or_commut. apply Int.or_zero. - apply decompose_int_rec_or. + apply decompose_int_arm_or. apply decompose_int_thumb_or. Qed. Lemma decompose_int_bic: @@ -154,7 +251,7 @@ Proof. intros. rewrite Val.and_assoc. simpl. decEq. decEq. rewrite Int.not_or_and_not. auto. apply Int.or_assoc. intros. rewrite Int.or_commut. apply Int.or_zero. - apply decompose_int_rec_or. + apply decompose_int_arm_or. apply decompose_int_thumb_or. Qed. Lemma decompose_int_xor: @@ -165,7 +262,7 @@ Proof. intros. rewrite Val.xor_assoc. auto. apply Int.xor_assoc. intros. rewrite Int.xor_commut. apply Int.xor_zero. - apply decompose_int_rec_xor. + apply decompose_int_arm_xor. apply decompose_int_thumb_xor. Qed. Lemma decompose_int_add: @@ -176,7 +273,7 @@ Proof. intros. rewrite Val.add_assoc. auto. apply Int.add_assoc. intros. rewrite Int.add_commut. apply Int.add_zero. - apply decompose_int_rec_add. + apply decompose_int_arm_add. 
apply decompose_int_thumb_add. Qed. Lemma decompose_int_sub: @@ -188,26 +285,26 @@ Proof. rewrite Int.neg_add_distr; auto. apply Int.add_assoc. intros. rewrite Int.add_commut. apply Int.add_zero. - apply decompose_int_rec_add. + apply decompose_int_arm_add. apply decompose_int_thumb_add. Qed. Lemma iterate_op_correct: forall op1 op2 (f: val -> int -> val) (rs: regset) (r: ireg) m v0 n k, (forall (rs:regset) n, exec_instr ge fn (op2 (SOimm n)) rs m = - Next (nextinstr (rs#r <- (f (rs#r) n))) m) -> + Next (nextinstr_nf (rs#r <- (f (rs#r) n))) m) -> (forall n, exec_instr ge fn (op1 (SOimm n)) rs m = - Next (nextinstr (rs#r <- (f v0 n))) m) -> + Next (nextinstr_nf (rs#r <- (f v0 n))) m) -> exists rs', exec_straight ge fn (iterate_op op1 op2 (decompose_int n) k) rs m k rs' m /\ rs'#r = List.fold_left f (decompose_int n) v0 - /\ forall r': preg, r' <> r -> r' <> PC -> rs'#r' = rs#r'. + /\ forall r': preg, r' <> r -> if_preg r' = true -> rs'#r' = rs#r'. Proof. intros until k; intros SEM2 SEM1. unfold iterate_op. - destruct (decompose_int n) as [ | i tl] eqn:?. - unfold decompose_int in Heql. destruct (decompose_int_rec 12%nat n Int.zero); congruence. + destruct (decompose_int n) as [ | i tl] eqn:DI. + unfold decompose_int in DI. destruct (decompose_int_base n); congruence. revert k. pattern tl. apply List.rev_ind. (* base case *) intros; simpl. econstructor. @@ -231,22 +328,52 @@ Lemma loadimm_correct: exists rs', exec_straight ge fn (loadimm r n k) rs m k rs' m /\ rs'#r = Vint n - /\ forall r': preg, r' <> r -> r' <> PC -> rs'#r' = rs#r'. + /\ forall r': preg, r' <> r -> if_preg r' = true -> rs'#r' = rs#r'. Proof. intros. unfold loadimm. - destruct (NPeano.leb (length (decompose_int n)) (length (decompose_int (Int.not n)))). - (* mov - orr* *) + set (l1 := length (decompose_int n)). + set (l2 := length (decompose_int (Int.not n))). + destruct (NPeano.leb l1 1%nat). +{ (* single mov *) + econstructor; split. apply exec_straight_one. simpl; reflexivity. auto. + split; intros; Simpl. } + destruct (NPeano.leb l2 1%nat). +{ (* single movn *) + econstructor; split. apply exec_straight_one. + simpl. rewrite Int.not_involutive. reflexivity. auto. + split; intros; Simpl. } + destruct (thumb tt). +{ (* movw / movt *) + unfold loadimm_thumb. destruct (Int.eq (Int.shru n (Int.repr 16)) Int.zero). + econstructor; split. + apply exec_straight_one. simpl; eauto. auto. split; intros; Simpl. + econstructor; split. + eapply exec_straight_two. simpl; reflexivity. simpl; reflexivity. auto. auto. + split; intros; Simpl. simpl. f_equal. rewrite Int.zero_ext_and by omega. + rewrite Int.and_assoc. change 65535 with (two_p 16 - 1). rewrite Int.and_idem. + apply Int.same_bits_eq; intros. + rewrite Int.bits_or, Int.bits_and, Int.bits_shl, Int.testbit_repr by auto. + rewrite Int.Ztestbit_two_p_m1 by omega. change (Int.unsigned (Int.repr 16)) with 16. + destruct (zlt i 16). + rewrite andb_true_r, orb_false_r; auto. + rewrite andb_false_r; simpl. rewrite Int.bits_shru by omega. + change (Int.unsigned (Int.repr 16)) with 16. rewrite zlt_true by omega. f_equal; omega. +} + destruct (NPeano.leb l1 l2). +{ (* mov - orr* *) replace (Vint n) with (List.fold_left (fun v i => Val.or v (Vint i)) (decompose_int n) Vzero). apply iterate_op_correct. auto. intros; simpl. rewrite Int.or_commut; rewrite Int.or_zero; auto. rewrite decompose_int_or. simpl. rewrite Int.or_commut; rewrite Int.or_zero; auto. 
- (* mvn - bic* *) +} +{ (* mvn - bic* *) replace (Vint n) with (List.fold_left (fun v i => Val.and v (Vint (Int.not i))) (decompose_int (Int.not n)) (Vint Int.mone)). apply iterate_op_correct. auto. intros. simpl. rewrite Int.and_commut; rewrite Int.and_mone; auto. rewrite decompose_int_bic. simpl. rewrite Int.not_involutive. rewrite Int.and_commut. rewrite Int.and_mone; auto. +} Qed. (** Add integer immediate. *) @@ -256,9 +383,13 @@ Lemma addimm_correct: exists rs', exec_straight ge fn (addimm r1 r2 n k) rs m k rs' m /\ rs'#r1 = Val.add rs#r2 (Vint n) - /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'. + /\ forall r': preg, r' <> r1 -> if_preg r' = true -> rs'#r' = rs#r'. Proof. intros. unfold addimm. + destruct (Int.ltu (Int.repr (-256)) n). + (* sub *) + econstructor; split. apply exec_straight_one; simpl; auto. + split; intros; Simpl. apply Val.sub_opp_add. destruct (NPeano.leb (length (decompose_int n)) (length (decompose_int (Int.neg n)))). (* add - add* *) replace (Val.add (rs r2) (Vint n)) @@ -283,21 +414,17 @@ Lemma andimm_correct: exists rs', exec_straight ge fn (andimm r1 r2 n k) rs m k rs' m /\ rs'#r1 = Val.and rs#r2 (Vint n) - /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'. + /\ forall r': preg, r' <> r1 -> if_preg r' = true -> rs'#r' = rs#r'. Proof. - intros. unfold andimm. + intros. unfold andimm. destruct (is_immed_arith n). (* andi *) - case (is_immed_arith n). - exists (nextinstr (rs#r1 <- (Val.and rs#r2 (Vint n)))). - split. apply exec_straight_one; auto. - split. rewrite nextinstr_inv; auto with asmgen. apply Pregmap.gss. - intros. rewrite nextinstr_inv; auto. apply Pregmap.gso; auto. + exists (nextinstr_nf (rs#r1 <- (Val.and rs#r2 (Vint n)))). + split. apply exec_straight_one; auto. split; intros; Simpl. (* bic - bic* *) replace (Val.and (rs r2) (Vint n)) with (List.fold_left (fun v i => Val.and v (Vint (Int.not i))) (decompose_int (Int.not n)) (rs r2)). apply iterate_op_correct. - auto. - auto. + auto. auto. rewrite decompose_int_bic. rewrite Int.not_involutive. auto. Qed. @@ -308,7 +435,7 @@ Lemma rsubimm_correct: exists rs', exec_straight ge fn (rsubimm r1 r2 n k) rs m k rs' m /\ rs'#r1 = Val.sub (Vint n) rs#r2 - /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'. + /\ forall r': preg, r' <> r1 -> if_preg r' = true -> rs'#r' = rs#r'. Proof. intros. unfold rsubimm. (* rsb - add* *) @@ -329,7 +456,7 @@ Lemma orimm_correct: exists rs', exec_straight ge fn (orimm r1 r2 n k) rs m k rs' m /\ rs'#r1 = Val.or rs#r2 (Vint n) - /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'. + /\ forall r': preg, r' <> r1 -> if_preg r' = true -> rs'#r' = rs#r'. Proof. intros. unfold orimm. (* ori - ori* *) @@ -348,7 +475,7 @@ Lemma xorimm_correct: exists rs', exec_straight ge fn (xorimm r1 r2 n k) rs m k rs' m /\ rs'#r1 = Val.xor rs#r2 (Vint n) - /\ forall r': preg, r' <> r1 -> r' <> PC -> rs'#r' = rs#r'. + /\ forall r': preg, r' <> r1 -> if_preg r' = true -> rs'#r' = rs#r'. Proof. intros. unfold xorimm. 
(* xori - xori* *) @@ -367,7 +494,7 @@ Lemma indexed_memory_access_correct: (mk_immed: int -> int) (base: ireg) n k (rs: regset) m m', (forall (r1: ireg) (rs1: regset) n1 k, Val.add rs1#r1 (Vint n1) = Val.add rs#base (Vint n) -> - (forall (r: preg), r <> PC -> r <> IR14 -> rs1 r = rs r) -> + (forall (r: preg), if_preg r = true -> r <> IR14 -> rs1 r = rs r) -> exists rs', exec_straight ge fn (mk_instr r1 n1 :: k) rs1 m k rs' m' /\ P rs') -> exists rs', @@ -397,12 +524,12 @@ Lemma loadind_int_correct: exists rs', exec_straight ge fn (loadind_int base ofs dst k) rs m k rs' m /\ rs'#dst = v - /\ forall r, r <> PC -> r <> IR14 -> r <> dst -> rs'#r = rs#r. + /\ forall r, if_preg r = true -> r <> IR14 -> r <> dst -> rs'#r = rs#r. Proof. intros; unfold loadind_int. apply indexed_memory_access_correct; intros. econstructor; split. apply exec_straight_one. simpl. unfold exec_load. rewrite H0; rewrite H; eauto. auto. - split. Simpl. intros; Simpl. + split; intros; Simpl. Qed. Lemma loadind_correct: @@ -412,7 +539,7 @@ Lemma loadind_correct: exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of dst) = v - /\ forall r, r <> PC -> r <> IR14 -> r <> preg_of dst -> rs'#r = rs#r. + /\ forall r, if_preg r = true -> r <> IR14 -> r <> preg_of dst -> rs'#r = rs#r. Proof. unfold loadind; intros. destruct ty; destruct (preg_of dst); inv H; simpl in H0. - (* int *) @@ -421,22 +548,22 @@ Proof. apply indexed_memory_access_correct; intros. econstructor; split. apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. - split. Simpl. intros; Simpl. + split; intros; Simpl. - (* single *) apply indexed_memory_access_correct; intros. econstructor; split. apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. - split. Simpl. intros; Simpl. + split; intros; Simpl. - (* any32 *) apply indexed_memory_access_correct; intros. econstructor; split. apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. - split. Simpl. intros; Simpl. + split; intros; Simpl. - (* any64 *) apply indexed_memory_access_correct; intros. econstructor; split. apply exec_straight_one. simpl. unfold exec_load. rewrite H. rewrite H0. eauto. auto. - split. Simpl. intros; Simpl. + split; intros; Simpl. Qed. (** Indexed memory stores. *) @@ -447,10 +574,10 @@ Lemma storeind_correct: Mem.storev (chunk_of_type ty) m (Val.add rs#base (Vint ofs)) (rs#(preg_of src)) = Some m' -> exists rs', exec_straight ge fn c rs m k rs' m' - /\ forall r, r <> PC -> r <> IR14 -> preg_notin r (destroyed_by_setstack ty) -> rs'#r = rs#r. + /\ forall r, if_preg r = true -> r <> IR14 -> rs'#r = rs#r. Proof. unfold storeind; intros. - assert (NOT14: preg_of src <> IR IR14) by eauto with asmgen. + assert (DATA: data_preg (preg_of src) = true) by eauto with asmgen. destruct ty; destruct (preg_of src); inv H; simpl in H0. - (* int *) apply indexed_memory_access_correct; intros. @@ -493,13 +620,6 @@ Proof. intros. destruct s; simpl; auto. Qed. -Lemma transl_shift_addr_correct: - forall s (r: ireg) (rs: regset), - eval_shift_addr (transl_shift_addr s r) rs = eval_shift s (rs#r). -Proof. - intros. destruct s; simpl; auto. -Qed. 
- (** Translation of conditions *) Lemma compare_int_spec: @@ -871,6 +991,7 @@ Lemma transl_cond_correct: exec_straight ge fn c rs m k rs' m /\ match eval_condition cond (map rs (map preg_of args)) m with | Some b => eval_testcond (cond_for_cond cond) rs' = Some b + /\ eval_testcond (cond_for_cond (negate_condition cond)) rs' = Some (negb b) | None => True end /\ forall r, data_preg r = true -> rs'#r = rs r. @@ -880,57 +1001,66 @@ Proof. - (* Ccomp *) econstructor. split. apply exec_straight_one. simpl. eauto. auto. - split. destruct (Val.cmp_bool c0 (rs x) (rs x0)) eqn:CMP; auto. apply cond_for_signed_cmp_correct; auto. + split. destruct (Val.cmp_bool c0 (rs x) (rs x0)) eqn:CMP; auto. + split; apply cond_for_signed_cmp_correct; auto. rewrite Val.negate_cmp_bool, CMP; auto. apply compare_int_inv. - (* Ccompu *) econstructor. split. apply exec_straight_one. simpl. eauto. auto. - split. destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (rs x0)) eqn:CMP; auto. apply cond_for_unsigned_cmp_correct; auto. + split. destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (rs x0)) eqn:CMP; auto. + split; apply cond_for_unsigned_cmp_correct; auto. rewrite Val.negate_cmpu_bool, CMP; auto. apply compare_int_inv. - (* Ccompshift *) econstructor. split. apply exec_straight_one. simpl. eauto. auto. split. rewrite transl_shift_correct. - destruct (Val.cmp_bool c0 (rs x) (eval_shift s (rs x0))) eqn:CMP; auto. apply cond_for_signed_cmp_correct; auto. + destruct (Val.cmp_bool c0 (rs x) (eval_shift s (rs x0))) eqn:CMP; auto. + split; apply cond_for_signed_cmp_correct; auto. rewrite Val.negate_cmp_bool, CMP; auto. apply compare_int_inv. - (* Ccompushift *) econstructor. split. apply exec_straight_one. simpl. eauto. auto. split. rewrite transl_shift_correct. - destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (eval_shift s (rs x0))) eqn:CMP; auto. apply cond_for_unsigned_cmp_correct; auto. + destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (eval_shift s (rs x0))) eqn:CMP; auto. + split; apply cond_for_unsigned_cmp_correct; auto. rewrite Val.negate_cmpu_bool, CMP; auto. apply compare_int_inv. - (* Ccompimm *) destruct (is_immed_arith i). econstructor. split. apply exec_straight_one. simpl. eauto. auto. - split. destruct (Val.cmp_bool c0 (rs x) (Vint i)) eqn:CMP; auto. apply cond_for_signed_cmp_correct; auto. + split. destruct (Val.cmp_bool c0 (rs x) (Vint i)) eqn:CMP; auto. + split; apply cond_for_signed_cmp_correct; auto. rewrite Val.negate_cmp_bool, CMP; auto. apply compare_int_inv. exploit (loadimm_correct IR14). intros [rs' [P [Q R]]]. econstructor. split. eapply exec_straight_trans. eexact P. apply exec_straight_one. simpl. rewrite Q. rewrite R; eauto with asmgen. auto. split. rewrite <- R by (eauto with asmgen). - destruct (Val.cmp_bool c0 (rs' x) (Vint i)) eqn:CMP; auto. apply cond_for_signed_cmp_correct; auto. + destruct (Val.cmp_bool c0 (rs' x) (Vint i)) eqn:CMP; auto. + split; apply cond_for_signed_cmp_correct; auto. rewrite Val.negate_cmp_bool, CMP; auto. intros. rewrite compare_int_inv by auto. auto with asmgen. - (* Ccompuimm *) destruct (is_immed_arith i). econstructor. split. apply exec_straight_one. simpl. eauto. auto. - split. destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (Vint i)) eqn:CMP; auto. apply cond_for_unsigned_cmp_correct; auto. + split. destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs x) (Vint i)) eqn:CMP; auto. + split; apply cond_for_unsigned_cmp_correct; auto. rewrite Val.negate_cmpu_bool, CMP; auto. apply compare_int_inv. 
exploit (loadimm_correct IR14). intros [rs' [P [Q R]]]. econstructor. split. eapply exec_straight_trans. eexact P. apply exec_straight_one. simpl. rewrite Q. rewrite R; eauto with asmgen. auto. split. rewrite <- R by (eauto with asmgen). - destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs' x) (Vint i)) eqn:CMP; auto. apply cond_for_unsigned_cmp_correct; auto. + destruct (Val.cmpu_bool (Mem.valid_pointer m) c0 (rs' x) (Vint i)) eqn:CMP; auto. + split; apply cond_for_unsigned_cmp_correct; auto. rewrite Val.negate_cmpu_bool, CMP; auto. intros. rewrite compare_int_inv by auto. auto with asmgen. - (* Ccompf *) econstructor. split. apply exec_straight_one. simpl. eauto. apply compare_float_nextpc. split. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) eqn:CMP; auto. destruct (rs x); try discriminate. destruct (rs x0); try discriminate. - simpl in CMP. inv CMP. apply cond_for_float_cmp_correct. + simpl in CMP. inv CMP. + split. apply cond_for_float_cmp_correct. apply cond_for_float_not_cmp_correct. apply compare_float_inv. - (* Cnotcompf *) econstructor. @@ -938,7 +1068,8 @@ Proof. split. destruct (Val.cmpf_bool c0 (rs x) (rs x0)) eqn:CMP; auto. destruct (rs x); try discriminate. destruct (rs x0); try discriminate. simpl in CMP. inv CMP. -Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct. +Local Opaque compare_float. simpl. + split. apply cond_for_float_not_cmp_correct. rewrite negb_involutive. apply cond_for_float_cmp_correct. exact I. apply compare_float_inv. - (* Ccompfzero *) @@ -946,14 +1077,16 @@ Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct. split. apply exec_straight_one. simpl. eauto. apply compare_float_nextpc. split. destruct (Val.cmpf_bool c0 (rs x) (Vfloat Float.zero)) eqn:CMP; auto. destruct (rs x); try discriminate. - simpl in CMP. inv CMP. apply cond_for_float_cmp_correct. + simpl in CMP. inv CMP. + split. apply cond_for_float_cmp_correct. apply cond_for_float_not_cmp_correct. apply compare_float_inv. - (* Cnotcompfzero *) econstructor. split. apply exec_straight_one. simpl. eauto. apply compare_float_nextpc. split. destruct (Val.cmpf_bool c0 (rs x) (Vfloat Float.zero)) eqn:CMP; auto. destruct (rs x); try discriminate. simpl in CMP. inv CMP. -Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct. +Local Opaque compare_float. simpl. + split. apply cond_for_float_not_cmp_correct. rewrite negb_involutive. apply cond_for_float_cmp_correct. exact I. apply compare_float_inv. - (* Ccompfs *) @@ -961,7 +1094,8 @@ Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct. split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc. split. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) eqn:CMP; auto. destruct (rs x); try discriminate. destruct (rs x0); try discriminate. - simpl in CMP. inv CMP. apply cond_for_float32_cmp_correct. + simpl in CMP. inv CMP. + split. apply cond_for_float32_cmp_correct. apply cond_for_float32_not_cmp_correct. apply compare_float32_inv. - (* Cnotcompfs *) econstructor. @@ -969,7 +1103,8 @@ Local Opaque compare_float. simpl. apply cond_for_float_not_cmp_correct. split. destruct (Val.cmpfs_bool c0 (rs x) (rs x0)) eqn:CMP; auto. destruct (rs x); try discriminate. destruct (rs x0); try discriminate. simpl in CMP. inv CMP. -Local Opaque compare_float32. simpl. apply cond_for_float32_not_cmp_correct. +Local Opaque compare_float32. simpl. + split. apply cond_for_float32_not_cmp_correct. rewrite negb_involutive. apply cond_for_float32_cmp_correct. exact I. apply compare_float32_inv. 
- (* Ccompfszero *) @@ -977,14 +1112,15 @@ Local Opaque compare_float32. simpl. apply cond_for_float32_not_cmp_correct. split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc. split. destruct (Val.cmpfs_bool c0 (rs x) (Vsingle Float32.zero)) eqn:CMP; auto. destruct (rs x); try discriminate. - simpl in CMP. inv CMP. apply cond_for_float32_cmp_correct. + simpl in CMP. inv CMP. + split. apply cond_for_float32_cmp_correct. apply cond_for_float32_not_cmp_correct. apply compare_float32_inv. - (* Cnotcompfzero *) econstructor. split. apply exec_straight_one. simpl. eauto. apply compare_float32_nextpc. split. destruct (Val.cmpfs_bool c0 (rs x) (Vsingle Float32.zero)) eqn:CMP; auto. destruct (rs x); try discriminate. simpl in CMP. inv CMP. - simpl. apply cond_for_float32_not_cmp_correct. + simpl. split. apply cond_for_float32_not_cmp_correct. rewrite negb_involutive. apply cond_for_float32_cmp_correct. exact I. apply compare_float32_inv. Qed. @@ -1009,13 +1145,10 @@ Proof. intros until v; intros TR EV NOCMP. unfold transl_op in TR; destruct op; ArgsInv; simpl in EV; inv EV; try (TranslOpSimpl; fail). (* Omove *) - exists (nextinstr (rs#(preg_of res) <- (rs#(preg_of m0)))). - split. destruct (preg_of res) eqn:RES; try discriminate; destruct (preg_of m0) eqn:ARG; inv TR. - apply exec_straight_one; auto. - apply exec_straight_one; auto. - intuition Simpl. + econstructor; split. apply exec_straight_one; simpl; eauto. intuition Simpl. + econstructor; split. apply exec_straight_one; simpl; eauto. intuition Simpl. (* Ointconst *) generalize (loadimm_correct x i k rs m). intros [rs' [A [B C]]]. exists rs'; auto with asmgen. @@ -1024,8 +1157,11 @@ Proof. intros [rs' [EX [RES OTH]]]. exists rs'; auto with asmgen. (* Ocast8signed *) - set (rs1 := nextinstr (rs#x <- (Val.shl rs#x0 (Vint (Int.repr 24))))). - set (rs2 := nextinstr (rs1#x <- (Val.shr rs1#x (Vint (Int.repr 24))))). + destruct (thumb tt). + econstructor; split. apply exec_straight_one; simpl; eauto. intuition Simpl. + destruct (rs x0); auto; simpl. rewrite Int.shru_zero. reflexivity. + set (rs1 := nextinstr_nf (rs#x <- (Val.shl rs#x0 (Vint (Int.repr 24))))). + set (rs2 := nextinstr_nf (rs1#x <- (Val.shr rs1#x (Vint (Int.repr 24))))). exists rs2. split. apply exec_straight_two with rs1 m; auto. split. unfold rs2; Simpl. unfold rs1; Simpl. @@ -1034,8 +1170,11 @@ Proof. f_equal. symmetry. apply (Int.sign_ext_shr_shl 8). compute; auto. intros. unfold rs2, rs1; Simpl. (* Ocast16signed *) - set (rs1 := nextinstr (rs#x <- (Val.shl rs#x0 (Vint (Int.repr 16))))). - set (rs2 := nextinstr (rs1#x <- (Val.shr rs1#x (Vint (Int.repr 16))))). + destruct (thumb tt). + econstructor; split. apply exec_straight_one; simpl; eauto. intuition Simpl. + destruct (rs x0); auto; simpl. rewrite Int.shru_zero. reflexivity. + set (rs1 := nextinstr_nf (rs#x <- (Val.shl rs#x0 (Vint (Int.repr 16))))). + set (rs2 := nextinstr_nf (rs1#x <- (Val.shr rs1#x (Vint (Int.repr 16))))). exists rs2. split. apply exec_straight_two with rs1 m; auto. split. unfold rs2; Simpl. unfold rs1; Simpl. @@ -1051,22 +1190,6 @@ Proof. generalize (rsubimm_correct x x0 i k rs m). intros [rs' [A [B C]]]. exists rs'; auto with asmgen. - (* Omul *) - destruct (negb (ireg_eq x x0)). - TranslOpSimpl. - destruct (negb (ireg_eq x x1)). - rewrite Val.mul_commut. TranslOpSimpl. - econstructor; split. - eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. - intuition Simpl. - (* Omla *) - destruct (negb (ireg_eq x x0)). - TranslOpSimpl. - destruct (negb (ireg_eq x x1)). 
- rewrite Val.mul_commut. TranslOpSimpl. - econstructor; split. - eapply exec_straight_two. simpl; eauto. simpl; eauto. auto. auto. - intuition Simpl. (* divs *) Local Transparent destroyed_by_op. econstructor. split. apply exec_straight_one. simpl. rewrite H0. reflexivity. auto. @@ -1117,9 +1240,9 @@ Local Transparent destroyed_by_op. omega. } set (j := Int.sub Int.iwordsize i) in *. - set (rs1 := nextinstr (rs#IR14 <- (Val.shr (Vint i0) (Vint (Int.repr 31))))). - set (rs2 := nextinstr (rs1#IR14 <- (Val.add (Vint i0) (Val.shru rs1#IR14 (Vint j))))). - set (rs3 := nextinstr (rs2#x <- (Val.shr rs2#IR14 (Vint i)))). + set (rs1 := nextinstr_nf (rs#IR14 <- (Val.shr (Vint i0) (Vint (Int.repr 31))))). + set (rs2 := nextinstr_nf (rs1#IR14 <- (Val.add (Vint i0) (Val.shru rs1#IR14 (Vint j))))). + set (rs3 := nextinstr_nf (rs2#x <- (Val.shr rs2#IR14 (Vint i)))). exists rs3; split. apply exec_straight_three with rs1 m rs2 m. simpl. rewrite X0; reflexivity. @@ -1180,23 +1303,11 @@ Proof. subst op. simpl in H. monadInv H. simpl in H0. inv H0. rewrite (ireg_of_eq _ _ EQ). exploit transl_cond_correct; eauto. instantiate (1 := rs). instantiate (1 := m). intros [rs1 [A [B C]]]. - set (rs2 := nextinstr (rs1#x <- (Vint Int.zero))). - set (rs3 := nextinstr (match eval_testcond (cond_for_cond cmp) rs2 with - | Some true => rs2 # x <- (Vint Int.one) - | Some false => rs2 - | None => rs2 # x <- Vundef end)). - exists rs3; split. - eapply exec_straight_trans. eexact A. apply exec_straight_two with rs2 m. - auto. - simpl. unfold rs3. destruct (eval_testcond (cond_for_cond cmp) rs2) as [[]|]; auto. - auto. unfold rs3. destruct (eval_testcond (cond_for_cond cmp) rs2) as [[]|]; auto. - split. destruct (eval_condition cmp rs ## (preg_of ## args) m) as [b|]; simpl; auto. - unfold rs3. - change (eval_testcond (cond_for_cond cmp) rs2) with (eval_testcond (cond_for_cond cmp) rs1). rewrite B. - Simpl. destruct b. Simpl. unfold rs2. Simpl. - intros. transitivity (rs2 r). - unfold rs3. Simpl. destruct (eval_testcond (cond_for_cond cmp) rs2) as [[]|]; Simpl; auto. - unfold rs2. Simpl. + econstructor; split. + eapply exec_straight_trans. eexact A. apply exec_straight_one. simpl; eauto. auto. + split; intros; Simpl. + destruct (eval_condition cmp rs ## (preg_of ## args) m) as [b|]; simpl; auto. + destruct B as [B1 B2]; rewrite B1. destruct b; auto. Qed. (** Translation of loads and stores. *) @@ -1209,21 +1320,21 @@ Qed. Lemma transl_memory_access_correct: forall (P: regset -> Prop) (mk_instr_imm: ireg -> int -> instruction) - (mk_instr_gen: option (ireg -> shift_addr -> instruction)) + (mk_instr_gen: option (ireg -> shift_op -> instruction)) (mk_immed: int -> int) addr args k c (rs: regset) a m m', transl_memory_access mk_instr_imm mk_instr_gen mk_immed addr args k = OK c -> eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a -> (forall (r1: ireg) (rs1: regset) n k, Val.add rs1#r1 (Vint n) = a -> - (forall (r: preg), r <> PC -> r <> IR14 -> rs1 r = rs r) -> + (forall (r: preg), if_preg r = true -> r <> IR14 -> rs1 r = rs r) -> exists rs', exec_straight ge fn (mk_instr_imm r1 n :: k) rs1 m k rs' m' /\ P rs') -> match mk_instr_gen with | None => True | Some mk => - (forall (r1: ireg) (sa: shift_addr) k, - Val.add rs#r1 (eval_shift_addr sa rs) = a -> + (forall (r1: ireg) (sa: shift_op) k, + Val.add rs#r1 (eval_shift_op sa rs) = a -> exists rs', exec_straight ge fn (mk r1 sa :: k) rs m k rs' m' /\ P rs') end -> @@ -1239,7 +1350,7 @@ Proof. simpl. erewrite ! ireg_of_eq; eauto. 
(* Aindexed2shift *) destruct mk_instr_gen as [mk | ]; monadInv TR. apply MK2. - erewrite ! ireg_of_eq; eauto. rewrite transl_shift_addr_correct. auto. + erewrite ! ireg_of_eq; eauto. rewrite transl_shift_correct. auto. (* Ainstack *) inv TR. apply indexed_memory_access_correct. exact MK1. Qed. @@ -1249,9 +1360,9 @@ Lemma transl_load_int_correct: transl_memory_access_int mk_instr is_immed dst addr args k = OK c -> eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a -> Mem.loadv chunk m a = Some v -> - (forall (r1 r2: ireg) (sa: shift_addr) (rs1: regset), + (forall (r1 r2: ireg) (sa: shift_op) (rs1: regset), exec_instr ge fn (mk_instr r1 r2 sa) rs1 m = - exec_load chunk (Val.add rs1#r2 (eval_shift_addr sa rs1)) r1 rs1 m) -> + exec_load chunk (Val.add rs1#r2 (eval_shift_op sa rs1)) r1 rs1 m) -> exists rs', exec_straight ge fn c rs m k rs' m /\ rs'#(preg_of dst) = v @@ -1260,7 +1371,7 @@ Proof. intros. monadInv H. erewrite ireg_of_eq by eauto. eapply transl_memory_access_correct; eauto. intros; simpl. econstructor; split. apply exec_straight_one. - rewrite H2. unfold exec_load. simpl eval_shift_addr. rewrite H. rewrite H1. eauto. auto. + rewrite H2. unfold exec_load. simpl eval_shift_op. rewrite H. rewrite H1. eauto. auto. split. Simpl. intros; Simpl. simpl; intros. econstructor; split. apply exec_straight_one. @@ -1294,17 +1405,18 @@ Lemma transl_store_int_correct: transl_memory_access_int mk_instr is_immed src addr args k = OK c -> eval_addressing ge (rs#SP) addr (map rs (map preg_of args)) = Some a -> Mem.storev chunk m a rs#(preg_of src) = Some m' -> - (forall (r1 r2: ireg) (sa: shift_addr) (rs1: regset), + (forall (r1 r2: ireg) (sa: shift_op) (rs1: regset), exec_instr ge fn (mk_instr r1 r2 sa) rs1 m = - exec_store chunk (Val.add rs1#r2 (eval_shift_addr sa rs1)) r1 rs1 m) -> + exec_store chunk (Val.add rs1#r2 (eval_shift_op sa rs1)) r1 rs1 m) -> exists rs', exec_straight ge fn c rs m k rs' m' /\ forall r, data_preg r = true -> preg_notin r mr -> rs'#r = rs#r. Proof. - intros. monadInv H. erewrite ireg_of_eq in * by eauto. + intros. assert (DR: data_preg (preg_of src) = true) by eauto with asmgen. + monadInv H. erewrite ireg_of_eq in * by eauto. eapply transl_memory_access_correct; eauto. intros; simpl. econstructor; split. apply exec_straight_one. - rewrite H2. unfold exec_store. simpl eval_shift_addr. rewrite H. rewrite H3; eauto with asmgen. + rewrite H2. unfold exec_store. simpl eval_shift_op. rewrite H. rewrite H3; eauto with asmgen. rewrite H1. eauto. auto. intros; Simpl. simpl; intros. @@ -1325,7 +1437,8 @@ Lemma transl_store_float_correct: exec_straight ge fn c rs m k rs' m' /\ forall r, data_preg r = true -> preg_notin r mr -> rs'#r = rs#r. Proof. - intros. monadInv H. erewrite freg_of_eq in * by eauto. + intros. assert (DR: data_preg (preg_of src) = true) by eauto with asmgen. + monadInv H. erewrite freg_of_eq in * by eauto. eapply transl_memory_access_correct; eauto. intros; simpl. econstructor; split. apply exec_straight_one. rewrite H2. unfold exec_store. rewrite H. rewrite H3; auto with asmgen. rewrite H1. eauto. auto. diff --git a/arm/PrintAsm.ml b/arm/PrintAsm.ml index 2ad9114..bb738a1 100644 --- a/arm/PrintAsm.ml +++ b/arm/PrintAsm.ml @@ -20,6 +20,11 @@ open AST open Memdata open Asm +(* Code generation options. To be turned into proper options. 
*) + +let vfpv3 = ref true +let literals_in_code = ref true + (* On-the-fly label renaming *) let next_label = ref 100 @@ -102,35 +107,41 @@ let condition_name = function | TCgt -> "gt" | TCle -> "le" -let movimm oc dst n = - match Asmgen.decompose_int n with - | [] -> assert false - | hd::tl as l -> - fprintf oc " mov %s, #%a\n" dst coqint hd; - List.iter - (fun n -> fprintf oc " orr %s, %s, #%a\n" dst dst coqint n) - tl; - List.length l - -let addimm oc dst src n = - match Asmgen.decompose_int n with - | [] -> assert false - | hd::tl as l -> - fprintf oc " add %s, %s, #%a\n" dst src coqint hd; - List.iter - (fun n -> fprintf oc " add %s, %s, #%a\n" dst dst coqint n) - tl; - List.length l - -let subimm oc dst src n = - match Asmgen.decompose_int n with - | [] -> assert false - | hd::tl as l -> - fprintf oc " sub %s, %s, #%a\n" dst src coqint hd; - List.iter - (fun n -> fprintf oc " sub %s, %s, #%a\n" dst dst coqint n) - tl; - List.length l +let neg_condition_name = function + | TCeq -> "ne" + | TCne -> "eq" + | TChs -> "lo" + | TClo -> "hs" + | TCmi -> "pl" + | TCpl -> "mi" + | TChi -> "ls" + | TCls -> "hi" + | TCge -> "lt" + | TClt -> "ge" + | TCgt -> "le" + | TCle -> "gt" + +(* In Thumb2 mode, some arithmetic instructions have shorter encodings + if they carry the "S" flag (update condition flags): + add (but not sp + imm) + and + asr + bic + eor + lsl + lsr + mov + mvn + orr + rsb + sub (but not sp - imm) + The proof of Asmgen shows that CompCert-generated code behaves the + same whether flags are updated or not by those instructions. The + following printing function adds a "S" suffix if we are in Thumb2 + mode. *) + +let thumbS oc = + if !Clflags.option_fthumb then output_char oc 's' (* Names of sections *) @@ -156,15 +167,16 @@ let size_constants = ref 0 let max_pos_constants = ref max_int let distance_to_emit_constants () = - !max_pos_constants - (!currpos + !size_constants) + if !literals_in_code + then !max_pos_constants - (!currpos + !size_constants) + else max_int (* Associate labels to floating-point constants and to symbols *) let float_labels = (Hashtbl.create 39 : (int64, int) Hashtbl.t) let float32_labels = (Hashtbl.create 39 : (int32, int) Hashtbl.t) -let label_float f = - let bf = camlint64_of_coqint(Floats.Float.to_bits f) in +let label_float bf = try Hashtbl.find float_labels bf with Not_found -> @@ -174,8 +186,7 @@ let label_float f = max_pos_constants := min !max_pos_constants (!currpos + 1024); lbl' -let label_float32 f = - let bf = camlint_of_coqint(Floats.Float32.to_bits f) in +let label_float32 bf = try Hashtbl.find float32_labels bf with Not_found -> @@ -206,6 +217,7 @@ let reset_constants () = max_pos_constants := max_int let emit_constants oc = + fprintf oc " .balign 4\n"; Hashtbl.iter (fun bf lbl -> (* Little-endian floats *) @@ -224,6 +236,67 @@ let emit_constants oc = symbol_labels; reset_constants () +(* Generate code to load the address of id + ofs in register r *) + +let loadsymbol oc r id ofs = + if !Clflags.option_fthumb then begin + fprintf oc " movw %a, #:lower16:%a\n" + ireg r print_symb_ofs (id, ofs); + fprintf oc " movt %a, #:upper16:%a\n" + ireg r print_symb_ofs (id, ofs); 2 + end else begin + let lbl = label_symbol id ofs in + fprintf oc " ldr %a, .L%d @ %a\n" + ireg r lbl print_symb_ofs (id, ofs); 1 + end + +(* Emit instruction sequences that set or offset a register by a constant. *) +(* No S suffix because they are applied to SP most of the time. 
*) + +let movimm oc dst n = + match Asmgen.decompose_int n with + | [] -> assert false + | hd::tl as l -> + fprintf oc " mov %s, #%a\n" dst coqint hd; + List.iter + (fun n -> fprintf oc " orr %s, %s, #%a\n" dst dst coqint n) + tl; + List.length l + +let addimm oc dst src n = + match Asmgen.decompose_int n with + | [] -> assert false + | hd::tl as l -> + fprintf oc " add %s, %s, #%a\n" dst src coqint hd; + List.iter + (fun n -> fprintf oc " add %s, %s, #%a\n" dst dst coqint n) + tl; + List.length l + +let subimm oc dst src n = + match Asmgen.decompose_int n with + | [] -> assert false + | hd::tl as l -> + fprintf oc " sub %s, %s, #%a\n" dst src coqint hd; + List.iter + (fun n -> fprintf oc " sub %s, %s, #%a\n" dst dst coqint n) + tl; + List.length l + +(* Recognition of float constants appropriate for VMOV. + "a normalized binary floating point encoding with 1 sign bit, 4 + bits of fraction and a 3-bit exponent" *) + +let is_immediate_float64 bits = + let exp = (Int64.(to_int (shift_right_logical bits 52)) land 0x7FF) - 1023 in + let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in + exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant + +let is_immediate_float32 bits = + let exp = (Int32.(to_int (shift_right_logical bits 23)) land 0xFF) - 127 in + let mant = Int32.logand bits 0x7F_FFFFl in + exp >= -3 && exp <= 4 && Int32.logand mant 0x78_0000l = mant + (* Emit .file / .loc debugging directives *) let filename_num : (string, int) Hashtbl.t = Hashtbl.create 7 @@ -267,7 +340,7 @@ let cfi_rel_offset oc reg ofs = - annotation statements: take their arguments in registers or stack locations; generate no code; - inlined by the compiler: take their arguments in arbitrary - registers; preserve all registers except IR2, IR3, IR12 and FP6. + registers. 
*) (* Handling of annotations *) @@ -365,9 +438,9 @@ let print_builtin_vload_common oc chunk args res = fprintf oc " ldr %a, [%a, #0]\n" ireg res2 ireg addr end; 2 | Mfloat32, [IR addr], [FR res] -> - fprintf oc " flds %a, [%a, #0]\n" freg_single res ireg addr; 1 + fprintf oc " vldr %a, [%a, #0]\n" freg_single res ireg addr; 1 | Mfloat64, [IR addr], [FR res] -> - fprintf oc " fldd %a, [%a, #0]\n" freg res ireg addr; 1 + fprintf oc " vldr %a, [%a, #0]\n" freg res ireg addr; 1 | _ -> assert false @@ -378,10 +451,9 @@ let print_builtin_vload oc chunk args res = let print_builtin_vload_global oc chunk id ofs args res = fprintf oc "%s begin builtin __builtin_volatile_read\n" comment; - let lbl = label_symbol id ofs in - fprintf oc " ldr %a, .L%d @ %a\n" ireg IR14 lbl print_symb_ofs (id, ofs); - let n = print_builtin_vload_common oc chunk [IR IR14] res in - fprintf oc "%s end builtin __builtin_volatile_read\n" comment; n + 1 + let n1 = loadsymbol oc IR14 id ofs in + let n2 = print_builtin_vload_common oc chunk [IR IR14] res in + fprintf oc "%s end builtin __builtin_volatile_read\n" comment; n1 + n2 let print_builtin_vstore_common oc chunk args = match chunk, args with @@ -395,9 +467,9 @@ let print_builtin_vstore_common oc chunk args = fprintf oc " str %a, [%a, #0]\n" ireg src2 ireg addr; fprintf oc " str %a, [%a, #4]\n" ireg src1 ireg addr; 2 | Mfloat32, [IR addr; FR src] -> - fprintf oc " fsts %a, [%a, #0]\n" freg_single src ireg addr; 1 + fprintf oc " vstr %a, [%a, #0]\n" freg_single src ireg addr; 1 | Mfloat64, [IR addr; FR src] -> - fprintf oc " fstd %a, [%a, #0]\n" freg src ireg addr; 1 + fprintf oc " vstr %a, [%a, #0]\n" freg src ireg addr; 1 | _ -> assert false @@ -408,10 +480,9 @@ let print_builtin_vstore oc chunk args = let print_builtin_vstore_global oc chunk id ofs args = fprintf oc "%s begin builtin __builtin_volatile_write\n" comment; - let lbl = label_symbol id ofs in - fprintf oc " ldr %a, .L%d @ %a\n" ireg IR14 lbl print_symb_ofs (id, ofs); - let n = print_builtin_vstore_common oc chunk (IR IR14 :: args) in - fprintf oc "%s end builtin __builtin_volatile_write\n" comment; n + 1 + let n1 = loadsymbol oc IR14 id ofs in + let n2 = print_builtin_vstore_common oc chunk (IR IR14 :: args) in + fprintf oc "%s end builtin __builtin_volatile_write\n" comment; n1 + n2 (* Handling of varargs *) @@ -458,24 +529,30 @@ let print_builtin_inline oc name args res = fprintf oc " clz %a, %a\n" ireg res ireg a1; 1 (* Float arithmetic *) | "__builtin_fabs", [FR a1], [FR res] -> - fprintf oc " fabsd %a, %a\n" freg res freg a1; 1 + fprintf oc " vabs.f64 %a, %a\n" freg res freg a1; 1 | "__builtin_fsqrt", [FR a1], [FR res] -> - fprintf oc " fsqrtd %a, %a\n" freg res freg a1; 1 + fprintf oc " vsqrt.f64 %a, %a\n" freg res freg a1; 1 (* 64-bit integer arithmetic *) | "__builtin_negl", [IR ah; IR al], [IR rh; IR rl] -> - if rl = ah then begin - fprintf oc " rsbs %a, %a, #0\n" ireg IR14 ireg al; - fprintf oc " rsc %a, %a, #0\n" ireg rh ireg ah; - fprintf oc " mov %a, %a\n" ireg rl ireg IR14; 3 - end else begin - fprintf oc " rsbs %a, %a, #0\n" ireg rl ireg al; - fprintf oc " rsc %a, %a, #0\n" ireg rh ireg ah; 2 - end + fprintf oc " rsbs %a, %a, #0\n" + ireg (if rl = ah then IR14 else rl) ireg al; + (* No "rsc" instruction in Thumb2. 
Emulate based on + rsc a, b, #0 == a <- AddWithCarry(~b, 0, carry) + == mvn a, b; adc a, a, #0 *) + if !Clflags.option_fthumb then begin + fprintf oc " mvn %a, %a\n" ireg rh ireg ah; + fprintf oc " adc %a, %a, #0\n" ireg rh ireg rh + end else begin + fprintf oc " rsc %a, %a, #0\n" ireg rh ireg ah + end; + if rl = ah then + fprintf oc " mov%t %a, %a\n" thumbS ireg rl ireg IR14; + 4 | "__builtin_addl", [IR ah; IR al; IR bh; IR bl], [IR rh; IR rl] -> if rl = ah || rl = bh then begin fprintf oc " adds %a, %a, %a\n" ireg IR14 ireg al ireg bl; fprintf oc " adc %a, %a, %a\n" ireg rh ireg ah ireg bh; - fprintf oc " mov %a, %a\n" ireg rl ireg IR14; 3 + fprintf oc " mov%t %a, %a\n" thumbS ireg rl ireg IR14; 3 end else begin fprintf oc " adds %a, %a, %a\n" ireg rl ireg al ireg bl; fprintf oc " adc %a, %a, %a\n" ireg rh ireg ah ireg bh; 2 @@ -484,18 +561,13 @@ let print_builtin_inline oc name args res = if rl = ah || rl = bh then begin fprintf oc " subs %a, %a, %a\n" ireg IR14 ireg al ireg bl; fprintf oc " sbc %a, %a, %a\n" ireg rh ireg ah ireg bh; - fprintf oc " mov %a, %a\n" ireg rl ireg IR14; 3 + fprintf oc " mov%t %a, %a\n" thumbS ireg rl ireg IR14; 3 end else begin fprintf oc " subs %a, %a, %a\n" ireg rl ireg al ireg bl; fprintf oc " sbc %a, %a, %a\n" ireg rh ireg ah ireg bh; 2 end | "__builtin_mull", [IR a; IR b], [IR rh; IR rl] -> - if rl = a || rh = a then begin - fprintf oc " mov %a, %a\n" ireg IR14 ireg a; - fprintf oc " umull %a, %a, %a, %a\n" ireg rl ireg rh ireg IR14 ireg b; 2 - end else begin - fprintf oc " umull %a, %a, %a, %a\n" ireg rl ireg rh ireg a ireg b; 1 - end + fprintf oc " umull %a, %a, %a, %a\n" ireg rl ireg rh ireg a ireg b; 1 (* Memory accesses *) | "__builtin_read16_reversed", [IR a1], [IR res] -> fprintf oc " ldrh %a, [%a, #0]\n" ireg res ireg a1; @@ -534,16 +606,16 @@ module FixupEABI = struct let fixup_double oc dir f i1 i2 = match dir with | Incoming -> (* f <- (i1, i2) *) - fprintf oc " fmdrr %a, %a, %a\n" freg f ireg i1 ireg i2 + fprintf oc " vmov %a, %a, %a\n" freg f ireg i1 ireg i2 | Outgoing -> (* (i1, i2) <- f *) - fprintf oc " fmrrd %a, %a, %a\n" ireg i1 ireg i2 freg f + fprintf oc " vmov %a, %a, %a\n" ireg i1 ireg i2 freg f let fixup_single oc dir f i = match dir with | Incoming -> (* f <- i *) - fprintf oc " fmsr %a, %a\n" freg_single f ireg i + fprintf oc " vmov %a, %a\n" freg_single f ireg i | Outgoing -> (* i <- f *) - fprintf oc " fmrs %a, %a\n" ireg i freg_single f + fprintf oc " vmov %a, %a\n" ireg i freg_single f let fixup_conventions oc dir tyl = let rec fixup i tyl = @@ -608,11 +680,11 @@ module FixupHF = struct | [] -> 0 | (fr, Double, dr) :: act -> if fr = dr then fixup_outgoing oc act else begin - fprintf oc " fcpyd d%d, d%d\n" dr fr; + fprintf oc " vmov.f64 d%d, d%d\n" dr fr; 1 + fixup_outgoing oc act end | (fr, Single, sr) :: act -> - fprintf oc " fcpys s%d, s%d\n" sr (2*fr); + fprintf oc " vmov.f32 s%d, s%d\n" sr (2*fr); 1 + fixup_outgoing oc act let rec fixup_incoming oc = function @@ -620,13 +692,13 @@ module FixupHF = struct | (fr, Double, dr) :: act -> let n = fixup_incoming oc act in if fr = dr then n else begin - fprintf oc " fcpyd d%d, d%d\n" fr dr; + fprintf oc " vmov.f64 d%d, d%d\n" fr dr; 1 + n end | (fr, Single, sr) :: act -> let n = fixup_incoming oc act in if fr = sr then n else begin - fprintf oc " fcpys s%d, s%d\n" (2*fr) sr; + fprintf oc " vmov.f32 s%d, s%d\n" (2*fr) sr; 1 + n end @@ -658,29 +730,23 @@ let (fixup_arguments, fixup_result) = let shift_op oc = function | SOimm n -> fprintf oc "#%a" coqint n | SOreg r -> ireg oc r - 
| SOlslimm(r, n) -> fprintf oc "%a, lsl #%a" ireg r coqint n - | SOlslreg(r, r') -> fprintf oc "%a, lsl %a" ireg r ireg r' - | SOlsrimm(r, n) -> fprintf oc "%a, lsr #%a" ireg r coqint n - | SOlsrreg(r, r') -> fprintf oc "%a, lsr %a" ireg r ireg r' - | SOasrimm(r, n) -> fprintf oc "%a, asr #%a" ireg r coqint n - | SOasrreg(r, r') -> fprintf oc "%a, asr %a" ireg r ireg r' - | SOrorimm(r, n) -> fprintf oc "%a, ror #%a" ireg r coqint n - | SOrorreg(r, r') -> fprintf oc "%a, ror %a" ireg r ireg r' - -let shift_addr oc = function - | SAimm n -> fprintf oc "#%a" coqint n - | SAreg r -> ireg oc r - | SAlsl(r, n) -> fprintf oc "%a, lsl #%a" ireg r coqint n - | SAlsr(r, n) -> fprintf oc "%a, lsr #%a" ireg r coqint n - | SAasr(r, n) -> fprintf oc "%a, asr #%a" ireg r coqint n - | SAror(r, n) -> fprintf oc "%a, ror #%a" ireg r coqint n + | SOlsl(r, n) -> fprintf oc "%a, lsl #%a" ireg r coqint n + | SOlsr(r, n) -> fprintf oc "%a, lsr #%a" ireg r coqint n + | SOasr(r, n) -> fprintf oc "%a, asr #%a" ireg r coqint n + | SOror(r, n) -> fprintf oc "%a, ror #%a" ireg r coqint n let print_instruction oc = function (* Core instructions *) | Padd(r1, r2, so) -> - fprintf oc " add %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1 + fprintf oc " add%s %a, %a, %a\n" + (if !Clflags.option_fthumb && r2 <> IR14 then "s" else "") + ireg r1 ireg r2 shift_op so; 1 | Pand(r1, r2, so) -> - fprintf oc " and %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1 + fprintf oc " and%t %a, %a, %a\n" + thumbS ireg r1 ireg r2 shift_op so; 1 + | Pasr(r1, r2, r3) -> + fprintf oc " asr%t %a, %a, %a\n" + thumbS ireg r1 ireg r2 ireg r3; 1 | Pb lbl -> fprintf oc " b %a\n" print_label lbl; 1 | Pbc(bit, lbl) -> @@ -707,141 +773,181 @@ let print_instruction oc = function let n2 = fixup_result oc Incoming sg in n1 + 1 + n2 | Pbic(r1, r2, so) -> - fprintf oc " bic %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1 + fprintf oc " bic%t %a, %a, %a\n" + thumbS ireg r1 ireg r2 shift_op so; 1 | Pcmp(r1, so) -> fprintf oc " cmp %a, %a\n" ireg r1 shift_op so; 1 | Peor(r1, r2, so) -> - fprintf oc " eor %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1 + fprintf oc " eor%t %a, %a, %a\n" + thumbS ireg r1 ireg r2 shift_op so; 1 | Pldr(r1, r2, sa) | Pldr_a(r1, r2, sa) -> - fprintf oc " ldr %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 + fprintf oc " ldr %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pldrb(r1, r2, sa) -> - fprintf oc " ldrb %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 + fprintf oc " ldrb %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pldrh(r1, r2, sa) -> - fprintf oc " ldrh %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 + fprintf oc " ldrh %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pldrsb(r1, r2, sa) -> - fprintf oc " ldrsb %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 + fprintf oc " ldrsb %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pldrsh(r1, r2, sa) -> - fprintf oc " ldrsh %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 + fprintf oc " ldrsh %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 + | Plsl(r1, r2, r3) -> + fprintf oc " lsl%t %a, %a, %a\n" + thumbS ireg r1 ireg r2 ireg r3; 1 + | Plsr(r1, r2, r3) -> + fprintf oc " lsr%t %a, %a, %a\n" + thumbS ireg r1 ireg r2 ireg r3; 1 | Pmla(r1, r2, r3, r4) -> fprintf oc " mla %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1 | Pmov(r1, so) -> - fprintf oc " mov %a, %a\n" ireg r1 shift_op so; 1 - | Pmovc(bit, r1, so) -> - fprintf oc " mov%s %a, %a\n" (condition_name bit) ireg r1 shift_op so; 1 + fprintf oc " mov%t %a, %a\n" thumbS ireg r1 shift_op so; 1 + | Pmovw(r1, n) -> + fprintf oc " 
movw %a, #%a\n" ireg r1 coqint n; 1 + | Pmovt(r1, n) -> + fprintf oc " movt %a, #%a\n" ireg r1 coqint n; 1 | Pmul(r1, r2, r3) -> fprintf oc " mul %a, %a, %a\n" ireg r1 ireg r2 ireg r3; 1 | Pmvn(r1, so) -> - fprintf oc " mvn %a, %a\n" ireg r1 shift_op so; 1 + fprintf oc " mvn%t %a, %a\n" thumbS ireg r1 shift_op so; 1 | Porr(r1, r2, so) -> - fprintf oc " orr %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1 + fprintf oc " orr%t %a, %a, %a\n" + thumbS ireg r1 ireg r2 shift_op so; 1 | Prsb(r1, r2, so) -> - fprintf oc " rsb %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1 + fprintf oc " rsb%t %a, %a, %a\n" + thumbS ireg r1 ireg r2 shift_op so; 1 | Pstr(r1, r2, sa) | Pstr_a(r1, r2, sa) -> - fprintf oc " str %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; + fprintf oc " str %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; begin match r1, r2, sa with - | IR14, IR13, SAimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n) + | IR14, IR13, SOimm n -> cfi_rel_offset oc "lr" (camlint_of_coqint n) | _ -> () end; 1 | Pstrb(r1, r2, sa) -> - fprintf oc " strb %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 + fprintf oc " strb %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Pstrh(r1, r2, sa) -> - fprintf oc " strh %a, [%a, %a]\n" ireg r1 ireg r2 shift_addr sa; 1 + fprintf oc " strh %a, [%a, %a]\n" ireg r1 ireg r2 shift_op sa; 1 | Psdiv -> fprintf oc " bl __aeabi_idiv\n"; 1 + | Psbfx(r1, r2, lsb, sz) -> + fprintf oc " sbfx %a, %a, #%a, #%a\n" ireg r1 ireg r2 coqint lsb coqint sz; 1 | Psmull(r1, r2, r3, r4) -> fprintf oc " smull %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1 | Psub(r1, r2, so) -> - fprintf oc " sub %a, %a, %a\n" ireg r1 ireg r2 shift_op so; 1 + fprintf oc " sub%t %a, %a, %a\n" + thumbS ireg r1 ireg r2 shift_op so; 1 | Pudiv -> fprintf oc " bl __aeabi_uidiv\n"; 1 | Pumull(r1, r2, r3, r4) -> fprintf oc " umull %a, %a, %a, %a\n" ireg r1 ireg r2 ireg r3 ireg r4; 1 - (* Floating-point coprocessor instructions *) + (* Floating-point VFD instructions *) | Pfcpyd(r1, r2) -> - fprintf oc " fcpyd %a, %a\n" freg r1 freg r2; 1 + fprintf oc " vmov.f64 %a, %a\n" freg r1 freg r2; 1 | Pfabsd(r1, r2) -> - fprintf oc " fabsd %a, %a\n" freg r1 freg r2; 1 + fprintf oc " vabs.f64 %a, %a\n" freg r1 freg r2; 1 | Pfnegd(r1, r2) -> - fprintf oc " fnegd %a, %a\n" freg r1 freg r2; 1 + fprintf oc " vneg.f64 %a, %a\n" freg r1 freg r2; 1 | Pfaddd(r1, r2, r3) -> - fprintf oc " faddd %a, %a, %a\n" freg r1 freg r2 freg r3; 1 + fprintf oc " vadd.f64 %a, %a, %a\n" freg r1 freg r2 freg r3; 1 | Pfdivd(r1, r2, r3) -> - fprintf oc " fdivd %a, %a, %a\n" freg r1 freg r2 freg r3; 1 + fprintf oc " vdiv.f64 %a, %a, %a\n" freg r1 freg r2 freg r3; 1 | Pfmuld(r1, r2, r3) -> - fprintf oc " fmuld %a, %a, %a\n" freg r1 freg r2 freg r3; 1 + fprintf oc " vmul.f64 %a, %a, %a\n" freg r1 freg r2 freg r3; 1 | Pfsubd(r1, r2, r3) -> - fprintf oc " fsubd %a, %a, %a\n" freg r1 freg r2 freg r3; 1 + fprintf oc " vsub.f64 %a, %a, %a\n" freg r1 freg r2 freg r3; 1 | Pflid(r1, f) -> - (* We could make good use of the fconstd instruction, but it's available - in VFD v3 and up, not in v1 nor v2 *) - let lbl = label_float f in - fprintf oc " fldd %a, .L%d @ %.12g\n" freg r1 lbl (camlfloat_of_coqfloat f); 1 + let f = camlint64_of_coqint(Floats.Float.to_bits f) in + if !vfpv3 && is_immediate_float64 f then begin + fprintf oc " vmov.f64 %a, #%F\n" + freg r1 (Int64.float_of_bits f); 1 + (* immediate floats have at most 4 bits of fraction, so they + should print exactly with OCaml's F decimal format. 
*) + end else if !literals_in_code then begin + let lbl = label_float f in + fprintf oc " vldr %a, .L%d @ %.12g\n" + freg r1 lbl (Int64.float_of_bits f); 1 + end else begin + let lbl = label_float f in + fprintf oc " movw r14, #:lower16:.L%d\n" lbl; + fprintf oc " movt r14, #:upper16:.L%d\n" lbl; + fprintf oc " vldr %a, [r14, #0] @ %.12g\n" + freg r1 (Int64.float_of_bits f); 3 + end | Pfcmpd(r1, r2) -> - fprintf oc " fcmpd %a, %a\n" freg r1 freg r2; - fprintf oc " fmstat\n"; 2 + fprintf oc " vcmp.f64 %a, %a\n" freg r1 freg r2; + fprintf oc " vmrs APSR_nzcv, FPSCR\n"; 2 | Pfcmpzd(r1) -> - fprintf oc " fcmpzd %a\n" freg r1; - fprintf oc " fmstat\n"; 2 + fprintf oc " vcmp.f64 %a, #0\n" freg r1; + fprintf oc " vmrs APSR_nzcv, FPSCR\n"; 2 | Pfsitod(r1, r2) -> - fprintf oc " fmsr %a, %a\n" freg_single r1 ireg r2; - fprintf oc " fsitod %a, %a\n" freg r1 freg_single r1; 2 + fprintf oc " vmov %a, %a\n" freg_single r1 ireg r2; + fprintf oc " vcvt.f64.s32 %a, %a\n" freg r1 freg_single r1; 2 | Pfuitod(r1, r2) -> - fprintf oc " fmsr %a, %a\n" freg_single r1 ireg r2; - fprintf oc " fuitod %a, %a\n" freg r1 freg_single r1; 2 + fprintf oc " vmov %a, %a\n" freg_single r1 ireg r2; + fprintf oc " vcvt.f64.u32 %a, %a\n" freg r1 freg_single r1; 2 | Pftosizd(r1, r2) -> - fprintf oc " ftosizd %a, %a\n" freg_single FR6 freg r2; - fprintf oc " fmrs %a, %a\n" ireg r1 freg_single FR6; 2 + fprintf oc " vcvt.s32.f64 %a, %a\n" freg_single FR6 freg r2; + fprintf oc " vmov %a, %a\n" ireg r1 freg_single FR6; 2 | Pftouizd(r1, r2) -> - fprintf oc " ftouizd %a, %a\n" freg_single FR6 freg r2; - fprintf oc " fmrs %a, %a\n" ireg r1 freg_single FR6; 2 + fprintf oc " vcvt.u32.f64 %a, %a\n" freg_single FR6 freg r2; + fprintf oc " vmov %a, %a\n" ireg r1 freg_single FR6; 2 | Pfabss(r1, r2) -> - fprintf oc " fabss %a, %a\n" freg_single r1 freg_single r2; 1 + fprintf oc " vabs.f32 %a, %a\n" freg_single r1 freg_single r2; 1 | Pfnegs(r1, r2) -> - fprintf oc " fnegs %a, %a\n" freg_single r1 freg_single r2; 1 + fprintf oc " vneg.f32 %a, %a\n" freg_single r1 freg_single r2; 1 | Pfadds(r1, r2, r3) -> - fprintf oc " fadds %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1 + fprintf oc " vadd.f32 %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1 | Pfdivs(r1, r2, r3) -> - fprintf oc " fdivs %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1 + fprintf oc " vdiv.f32 %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1 | Pfmuls(r1, r2, r3) -> - fprintf oc " fmuls %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1 + fprintf oc " vmul.f32 %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1 | Pfsubs(r1, r2, r3) -> - fprintf oc " fsubs %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1 + fprintf oc " vsub.f32 %a, %a, %a\n" freg_single r1 freg_single r2 freg_single r3; 1 | Pflis(r1, f) -> - (* We could make good use of the fconsts instruction, but it's available - in VFD v3 and up, not in v1 nor v2 *) - let lbl = label_float32 f in - fprintf oc " flds %a, .L%d @ %.12g\n" freg_single r1 lbl (camlfloat_of_coqfloat32 f); 1 + let f = camlint_of_coqint(Floats.Float32.to_bits f) in + if !vfpv3 && is_immediate_float32 f then begin + fprintf oc " vmov.f32 %a, #%F\n" + freg_single r1 (Int32.float_of_bits f); 1 + (* immediate floats have at most 4 bits of fraction, so they + should print exactly with OCaml's F decimal format. 
*) + end else if !literals_in_code then begin + let lbl = label_float32 f in + fprintf oc " vldr %a, .L%d @ %.12g\n" + freg_single r1 lbl (Int32.float_of_bits f); 1 + end else begin + fprintf oc " movw r14, #%ld\n" (Int32.logand f 0xFFFFl); + fprintf oc " movt r14, #%ld\n" (Int32.shift_right_logical f 16); + fprintf oc " vmov %a, r14 @ %.12g\n" + freg_single r1 (Int32.float_of_bits f); 3 + end | Pfcmps(r1, r2) -> - fprintf oc " fcmps %a, %a\n" freg_single r1 freg_single r2; - fprintf oc " fmstat\n"; 2 + fprintf oc " vcmp.f32 %a, %a\n" freg_single r1 freg_single r2; + fprintf oc " vmrs APSR_nzcv, FPSCR\n"; 2 | Pfcmpzs(r1) -> - fprintf oc " fcmpzs %a\n" freg_single r1; - fprintf oc " fmstat\n"; 2 + fprintf oc " vcmp.f32 %a, #0\n" freg_single r1; + fprintf oc " vmrs APSR_nzcv, FPSCR\n"; 2 | Pfsitos(r1, r2) -> - fprintf oc " fmsr %a, %a\n" freg_single r1 ireg r2; - fprintf oc " fsitos %a, %a\n" freg_single r1 freg_single r1; 2 + fprintf oc " vmov %a, %a\n" freg_single r1 ireg r2; + fprintf oc " vcvt.f32.s32 %a, %a\n" freg_single r1 freg_single r1; 2 | Pfuitos(r1, r2) -> - fprintf oc " fmsr %a, %a\n" freg_single r1 ireg r2; - fprintf oc " fuitos %a, %a\n" freg_single r1 freg_single r1; 2 + fprintf oc " vmov %a, %a\n" freg_single r1 ireg r2; + fprintf oc " vcvt.f32.u32 %a, %a\n" freg_single r1 freg_single r1; 2 | Pftosizs(r1, r2) -> - fprintf oc " ftosizs %a, %a\n" freg_single FR6 freg_single r2; - fprintf oc " fmrs %a, %a\n" ireg r1 freg_single FR6; 2 + fprintf oc " vcvt.s32.f32 %a, %a\n" freg_single FR6 freg_single r2; + fprintf oc " vmov %a, %a\n" ireg r1 freg_single FR6; 2 | Pftouizs(r1, r2) -> - fprintf oc " ftouizs %a, %a\n" freg_single FR6 freg_single r2; - fprintf oc " fmrs %a, %a\n" ireg r1 freg_single FR6; 2 + fprintf oc " vcvt.u32.f32 %a, %a\n" freg_single FR6 freg_single r2; + fprintf oc " vmov %a, %a\n" ireg r1 freg_single FR6; 2 | Pfcvtsd(r1, r2) -> - fprintf oc " fcvtsd %a, %a\n" freg_single r1 freg r2; 1 + fprintf oc " vcvt.f32.f64 %a, %a\n" freg_single r1 freg r2; 1 | Pfcvtds(r1, r2) -> - fprintf oc " fcvtds %a, %a\n" freg r1 freg_single r2; 1 + fprintf oc " vcvt.f64.f32 %a, %a\n" freg r1 freg_single r2; 1 | Pfldd(r1, r2, n) | Pfldd_a(r1, r2, n) -> - fprintf oc " fldd %a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1 + fprintf oc " vldr %a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1 | Pflds(r1, r2, n) -> - fprintf oc " flds %a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1 + fprintf oc " vldr %a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1 | Pfstd(r1, r2, n) | Pfstd_a(r1, r2, n) -> - fprintf oc " fstd %a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1 + fprintf oc " vstr %a, [%a, #%a]\n" freg r1 ireg r2 coqint n; 1 | Pfsts(r1, r2, n) -> - fprintf oc " fsts %a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1 + fprintf oc " vstr %a, [%a, #%a]\n" freg_single r1 ireg r2 coqint n; 1 (* Pseudo-instructions *) | Pallocframe(sz, ofs) -> fprintf oc " mov r12, sp\n"; @@ -866,17 +972,32 @@ let print_instruction oc = function | Plabel lbl -> fprintf oc "%a:\n" print_label lbl; 0 | Ploadsymbol(r1, id, ofs) -> - let lbl = label_symbol id ofs in - fprintf oc " ldr %a, .L%d @ %a\n" - ireg r1 lbl print_symb_ofs (id, ofs); 1 + loadsymbol oc r1 id ofs + | Pmovite(cond, r1, ifso, ifnot) -> + fprintf oc " ite %s\n" (condition_name cond); + fprintf oc " mov%s %a, %a\n" + (condition_name cond) ireg r1 shift_op ifso; + fprintf oc " mov%s %a, %a\n" + (neg_condition_name cond) ireg r1 shift_op ifnot; 2 | Pbtbl(r, tbl) -> - fprintf oc " mov r14, %a, lsl #2\n" ireg r; - fprintf oc " ldr pc, [pc, r14]\n"; - 
fprintf oc " mov r0, r0\n"; (* no-op *) - List.iter - (fun l -> fprintf oc " .word %a\n" print_label l) - tbl; - 3 + List.length tbl + if !Clflags.option_fthumb then begin + let lbl = new_label() in + fprintf oc " adr r14, .L%d\n" lbl; + fprintf oc " add r14, r14, %a, lsl #2\n" ireg r; + fprintf oc " mov pc, r14\n"; + fprintf oc ".L%d:\n" lbl; + List.iter + (fun l -> fprintf oc " b.w %a\n" print_label l) + tbl; + 3 + List.length tbl + end else begin + fprintf oc " add pc, pc, %a, lsl #2\n" ireg r; + fprintf oc " nop\n"; + List.iter + (fun l -> fprintf oc " b %a\n" print_label l) + tbl; + 2 + List.length tbl + end | Pbuiltin(ef, args, res) -> begin match ef with | EF_builtin(name, sg) -> @@ -938,25 +1059,31 @@ let print_function oc name fn = reset_constants(); current_function_sig := fn.fn_sig; currpos := 0; - let text = + let (text, lit) = match C2C.atom_sections name with - | t :: _ -> t - | _ -> Section_text in + | t :: l :: _ -> (t, l) + | _ -> (Section_text, Section_literal) in section oc text; let alignment = match !Clflags.option_falignfunctions with Some n -> n | None -> 4 in fprintf oc " .balign %d\n" alignment; if not (C2C.atom_is_static name) then fprintf oc " .global %a\n" print_symb name; + if !Clflags.option_fthumb then + fprintf oc " .thumb_func\n"; fprintf oc "%a:\n" print_symb name; print_location oc (C2C.atom_location name); cfi_startproc oc; ignore (fixup_arguments oc Incoming fn.fn_sig); print_instructions oc fn.fn_code; - emit_constants oc; + if !literals_in_code then emit_constants oc; cfi_endproc oc; fprintf oc " .type %a, %%function\n" print_symb name; - fprintf oc " .size %a, . - %a\n" print_symb name print_symb name + fprintf oc " .size %a, . - %a\n" print_symb name print_symb name; + if not !literals_in_code && !size_constants > 0 then begin + section oc lit; + emit_constants oc + end (* Data *) @@ -1031,9 +1158,14 @@ let print_globdef oc (name, gdef) = | Gvar v -> print_var oc name v let print_program oc p = -(* fprintf oc " .fpu vfp\n"; *) PrintAnnot.print_version_and_options oc comment; Hashtbl.clear filename_num; + fprintf oc " .syntax unified\n"; + fprintf oc " .arch %s\n" + (if !Clflags.option_fthumb then "armv7" else "armv6"); + fprintf oc " .fpu %s\n" + (if !vfpv3 then "vfpv3-d16" else "vfpv2"); + fprintf oc " .%s\n" (if !Clflags.option_fthumb then "thumb" else "arm"); List.iter (print_globdef oc) p.prog_defs diff --git a/arm/SelectOp.vp b/arm/SelectOp.vp index ad8a945..6102d82 100644 --- a/arm/SelectOp.vp +++ b/arm/SelectOp.vp @@ -454,7 +454,7 @@ Definition can_use_Aindexed2 (chunk: memory_chunk): bool := | Many64 => false end. -Definition can_use_Aindexed2shift (chunk: memory_chunk): bool := +Definition can_use_Aindexed2shift (chunk: memory_chunk) (s: shift): bool := match chunk with | Mint8signed => false | Mint8unsigned => true @@ -466,14 +466,20 @@ Definition can_use_Aindexed2shift (chunk: memory_chunk): bool := | Mfloat64 => false | Many32 => true | Many64 => false - end. + end && + (if thumb tt then + match s with + | Slsl s => Int.ltu s (Int.repr 4) + | _ => false + end + else true). 
Nondetfunction addressing (chunk: memory_chunk) (e: expr) := match e with | Eop (Oaddrstack n) Enil => (Ainstack n, Enil) | Eop (Oaddimm n) (e1:::Enil) => (Aindexed n, e1:::Enil) | Eop (Oaddshift s) (e1:::e2:::Enil) => - if can_use_Aindexed2shift chunk + if can_use_Aindexed2shift chunk s then (Aindexed2shift s, e1:::e2:::Enil) else (Aindexed Int.zero, Eop (Oaddshift s) (e1:::e2:::Enil) ::: Enil) | Eop Oadd (e1:::e2:::Enil) => diff --git a/driver/Clflags.ml b/driver/Clflags.ml index 610d807..7104c32 100644 --- a/driver/Clflags.ml +++ b/driver/Clflags.ml @@ -28,6 +28,7 @@ let option_falignfunctions = ref (None: int option) let option_falignbranchtargets = ref 0 let option_faligncondbranchs = ref 0 let option_finline_asm = ref false +let option_fthumb = ref false let option_Osize = ref false let option_dparse = ref false let option_dcmedium = ref false diff --git a/driver/Compopts.v b/driver/Compopts.v index 205f768..01109f5 100644 --- a/driver/Compopts.v +++ b/driver/Compopts.v @@ -30,4 +30,5 @@ Parameter va_strict: unit -> bool. (** Flag -ftailcalls. For tail call optimization. *) Parameter eliminate_tailcalls: unit -> bool. - +(** Flag -fthumb. For the ARM back-end. *) +Parameter thumb: unit -> bool. diff --git a/driver/Driver.ml b/driver/Driver.ml index 88871f7..556a476 100644 --- a/driver/Driver.ml +++ b/driver/Driver.ml @@ -429,6 +429,7 @@ Code generation options: (use -fno- to turn off -f) : -O Optimize for speed [on by default] -Os Optimize for code size -ffpu Use FP registers for some integer operations [on] + -fthumb (ARM only) Use Thumb2 instruction encoding -fsmall-data Set maximal size for allocation in small data area -fsmall-const Set maximal size for allocation in small constant area -ffloat-const-prop Control constant propagation of floats @@ -552,6 +553,7 @@ let cmdline_actions = @ f_opt "inline-asm" option_finline_asm @ f_opt "fpu" option_ffpu @ f_opt "sse" option_ffpu (* backward compatibility *) + @ f_opt "thumb" option_fthumb let _ = Gc.set { (Gc.get()) with diff --git a/extraction/extraction.v b/extraction/extraction.v index 0f23264..f0033af 100644 --- a/extraction/extraction.v +++ b/extraction/extraction.v @@ -93,6 +93,8 @@ Extract Constant Compopts.generate_float_constants => "fun _ -> !Clflags.option_ffloatconstprop >= 2". Extract Constant Compopts.eliminate_tailcalls => "fun _ -> !Clflags.option_ftailcalls". +Extract Constant Compopts.thumb => + "fun _ -> !Clflags.option_fthumb". (* Compiler *) Extract Constant Compiler.print_Clight => "PrintClight.print_if". diff --git a/runtime/arm/i64_dtos.S b/runtime/arm/i64_dtos.S index e454fd4..638a5ff 100644 --- a/runtime/arm/i64_dtos.S +++ b/runtime/arm/i64_dtos.S @@ -34,66 +34,67 @@ @ Helper functions for 64-bit integer arithmetic. ARM version. 
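@ Worked example of the exponent arithmetic used below (illustration only,
@ not part of the original code): for the double 2^62 the high word is
@ HI = 0x43D00000, so (HI << 1) >> 21 = 0x43D = 1085, the biased exponent,
@ and 1085 - 51 - 1024 = 10 = EXP.  EXP is >= 0 and in range, so the
@ 53-bit mantissa (implicit leading 1 included) is shifted left by 10,
@ yielding 2^52 << 10 = 2^62 as expected.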
- .text +#include "sysdeps.h" @@@ Conversion from double float to signed 64-bit integer - .global __i64_dtos -__i64_dtos: -#ifdef VARIANT_hardfloat - fmrrd r0, r1, d0 +FUNCTION(__i64_dtos) +#ifndef VARIANT_eabi + vmov r0, r1, d0 #endif - mov r12, r1, asr #31 @ save sign of result in r12 + ASR r12, r1, #31 @ save sign of result in r12 @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2 @ note: 1023 + 52 = 1075 = 1024 + 51 @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21 - mov r2, r1, lsl #1 - mov r2, r2, lsr #21 - sub r2, r2, #51 - sub r2, r2, #1024 + LSL r2, r1, #1 + LSR r2, r2, #21 + SUB r2, r2, #51 + SUB r2, r2, #1024 @ check range of exponent cmn r2, #52 @ if EXP < -52, |double| is < 1.0 blt 1f cmp r2, #11 @ if EXP >= 63 - 52, |double| is >= 2^63 bge 2f @ extract true mantissa - bic r1, r1, #0xFF000000 - bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000 - orr r1, r1, #0x00100000 @ HI |= 0x00100000 + BIC r1, r1, #0xFF000000 + BIC r1, r1, #0x00F00000 @ HI &= ~0xFFF00000 + ORR r1, r1, #0x00100000 @ HI |= 0x00100000 @ shift it appropriately cmp r2, #0 blt 3f @ EXP >= 0: shift left by EXP. Note that EXP < 12 rsb r3, r2, #32 @ r3 = 32 - amount - mov r1, r1, lsl r2 - orr r1, r1, r0, lsr r3 - mov r0, r0, lsl r2 + LSL r1, r1, r2 + LSR r3, r0, r3 + ORR r1, r1, r3 + LSL r0, r0, r2 b 4f @ EXP < 0: shift right by -EXP. Note that -EXP <= 52 but can be >= 32 -3: rsb r2, r2, #0 @ r2 = -EXP - shift amount - rsb r3, r2, #32 @ r3 = 32 - amount - mov r0, r0, lsr r2 - orr r0, r0, r1, lsl r3 - sub r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s) - orr r0, r0, r1, lsr r3 - mov r1, r1, lsr r2 +3: RSB r2, r2, #0 @ r2 = -EXP = shift amount + RSB r3, r2, #32 @ r3 = 32 - amount + LSR r0, r0, r2 + LSL r3, r1, r3 + ORR r0, r0, r3 + SUB r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s) + LSR r3, r1, r3 + ORR r0, r0, r3 + LSR r1, r1, r2 @ apply sign to result -4: eor r0, r0, r12 - eor r1, r1, r12 +4: EOR r0, r0, r12 + EOR r1, r1, r12 subs r0, r0, r12 sbc r1, r1, r12 bx lr @ special cases -1: mov r0, #0 @ result is 0 - mov r1, #0 +1: MOV r0, #0 @ result is 0 + MOV r1, #0 bx lr -2: cmp r4, #0 +2: cmp r12, #0 blt 6f mvn r0, #0 @ result is 0x7F....FF (MAX_SINT) - mov r1, r0, lsr #1 + LSR r1, r0, #1 bx lr -6: mov r0, #0 @ result is 0x80....00 (MIN_SINT) - mov r1, #0x80000000 +6: MOV r0, #0 @ result is 0x80....00 (MIN_SINT) + MOV r1, #0x80000000 bx lr - .type __i64_dtos, %function - .size __i64_dtos, . - __i64_dtos +ENDFUNCTION(__i64_dtos) diff --git a/runtime/arm/i64_dtou.S b/runtime/arm/i64_dtou.S index a9e7b65..1c632d2 100644 --- a/runtime/arm/i64_dtou.S +++ b/runtime/arm/i64_dtou.S @@ -34,58 +34,58 @@ @ Helper functions for 64-bit integer arithmetic. ARM version. - .text +#include "sysdeps.h" @@@ Conversion from double float to unsigned 64-bit integer - .global __i64_dtou -__i64_dtou: -#ifdef VARIANT_hardfloat - fmrrd r0, r1, d0 +FUNCTION(__i64_dtou) +#ifndef VARIANT_eabi + vmov r0, r1, d0 #endif cmp r1, #0 @ is double < 0 ? 
blt 1f @ then it converts to 0 @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2 @ note: 1023 + 52 = 1075 = 1024 + 51 @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21 - mov r2, r1, lsl #1 - mov r2, r2, lsr #21 - sub r2, r2, #51 - sub r2, r2, #1024 + LSL r2, r1, #1 + LSR r2, r2, #21 + SUB r2, r2, #51 + SUB r2, r2, #1024 @ check range of exponent cmn r2, #52 @ if EXP < -52, double is < 1.0 blt 1f cmp r2, #12 @ if EXP >= 64 - 52, double is >= 2^64 bge 2f @ extract true mantissa - bic r1, r1, #0xFF000000 - bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000 - orr r1, r1, #0x00100000 @ HI |= 0x00100000 + BIC r1, r1, #0xFF000000 + BIC r1, r1, #0x00F00000 @ HI &= ~0xFFF00000 + ORR r1, r1, #0x00100000 @ HI |= 0x00100000 @ shift it appropriately cmp r2, #0 blt 3f @ EXP >= 0: shift left by EXP. Note that EXP < 12 rsb r3, r2, #32 @ r3 = 32 - amount - mov r1, r1, lsl r2 - orr r1, r1, r0, lsr r3 - mov r0, r0, lsl r2 + LSL r1, r1, r2 + LSR r3, r0, r3 + ORR r1, r1, r3 + LSL r0, r0, r2 bx lr @ EXP < 0: shift right by -EXP. Note that -EXP <= 52 but can be >= 32 -3: rsb r2, r2, #0 @ r2 = -EXP - shift amount - rsb r3, r2, #32 @ r3 = 32 - amount - mov r0, r0, lsr r2 - orr r0, r0, r1, lsl r3 - sub r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s) - orr r0, r0, r1, lsr r3 - mov r1, r1, lsr r2 +3: RSB r2, r2, #0 @ r2 = -EXP = shift amount + RSB r3, r2, #32 @ r3 = 32 - amount + LSR r0, r0, r2 + LSL r3, r1, r3 + ORR r0, r0, r3 + SUB r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s) + LSR r3, r1, r3 + ORR r0, r0, r3 + LSR r1, r1, r2 bx lr @ special cases -1: mov r0, #0 @ result is 0 - mov r1, #0 +1: MOV r0, #0 @ result is 0 + MOV r1, #0 bx lr 2: mvn r0, #0 @ result is 0xFF....FF (MAX_UINT) - mvn r1, #0 + MOV r1, r0 bx lr - .type __i64_dtou, %function - .size __i64_dtou, . - __i64_dtou - +ENDFUNCTION(__i64_dtou) diff --git a/runtime/arm/i64_sar.S b/runtime/arm/i64_sar.S new file mode 100644 index 0000000..1bbd8a7 --- /dev/null +++ b/runtime/arm/i64_sar.S @@ -0,0 +1,57 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +#include "sysdeps.h" + +@@@ Shift right signed + +FUNCTION(__i64_sar) + AND r2, r2, #63 @ normalize amount to 0...63 + rsbs r3, r2, #32 @ r3 = 32 - amount + ble 1f @ branch if <= 0, namely if amount >= 32 + LSR r0, r0, r2 + LSL r3, r1, r3 + ORR r0, r1, r3 + ASR r1, r1, r2 + bx lr +1: + SUB r2, r2, #32 + ASR r0, r1, r2 + ASR r1, r1, #31 + bx lr +ENDFUNCTION(__i64_sar) + + diff --git a/runtime/arm/i64_sar.s b/runtime/arm/i64_sar.s deleted file mode 100644 index a96d092..0000000 --- a/runtime/arm/i64_sar.s +++ /dev/null @@ -1,57 +0,0 @@ -@ ***************************************************************** -@ -@ The Compcert verified compiler -@ -@ Xavier Leroy, INRIA Paris-Rocquencourt -@ -@ Copyright (c) 2013 Institut National de Recherche en Informatique et -@ en Automatique. -@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions are met: -@ * Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. -@ * Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ * Neither the name of the nor the -@ names of its contributors may be used to endorse or promote products -@ derived from this software without specific prior written permission. -@ -@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -@ -@ ********************************************************************* - -@ Helper functions for 64-bit integer arithmetic. ARM version. - - .text - -@@@ Shift right signed - - .global __i64_sar -__i64_sar: - and r2, r2, #63 @ normalize amount to 0...63 - rsbs r3, r2, #32 @ r3 = 32 - amount - ble 1f @ branch if <= 0, namely if amount >= 32 - mov r0, r0, lsr r2 - orr r0, r1, lsl r3 - mov r1, r1, asr r2 - bx lr -1: - sub r2, r2, #32 - mov r0, r1, asr r2 - mov r1, r1, asr #31 - bx lr - .type __i64_sar, %function - .size __i64_sar, . 
- __i64_sar - diff --git a/runtime/arm/i64_sdiv.S b/runtime/arm/i64_sdiv.S new file mode 100644 index 0000000..dd88c12 --- /dev/null +++ b/runtime/arm/i64_sdiv.S @@ -0,0 +1,61 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +#include "sysdeps.h" + +@@@ Signed division + +FUNCTION(__i64_sdiv) + push {r4, r5, r6, r7, r8, r10, lr} + ASR r4, r1, #31 @ r4 = sign of N + ASR r5, r3, #31 @ r5 = sign of D + EOR r10, r4, r5 @ r10 = sign of result + EOR r0, r0, r4 @ take absolute value of N + EOR r1, r1, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31) + subs r0, r0, r4 + sbc r1, r1, r4 + EOR r2, r2, r5 @ take absolute value of D + EOR r3, r3, r5 + subs r2, r2, r5 + sbc r3, r3, r5 + bl __i64_udivmod @ do unsigned division + EOR r0, r4, r10 @ apply expected sign + EOR r1, r5, r10 + subs r0, r0, r10 + sbc r1, r1, r10 + pop {r4, r5, r6, r7, r8, r10, lr} + bx lr +ENDFUNCTION(__i64_sdiv) diff --git a/runtime/arm/i64_sdiv.s b/runtime/arm/i64_sdiv.s deleted file mode 100644 index e96008d..0000000 --- a/runtime/arm/i64_sdiv.s +++ /dev/null @@ -1,66 +0,0 @@ -@ ***************************************************************** -@ -@ The Compcert verified compiler -@ -@ Xavier Leroy, INRIA Paris-Rocquencourt -@ -@ Copyright (c) 2013 Institut National de Recherche en Informatique et -@ en Automatique. -@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions are met: -@ * Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. 
-@ * Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ * Neither the name of the nor the -@ names of its contributors may be used to endorse or promote products -@ derived from this software without specific prior written permission. -@ -@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -@ -@ ********************************************************************* - -@ Helper functions for 64-bit integer arithmetic. ARM version. - - .text - -@@@ Signed division - - .global __i64_sdiv -__i64_sdiv: - push {r4, r5, r6, r7, r8, r10, lr} - eor r10, r1, r3 @ r10 = sign of result - mov r4, r1, asr #31 @ take absolute value of N - eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31) - eor r1, r1, r4 - subs r0, r0, r4 - sbc r1, r1, r4 - mov r4, r3, asr #31 @ take absolute value of D - eor r2, r2, r4 - eor r3, r3, r4 - subs r2, r2, r4 - sbc r3, r3, r4 - bl __i64_udivmod @ do unsigned division - mov r0, r4 - mov r1, r5 - eor r0, r0, r10, asr#31 @ apply expected sign - eor r1, r1, r10, asr#31 - subs r0, r0, r10, asr#31 - sbc r1, r1, r10, asr#31 - pop {r4, r5, r6, r7, r8, r10, lr} - bx lr - .type __i64_sdiv, %function - .size __i64_sdiv, . - __i64_sdiv - diff --git a/runtime/arm/i64_shl.S b/runtime/arm/i64_shl.S new file mode 100644 index 0000000..66569d3 --- /dev/null +++ b/runtime/arm/i64_shl.S @@ -0,0 +1,71 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +#include "sysdeps.h" + +@@@ Shift left + +@ Note on ARM shifts: the shift amount is taken modulo 256. +@ If shift amount mod 256 >= 32, the shift produces 0. + +@ Algorithm: +@ RH = (XH << N) | (XL >> (32-N) | (XL << (N-32)) +@ RL = XL << N +@ If N = 0: +@ RH = XH | 0 | 0 +@ RL = XL +@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 225 <= N-32 mod 256 <= 255 +@ RH = (XH << N) | (XL >> (32-N) | 0 +@ RL = XL << N +@ If N = 32: +@ RH = 0 | XL | 0 +@ RL = 0 +@ If 33 <= N <= 63: 225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31 +@ RH = 0 | 0 | (XL << (N-32)) +@ RL = 0 + +FUNCTION(__i64_shl) + AND r2, r2, #63 @ normalize amount to 0...63 + RSB r3, r2, #32 @ r3 = 32 - amount + LSL r1, r1, r2 + LSR r3, r0, r3 + ORR r1, r1, r3 + SUB r3, r2, #32 @ r3 = amount - 32 + LSL r3, r0, r3 + ORR r1, r1, r3 + LSL r0, r0, r2 + bx lr +ENDFUNCTION(__i64_shl) diff --git a/runtime/arm/i64_shl.s b/runtime/arm/i64_shl.s deleted file mode 100644 index afd92db..0000000 --- a/runtime/arm/i64_shl.s +++ /dev/null @@ -1,72 +0,0 @@ -@ ***************************************************************** -@ -@ The Compcert verified compiler -@ -@ Xavier Leroy, INRIA Paris-Rocquencourt -@ -@ Copyright (c) 2013 Institut National de Recherche en Informatique et -@ en Automatique. -@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions are met: -@ * Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. -@ * Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ * Neither the name of the nor the -@ names of its contributors may be used to endorse or promote products -@ derived from this software without specific prior written permission. -@ -@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -@ -@ ********************************************************************* - -@ Helper functions for 64-bit integer arithmetic. ARM version. - - .text - -@@@ Shift left - -@ Note on ARM shifts: the shift amount is taken modulo 256.
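Because the branchless formula above depends on the mod-256 behaviour of ARM register-specified shifts, a rough C sketch of the __i64_shl computation may help; arm_lsl, arm_lsr and model_i64_shl are illustrative names, and the ARM shift semantics is made explicit because plain C shifts by 32 or more are undefined:

    #include <stdint.h>

    /* Illustrative sketch: ARM register-specified shifts take the amount
       modulo 256 and yield 0 once that amount reaches 32. */
    static uint32_t arm_lsl(uint32_t x, uint32_t n) {
      n &= 0xFF;
      return n < 32 ? x << n : 0;
    }
    static uint32_t arm_lsr(uint32_t x, uint32_t n) {
      n &= 0xFF;
      return n < 32 ? x >> n : 0;
    }

    uint64_t model_i64_shl(uint64_t x, uint32_t n) {
      uint32_t xl = (uint32_t) x, xh = (uint32_t) (x >> 32);
      n &= 63;                              /* normalize amount to 0...63 */
      uint32_t rh = arm_lsl(xh, n)          /* contributes only when N <= 31 */
                  | arm_lsr(xl, 32 - n)     /* contributes only when 1 <= N <= 32 */
                  | arm_lsl(xl, n - 32);    /* contributes only when N >= 32 */
      uint32_t rl = arm_lsl(xl, n);
      return ((uint64_t) rh << 32) | rl;
    }

__i64_shr below is the mirror image of this, with the roles of XL and XH and of the shift directions swapped; __i64_sar cannot use the same OR-of-three-shifts trick because the high word must be filled with copies of the sign bit rather than with zeros, hence its explicit branch on amounts of 32 or more.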
-@ If shift amount mod 256 >= 32, the shift produces 0. - -@ Algorithm: -@ RH = (XH << N) | (XL >> (32-N) | (XL << (N-32)) -@ RL = XL << N -@ If N = 0: -@ RH = XH | 0 | 0 -@ RL = XL -@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 255 <= N-32 mod 256 <= 255 -@ RH = (XH << N) | (XL >> (32-N) | 0 -@ RL = XL << N -@ If N = 32: -@ RH = 0 | XL | 0 -@ RL = 0 -@ If 33 <= N <= 63: 255 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31 -@ RH = 0 | 0 | (XL << (N-32)) -@ RL = 0 - - .global __i64_shl -__i64_shl: - and r2, r2, #63 @ normalize amount to 0...63 - rsb r3, r2, #32 @ r3 = 32 - amount - mov r1, r1, lsl r2 - orr r1, r1, r0, lsr r3 - sub r3, r2, #32 @ r3 = amount - 32 - orr r1, r1, r0, lsl r3 - mov r0, r0, lsl r2 - bx lr - .type __i64_shl, %function - .size __i64_shl, . - __i64_shl - diff --git a/runtime/arm/i64_shr.S b/runtime/arm/i64_shr.S new file mode 100644 index 0000000..a5418f4 --- /dev/null +++ b/runtime/arm/i64_shr.S @@ -0,0 +1,71 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +#include "sysdeps.h" + +@@@ Shift right unsigned + +@ Note on ARM shifts: the shift amount is taken modulo 256. +@ If shift amount mod 256 >= 32, the shift produces 0. 
+ +@ Algorithm: +@ RL = (XL >> N) | (XH << (32-N) | (XH >> (N-32)) +@ RH = XH >> N +@ If N = 0: +@ RL = XL | 0 | 0 +@ RH = XH +@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 225 <= N-32 mod 256 <= 255 +@ RL = (XL >> N) | (XH >> (32-N) | 0 +@ RH = XH >> N +@ If N = 32: +@ RL = 0 | XH | 0 +@ RH = 0 +@ If 33 <= N <= 63: 225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31 +@ RL = 0 | 0 | (XH >> (N-32)) +@ RH = 0 + +FUNCTION(__i64_shr) + AND r2, r2, #63 @ normalize amount to 0...63 + RSB r3, r2, #32 @ r3 = 32 - amount + LSR r0, r0, r2 + LSL r3, r1, r3 + ORR r0, r0, r3 + SUB r3, r2, #32 @ r3 = amount - 32 + LSR r3, r1, r3 + ORR r0, r0, r3 + LSR r1, r1, r2 + bx lr +ENDFUNCTION(__i64_shr) diff --git a/runtime/arm/i64_shr.s b/runtime/arm/i64_shr.s deleted file mode 100644 index 9d60441..0000000 --- a/runtime/arm/i64_shr.s +++ /dev/null @@ -1,72 +0,0 @@ -@ ***************************************************************** -@ -@ The Compcert verified compiler -@ -@ Xavier Leroy, INRIA Paris-Rocquencourt -@ -@ Copyright (c) 2013 Institut National de Recherche en Informatique et -@ en Automatique. -@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions are met: -@ * Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. -@ * Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ * Neither the name of the nor the -@ names of its contributors may be used to endorse or promote products -@ derived from this software without specific prior written permission. -@ -@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -@ -@ ********************************************************************* - -@ Helper functions for 64-bit integer arithmetic. ARM version. - - .text - -@@@ Shift right unsigned - -@ Note on ARM shifts: the shift amount is taken modulo 256. -@ If shift amount mod 256 >= 32, the shift produces 0. - -@ Algorithm: -@ RL = (XL >> N) | (XH << (32-N) | (XH >> (N-32)) -@ RH = XH >> N -@ If N = 0: -@ RL = XL | 0 | 0 -@ RH = XH -@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 255 <= N-32 mod 256 <= 255 -@ RL = (XL >> N) | (XH >> (32-N) | 0 -@ RH = XH >> N -@ If N = 32: -@ RL = 0 | XH | 0 -@ RH = 0 -@ If 33 <= N <= 63: 255 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31 -@ RL = 0 | 0 | (XH >> (N-32)) -@ RH = 0 - - .global __i64_shr -__i64_shr: - and r2, r2, #63 @ normalize amount to 0...63 - rsb r3, r2, #32 @ r3 = 32 - amount - mov r0, r0, lsr r2 - orr r0, r0, r1, lsl r3 - sub r3, r2, #32 @ r3 = amount - 32 - orr r0, r0, r1, lsr r3 - mov r1, r1, lsr r2 - bx lr - .type __i64_shr, %function - .size __i64_shr, . 
- __i64_shr - diff --git a/runtime/arm/i64_smod.S b/runtime/arm/i64_smod.S new file mode 100644 index 0000000..b109ecc --- /dev/null +++ b/runtime/arm/i64_smod.S @@ -0,0 +1,61 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +#include "sysdeps.h" + +@@@ Signed modulus + +FUNCTION(__i64_smod) + push {r4, r5, r6, r7, r8, r10, lr} + ASR r4, r1, #31 @ r4 = sign of N + ASR r5, r3, #31 @ r5 = sign of D + MOV r10, r4 @ r10 = sign of result + EOR r0, r0, r4 @ take absolute value of N + EOR r1, r1, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31) + subs r0, r0, r4 + sbc r1, r1, r4 + EOR r2, r2, r5 @ take absolute value of D + EOR r3, r3, r5 + subs r2, r2, r5 + sbc r3, r3, r5 + bl __i64_udivmod @ do unsigned division + EOR r0, r0, r10 @ apply expected sign + EOR r1, r1, r10 + subs r0, r0, r10 + sbc r1, r1, r10 + pop {r4, r5, r6, r7, r8, r10, lr} + bx lr +ENDFUNCTION(__i64_smod) diff --git a/runtime/arm/i64_smod.s b/runtime/arm/i64_smod.s deleted file mode 100644 index 8e766a0..0000000 --- a/runtime/arm/i64_smod.s +++ /dev/null @@ -1,64 +0,0 @@ -@ ***************************************************************** -@ -@ The Compcert verified compiler -@ -@ Xavier Leroy, INRIA Paris-Rocquencourt -@ -@ Copyright (c) 2013 Institut National de Recherche en Informatique et -@ en Automatique. -@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions are met: -@ * Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. 
-@ * Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ * Neither the name of the nor the -@ names of its contributors may be used to endorse or promote products -@ derived from this software without specific prior written permission. -@ -@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -@ -@ ********************************************************************* - -@ Helper functions for 64-bit integer arithmetic. ARM version. - - .text - -@@@ Signed modulus - - .global __i64_smod -__i64_smod: - push {r4, r5, r6, r7, r8, r10, lr} - mov r10, r1 @ r10 = sign of result - mov r4, r1, asr#31 @ take absolute value of N - eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31) - eor r1, r1, r4 - subs r0, r0, r4 - sbc r1, r1, r4 - mov r4, r3, asr #31 @ take absolute value of D - eor r2, r2, r4 - eor r3, r3, r4 - subs r2, r2, r4 - sbc r3, r3, r4 - bl __i64_udivmod @ do unsigned division - eor r0, r0, r10, asr#31 @ apply expected sign - eor r1, r1, r10, asr#31 - subs r0, r0, r10, asr#31 - sbc r1, r1, r10, asr#31 - pop {r4, r5, r6, r7, r8, r10, lr} - bx lr - .type __i64_smod, %function - .size __i64_smod, . - __i64_smod - diff --git a/runtime/arm/i64_stod.S b/runtime/arm/i64_stod.S index 7e5a06f..81e43e2 100644 --- a/runtime/arm/i64_stod.S +++ b/runtime/arm/i64_stod.S @@ -34,24 +34,23 @@ @ Helper functions for 64-bit integer arithmetic. ARM version. - .text +#include "sysdeps.h" @@@ Conversion from signed 64-bit integer to double float - .global __i64_stod +FUNCTION(__i64_stod) __i64_stod: - fmsr s0, r0 - fuitod d0, s0 @ convert low half to double (unsigned) - fmsr s2, r1 - fsitod d1, s2 @ convert high half to double (signed) - fldd d2, .LC1 @ d2 = 2^32 - fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 + vmov s0, r0 + vcvt.f64.u32 d0, s0 @ convert low half to double (unsigned) + vmov s2, r1 + vcvt.f64.s32 d1, s2 @ convert high half to double (signed) + vldr d2, .LC1 @ d2 = 2^32 + vmla.f64 d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 #ifdef VARIANT_eabi - fmrrd r0, r1, d0 @ return result in r0, r1 + vmov r0, r1, d0 @ return result in r0, r1 #endif bx lr - .type __i64_stod, %function - .size __i64_stod, . - __i64_stod +ENDFUNCTION(__i64_stod) .balign 8 .LC1: .quad 0x41f0000000000000 @ 2^32 in double precision diff --git a/runtime/arm/i64_stof.S b/runtime/arm/i64_stof.S index 3f33f04..f1051f5 100644 --- a/runtime/arm/i64_stof.S +++ b/runtime/arm/i64_stof.S @@ -34,15 +34,14 @@ @ Helper functions for 64-bit integer arithmetic. ARM version. 
- .text +#include "sysdeps.h" @@@ Conversion from signed 64-bit integer to single float - .global __i64_stof -__i64_stof: +FUNCTION(__i64_stof) @ Check whether -2^53 <= X < 2^53 - mov r2, r1, asr #21 - mov r3, r1, asr #31 @ (r2,r3) = X >> 53 + ASR r2, r1, #21 + ASR r3, r1, #31 @ (r2,r3) = X >> 53 adds r2, r2, #1 adc r3, r3, #0 @ (r2,r3) = X >> 53 + 1 cmp r3, #2 @@ -50,30 +49,29 @@ __i64_stof: @ X is large enough that double rounding can occur. @ Avoid it by nudging X away from the points where double rounding @ occurs (the "round to odd" technique) - mov r2, #0x700 - orr r2, r2, #0xFF @ r2 = 0x7FF - and r3, r0, r2 @ extract bits 0 to 11 of X - add r3, r3, r2 @ r3 = (X & 0x7FF) + 0x7FF + MOV r2, #0x700 + ORR r2, r2, #0xFF @ r2 = 0x7FF + AND r3, r0, r2 @ extract bits 0 to 11 of X + ADD r3, r3, r2 @ r3 = (X & 0x7FF) + 0x7FF @ bit 12 of r3 is 0 if all low 12 bits of X are 0, 1 otherwise @ bits 13-31 of r3 are 0 - orr r0, r0, r3 @ correct bit number 12 of X - bic r0, r0, r2 @ set to 0 bits 0 to 11 of X + ORR r0, r0, r3 @ correct bit number 12 of X + BIC r0, r0, r2 @ set to 0 bits 0 to 11 of X @ Convert to double -1: fmsr s0, r0 - fuitod d0, s0 @ convert low half to double (unsigned) - fmsr s2, r1 - fsitod d1, s2 @ convert high half to double (signed) - fldd d2, .LC1 @ d2 = 2^32 - fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 +1: vmov s0, r0 + vcvt.f64.u32 d0, s0 @ convert low half to double (unsigned) + vmov s2, r1 + vcvt.f64.s32 d1, s2 @ convert high half to double (signed) + vldr d2, .LC1 @ d2 = 2^32 + vmla.f64 d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 @ Round to single - fcvtsd s0, d0 + vcvt.f32.f64 s0, d0 #ifdef VARIANT_eabi @ Return result in r0 - fmrs r0, s0 + vmov r0, s0 #endif bx lr - .type __i64_stof, %function - .size __i64_stof, . - __i64_stof - +ENDFUNCTION(__i64_stof) + .balign 8 .LC1: .quad 0x41f0000000000000 @ 2^32 in double precision diff --git a/runtime/arm/i64_udiv.S b/runtime/arm/i64_udiv.S new file mode 100644 index 0000000..3b59944 --- /dev/null +++ b/runtime/arm/i64_udiv.S @@ -0,0 +1,48 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. 
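The nudging step in __i64_stof above (repeated in __i64_utof further down) is the "round to odd" technique: when X is not known to lie in [-2^53, 2^53), the 11 low bits that the int64 to double conversion could round away are folded into the next bit up, so that the first conversion becomes exact and the only rounding happens in the final double to float step, ruling out double rounding. A rough C sketch of just that nudge; nudge_round_to_odd is an illustrative name, not a runtime symbol:

    #include <stdint.h>

    /* Illustrative sketch of the ORR/BIC sequence applied to the low word. */
    static uint64_t nudge_round_to_odd(uint64_t x) {
      uint64_t low = x & 0x7FF;       /* the 11 bits int64 -> double may round away */
      uint64_t sticky = low + 0x7FF;  /* the next bit up is set iff low != 0 */
      x |= sticky;                    /* force that bit whenever a low bit was set */
      return x & ~(uint64_t) 0x7FF;   /* clear the low bits: conversion is now exact */
    }

The conversion itself then evaluates (double)XL + (double)XH * 2^32, which is what the vmla.f64 against the .LC1 constant computes, with XH taken as signed here and as unsigned in the __i64_utod and __i64_utof variants.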
IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +#include "sysdeps.h" + +@@@ Unsigned division + +FUNCTION(__i64_udiv) + push {r4, r5, r6, r7, r8, lr} + bl __i64_udivmod + MOV r0, r4 + MOV r1, r5 + pop {r4, r5, r6, r7, r8, lr} + bx lr +ENDFUNCTION(__i64_udiv) diff --git a/runtime/arm/i64_udiv.s b/runtime/arm/i64_udiv.s deleted file mode 100644 index de41e00..0000000 --- a/runtime/arm/i64_udiv.s +++ /dev/null @@ -1,51 +0,0 @@ -@ ***************************************************************** -@ -@ The Compcert verified compiler -@ -@ Xavier Leroy, INRIA Paris-Rocquencourt -@ -@ Copyright (c) 2013 Institut National de Recherche en Informatique et -@ en Automatique. -@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions are met: -@ * Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. -@ * Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ * Neither the name of the nor the -@ names of its contributors may be used to endorse or promote products -@ derived from this software without specific prior written permission. -@ -@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -@ -@ ********************************************************************* - -@ Helper functions for 64-bit integer arithmetic. ARM version. - - .text - -@@@ Unsigned division - - .global __i64_udiv -__i64_udiv: - push {r4, r5, r6, r7, r8, lr} - bl __i64_udivmod - mov r0, r4 - mov r1, r5 - pop {r4, r5, r6, r7, r8, lr} - bx lr - .type __i64_udiv, %function - .size __i64_udiv, . - __i64_udiv - diff --git a/runtime/arm/i64_udivmod.S b/runtime/arm/i64_udivmod.S new file mode 100644 index 0000000..e5373ad --- /dev/null +++ b/runtime/arm/i64_udivmod.S @@ -0,0 +1,79 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. 
+@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +#include "sysdeps.h" + +@@@ Auxiliary function for division and modulus. Don't call from C + +@ On entry: N = (r0, r1) numerator D = (r2, r3) divisor +@ On exit: Q = (r4, r5) quotient R = (r0, r1) remainder +@ Locals: M = (r6, r7) mask TMP = r8 temporary + +FUNCTION(__i64_udivmod) + orrs r8, r2, r3 @ is D == 0? + it eq + bxeq lr @ if so, return with unspecified results + MOV r4, #0 @ Q = 0 + MOV r5, #0 + MOV r6, #1 @ M = 1 + MOV r7, #0 +1: cmp r3, #0 @ while ((signed) D >= 0) ... + blt 2f + subs r8, r0, r2 @ ... and N >= D ... + sbcs r8, r1, r3 + blo 2f + adds r2, r2, r2 @ D = D << 1 + adc r3, r3, r3 + adds r6, r6, r6 @ M = M << 1 + adc r7, r7, r7 + b 1b +2: subs r0, r0, r2 @ N = N - D + sbcs r1, r1, r3 + orr r4, r4, r6 @ Q = Q | M + orr r5, r5, r7 + bhs 3f @ if N was >= D, continue + adds r0, r0, r2 @ otherwise, undo what we just did + adc r1, r1, r3 @ N = N + D + bic r4, r4, r6 @ Q = Q & ~M + bic r5, r5, r7 +3: lsrs r7, r7, #1 @ M = M >> 1 + rrx r6, r6 + lsrs r3, r3, #1 @ D = D >> 1 + rrx r2, r2 + orrs r8, r6, r7 @ repeat while (M != 0) ... + bne 2b + bx lr +ENDFUNCTION(__i64_udivmod) diff --git a/runtime/arm/i64_udivmod.s b/runtime/arm/i64_udivmod.s deleted file mode 100644 index e3d9c87..0000000 --- a/runtime/arm/i64_udivmod.s +++ /dev/null @@ -1,80 +0,0 @@ -@ ***************************************************************** -@ -@ The Compcert verified compiler -@ -@ Xavier Leroy, INRIA Paris-Rocquencourt -@ -@ Copyright (c) 2013 Institut National de Recherche en Informatique et -@ en Automatique. -@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions are met: -@ * Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. 
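__i64_udivmod above is a classic restoring (shift-and-subtract) division: the divisor D and a one-bit mask M are shifted left until D would overflow or exceed the dividend, then trial subtractions walk back down one mask bit at a time, setting the corresponding quotient bit whenever the subtraction succeeds. A rough C sketch of the same algorithm; model_i64_udivmod and the result struct are illustrative only, since the real routine passes everything in registers:

    #include <stdint.h>

    /* Illustrative sketch of the restoring division done by __i64_udivmod. */
    typedef struct { uint64_t quot, rem; } udivmod64;

    udivmod64 model_i64_udivmod(uint64_t n, uint64_t d) {
      udivmod64 r = { 0, n };
      if (d == 0) return r;                 /* divisor 0: unspecified result */
      uint64_t m = 1;                       /* mask: the quotient bit being tried */
      /* Align: shift D and M left while D stays below 2^63 and <= N. */
      while ((int64_t) d >= 0 && r.rem >= d) {
        d <<= 1;
        m <<= 1;
      }
      /* Trial-subtract from the highest mask bit down to bit 0. */
      do {
        if (r.rem >= d) {                   /* the test the subs/sbcs carry encodes */
          r.rem -= d;
          r.quot |= m;
        }
        d >>= 1;
        m >>= 1;
      } while (m != 0);
      return r;
    }

The __i64_udiv and __i64_umod entry points are thin wrappers around this routine: the former moves the quotient out of r4/r5 into r0/r1, the latter returns the remainder, which is already in r0/r1.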
-@ * Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ * Neither the name of the nor the -@ names of its contributors may be used to endorse or promote products -@ derived from this software without specific prior written permission. -@ -@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -@ -@ ********************************************************************* - -@ Helper functions for 64-bit integer arithmetic. ARM version. - - .text - -@@@ Auxiliary function for division and modulus. Don't call from C - -@ On entry: N = (r0, r1) numerator D = (r2, r3) divisor -@ On exit: Q = (r4, r5) quotient R = (r0, r1) remainder -@ Locals: M = (r6, r7) mask TMP = r8 temporary - - .global __i64_udivmod -__i64_udivmod: - orrs r8, r2, r3 @ is D == 0? - bxeq lr @ if so, return with unspecified results - mov r4, #0 @ Q = 0 - mov r5, #0 - mov r6, #1 @ M = 1 - mov r7, #0 -1: cmp r3, #0 @ while ((signed) D >= 0) ... - blt 2f - subs r8, r0, r2 @ ... and N >= D ... - sbcs r8, r1, r3 - blo 2f - adds r2, r2, r2 @ D = D << 1 - adc r3, r3, r3 - adds r6, r6, r6 @ M = M << 1 - adc r7, r7, r7 - b 1b -2: subs r0, r0, r2 @ N = N - D - sbcs r1, r1, r3 - orr r4, r4, r6 @ Q = Q | M - orr r5, r5, r7 - bhs 3f @ if N was >= D, continue - adds r0, r0, r2 @ otherwise, undo what we just did - adc r1, r1, r3 @ N = N + D - bic r4, r4, r6 @ Q = Q & ~M - bic r5, r5, r7 -3: movs r7, r7, lsr #1 @ M = M >> 1 - mov r6, r6, rrx - movs r3, r3, lsr #1 @ D = D >> 1 - mov r2, r2, rrx - orrs r8, r6, r7 @ repeat while (M != 0) ... - bne 2b - bx lr - .type __i64_udivmod, %function - .size __i64_udivmod, . - __i64_udivmod diff --git a/runtime/arm/i64_umod.S b/runtime/arm/i64_umod.S new file mode 100644 index 0000000..e59fd20 --- /dev/null +++ b/runtime/arm/i64_umod.S @@ -0,0 +1,46 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. 
+@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +#include "sysdeps.h" + +@@@ Unsigned remainder + +FUNCTION(__i64_umod) + push {r4, r5, r6, r7, r8, lr} + bl __i64_udivmod @ remainder is already in r0,r1 + pop {r4, r5, r6, r7, r8, lr} + bx lr +ENDFUNCTION(__i64_umod) diff --git a/runtime/arm/i64_umod.s b/runtime/arm/i64_umod.s deleted file mode 100644 index 80af56e..0000000 --- a/runtime/arm/i64_umod.s +++ /dev/null @@ -1,49 +0,0 @@ -@ ***************************************************************** -@ -@ The Compcert verified compiler -@ -@ Xavier Leroy, INRIA Paris-Rocquencourt -@ -@ Copyright (c) 2013 Institut National de Recherche en Informatique et -@ en Automatique. -@ -@ Redistribution and use in source and binary forms, with or without -@ modification, are permitted provided that the following conditions are met: -@ * Redistributions of source code must retain the above copyright -@ notice, this list of conditions and the following disclaimer. -@ * Redistributions in binary form must reproduce the above copyright -@ notice, this list of conditions and the following disclaimer in the -@ documentation and/or other materials provided with the distribution. -@ * Neither the name of the nor the -@ names of its contributors may be used to endorse or promote products -@ derived from this software without specific prior written permission. -@ -@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -@ -@ ********************************************************************* - -@ Helper functions for 64-bit integer arithmetic. ARM version. - - .text - -@@@ Unsigned modulus - - .global __i64_umod -__i64_umod: - push {r4, r5, r6, r7, r8, lr} - bl __i64_udivmod @ remainder is already in r0,r1 - pop {r4, r5, r6, r7, r8, lr} - bx lr - .type __i64_umod, %function - .size __i64_umod, . 
- __i64_umod - diff --git a/runtime/arm/i64_utod.S b/runtime/arm/i64_utod.S index 1110874..b12d7c2 100644 --- a/runtime/arm/i64_utod.S +++ b/runtime/arm/i64_utod.S @@ -34,25 +34,23 @@ @ Helper functions for 64-bit integer arithmetic. ARM version. - .text +#include "sysdeps.h" @@@ Conversion from unsigned 64-bit integer to double float - .global __i64_utod -__i64_utod: - fmsr s0, r0 - fuitod d0, s0 @ convert low half to double (unsigned) - fmsr s2, r1 - fuitod d1, s2 @ convert high half to double (unsigned) - fldd d2, .LC1 @ d2 = 2^32 - fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 +FUNCTION(__i64_utod) +__i64_stod: + vmov s0, r0 + vcvt.f64.u32 d0, s0 @ convert low half to double (unsigned) + vmov s2, r1 + vcvt.f64.u32 d1, s2 @ convert high half to double (unsigned) + vldr d2, .LC1 @ d2 = 2^32 + vmla.f64 d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 #ifdef VARIANT_eabi - fmrrd r0, r1, d0 @ return result in r0, r1 -#endif + vmov r0, r1, d0 @ return result in r0, r1 +#endif bx lr - .type __i64_utod, %function - .size __i64_utod, . - __i64_utod - +ENDFUNCTION(__i64_utod) + .balign 8 .LC1: .quad 0x41f0000000000000 @ 2^32 in double precision - diff --git a/runtime/arm/i64_utof.S b/runtime/arm/i64_utof.S index a959076..711cda0 100644 --- a/runtime/arm/i64_utof.S +++ b/runtime/arm/i64_utof.S @@ -34,42 +34,40 @@ @ Helper functions for 64-bit integer arithmetic. ARM version. - .text +#include "sysdeps.h" @@@ Conversion from unsigned 64-bit integer to single float - .global __i64_utof -__i64_utof: +FUNCTION(__i64_utof) @ Check whether X < 2^53 - movs r2, r1, lsr #21 @ test if X >> 53 == 0 + lsrs r2, r1, #21 @ test if X >> 53 == 0 beq 1f @ X is large enough that double rounding can occur. @ Avoid it by nudging X away from the points where double rounding @ occurs (the "round to odd" technique) - mov r2, #0x700 - orr r2, r2, #0xFF @ r2 = 0x7FF - and r3, r0, r2 @ extract bits 0 to 11 of X - add r3, r3, r2 @ r3 = (X & 0x7FF) + 0x7FF + MOV r2, #0x700 + ORR r2, r2, #0xFF @ r2 = 0x7FF + AND r3, r0, r2 @ extract bits 0 to 11 of X + ADD r3, r3, r2 @ r3 = (X & 0x7FF) + 0x7FF @ bit 12 of r3 is 0 if all low 12 bits of X are 0, 1 otherwise @ bits 13-31 of r3 are 0 - orr r0, r0, r3 @ correct bit number 12 of X - bic r0, r0, r2 @ set to 0 bits 0 to 11 of X + ORR r0, r0, r3 @ correct bit number 12 of X + BIC r0, r0, r2 @ set to 0 bits 0 to 11 of X @ Convert to double -1: fmsr s0, r0 - fuitod d0, s0 @ convert low half to double (unsigned) - fmsr s2, r1 - fuitod d1, s2 @ convert high half to double (unsigned) - fldd d2, .LC1 @ d2 = 2^32 - fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 +1: vmov s0, r0 + vcvt.f64.u32 d0, s0 @ convert low half to double (unsigned) + vmov s2, r1 + vcvt.f64.u32 d1, s2 @ convert high half to double (unsigned) + vldr d2, .LC1 @ d2 = 2^32 + vmla.f64 d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 @ Round to single - fcvtsd s0, d0 + vcvt.f32.f64 s0, d0 #ifdef VARIANT_eabi @ Return result in r0 - fmrs r0, s0 + vmov r0, s0 #endif bx lr - .type __i64_utof, %function - .size __i64_utof, . 
- __i64_utof +ENDFUNCTION(__i64_utof) .balign 8 .LC1: .quad 0x41f0000000000000 @ 2^32 in double precision diff --git a/runtime/arm/sysdeps.h b/runtime/arm/sysdeps.h new file mode 100644 index 0000000..b72af0f --- /dev/null +++ b/runtime/arm/sysdeps.h @@ -0,0 +1,84 @@ +// ***************************************************************** +// +// The Compcert verified compiler +// +// Xavier Leroy, INRIA Paris-Rocquencourt +// +// Copyright (c) 2013 Institut National de Recherche en Informatique et +// en Automatique. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ********************************************************************* + +// System dependencies + +#ifdef THUMB +#define FUNCTION(f) \ + .text; \ + .thumb; \ + .thumb_func; \ + .global f; \ +f: +#else +#define FUNCTION(f) \ + .text; \ + .arm; \ + .global f; \ +f: +#endif + +#define ENDFUNCTION(f) \ + .type f, %function; .size f, . - f + +// In Thumb2 mode, some instructions have shorter encodings in their "S" form +// (update condition flags). For cases where the condition flags do not +// matter, we use the following macros for these instructions. +// In classic ARM mode, we prefer not to use the "S" form unless necessary. + +#ifdef THUMB +#define THUMB_S(x) x##s +#else +#define THUMB_S(x) x +#endif + +#define ADD THUMB_S(add) +#define AND THUMB_S(and) +#define ASR THUMB_S(asr) +#define BIC THUMB_S(bic) +#define EOR THUMB_S(eor) +#define LSL THUMB_S(lsl) +#define LSR THUMB_S(lsr) +#define MOV THUMB_S(mov) +#define ORR THUMB_S(orr) +#define RSB THUMB_S(rsb) +#define SUB THUMB_S(sub) + + .syntax unified +#ifdef THUMB + .arch armv7 +#else + .arch armv6 +#endif + .fpu vfpv2 diff --git a/runtime/arm/vararg.S b/runtime/arm/vararg.S index 8d431d3..e352582 100644 --- a/runtime/arm/vararg.S +++ b/runtime/arm/vararg.S @@ -34,50 +34,44 @@ @ Helper functions for variadic functions . 
ARM version - .text +#include "sysdeps.h" @ typedef void * va_list; @ unsigned int __compcert_va_int32(va_list * ap); @ unsigned long long __compcert_va_int64(va_list * ap); @ double __compcert_va_float64(va_list * ap); - .global __compcert_va_int32 -__compcert_va_int32: +FUNCTION(__compcert_va_int32) @ r0 = ap parameter ldr r1, [r0, #0] @ r1 = pointer to next argument - add r1, r1, #4 @ advance ap by 4 + ADD r1, r1, #4 @ advance ap by 4 str r1, [r0, #0] ldr r0, [r1, #-4] @ load next argument and return it in r0 bx lr - .type __compcert_va_int32, %function - .size __compcert_va_int32, . - __compcert_va_int32 +ENDFUNCTION(__compcert_va_int32) - .global __compcert_va_int64 -__compcert_va_int64: +FUNCTION(__compcert_va_int64) @ r0 = ap parameter ldr r1, [r0, #0] @ r1 = pointer to next argument - add r1, r1, #15 @ 8-align and advance by 8 - bic r1, r1, #7 + ADD r1, r1, #15 @ 8-align and advance by 8 + BIC r1, r1, #7 str r1, [r0, #0] @ update ap ldr r0, [r1, #-8] @ load next argument and return it in r0,r1 ldr r1, [r1, #-4] bx lr - .type __compcert_va_int64, %function - .size __compcert_va_int64, . - __compcert_va_int64 +ENDFUNCTION(__compcert_va_int64) - .global __compcert_va_float64 -__compcert_va_float64: +FUNCTION(__compcert_va_float64) @ r0 = ap parameter ldr r1, [r0, #0] @ r1 = pointer to next argument - add r1, r1, #15 @ 8-align and advance by 8 - bic r1, r1, #7 + ADD r1, r1, #15 @ 8-align and advance by 8 + BIC r1, r1, #7 str r1, [r0, #0] @ update ap -#ifdef VARIANT_hardfloat - fldd d0, [r1, #-8] @ load next argument and return it in d0 -#else +#ifdef VARIANT_eabi ldr r0, [r1, #-8] @ load next argument and return it in r0,r1 ldr r1, [r1, #-4] +#else + vldr d0, [r1, #-8] @ load next argument and return it in d0 #endif bx lr - .type __compcert_va_float64, %function - .size __compcert_va_float64, . - __compcert_va_float64 +ENDFUNCTION(__compcert_va_float64) diff --git a/runtime/ia32/sysdeps.h b/runtime/ia32/sysdeps.h index 1aa54d3..9d957a8 100644 --- a/runtime/ia32/sysdeps.h +++ b/runtime/ia32/sysdeps.h @@ -1,38 +1,38 @@ -# ***************************************************************** -# -# The Compcert verified compiler -# -# Xavier Leroy, INRIA Paris-Rocquencourt -# -# Copyright (c) 2013 Institut National de Recherche en Informatique et -# en Automatique. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
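The "add #15 then bic #7" idiom in __compcert_va_int64 and __compcert_va_float64 above rounds the argument pointer up to the next 8-byte boundary and moves it past the 8-byte argument in a single pair of instructions. A rough C sketch of that helper, assuming the layout used here where va_list is a bare pointer into the stacked arguments; model_va_int64 is an illustrative name:

    #include <stdint.h>

    /* Illustrative sketch: va_list is a plain pointer, advanced past each
       argument as the argument is fetched. */
    unsigned long long model_va_int64(void **ap) {
      uintptr_t p = (uintptr_t) *ap;
      p = (p + 15) & ~(uintptr_t) 7;           /* 8-align and advance by 8 in one step */
      *ap = (void *) p;
      return *(unsigned long long *) (p - 8);  /* the argument just stepped over */
    }

For 32-bit arguments no extra alignment is needed, so __compcert_va_int32 simply advances the pointer by 4 and reads the word it skipped.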
IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# ********************************************************************* +// ***************************************************************** +// +// The Compcert verified compiler +// +// Xavier Leroy, INRIA Paris-Rocquencourt +// +// Copyright (c) 2013 Institut National de Recherche en Informatique et +// en Automatique. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of the nor the +// names of its contributors may be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// ********************************************************************* -# System dependencies +// System dependencies #if defined(SYS_linux) || defined(SYS_bsd) -- cgit v1.2.3