Diffstat (limited to 'runtime/arm/int64.s')
-rw-r--r--  runtime/arm/int64.s  424
1 files changed, 424 insertions, 0 deletions
diff --git a/runtime/arm/int64.s b/runtime/arm/int64.s
new file mode 100644
index 0000000..6b03351
--- /dev/null
+++ b/runtime/arm/int64.s
@@ -0,0 +1,424 @@
+@ *****************************************************************
+@
+@ The Compcert verified compiler
+@
+@ Xavier Leroy, INRIA Paris-Rocquencourt
+@
+@ Copyright (c) 2013 Institut National de Recherche en Informatique et
+@ en Automatique.
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions are met:
+@ * Redistributions of source code must retain the above copyright
+@ notice, this list of conditions and the following disclaimer.
+@ * Redistributions in binary form must reproduce the above copyright
+@ notice, this list of conditions and the following disclaimer in the
+@ documentation and/or other materials provided with the distribution.
+@ * Neither the name of the <organization> nor the
+@ names of its contributors may be used to endorse or promote products
+@ derived from this software without specific prior written permission.
+@
+@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+@ HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+@
+@ *********************************************************************
+
+@ Helper functions for 64-bit integer arithmetic. ARM version.
+
+@ Calling conventions for R = F(X) or R = F(X,Y):
+@ one or two long arguments: XL in r0, XH in r1, YL in r2, YH in r3
+@ one long argument, one int: XL in r0, XH in r1, Y in r2
+@ one float argument: X in r0, r1
+@ one long result: RL in r0, RH in r1
+@ one float result: R in r0, r1
+@ This is a little-endian convention: the low word is in the
+@ low-numbered register.
+@ Can use r0...r3 and f0...f7 as temporary registers (caller-save)
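+@
+@ As a reading aid (an assumption about the C-level view, not stated in
+@ this file): under the soft-float AAPCS these helpers behave like
+@   unsigned long long __i64_udiv(unsigned long long x, unsigned long long y);
+@   unsigned long long __i64_shr(unsigned long long x, int amount);
+@   double __i64_utod(unsigned long long x);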
+
+ .text
+
+@@@ Unsigned comparison
+
+ .global __i64_ucmp
+__i64_ucmp:
+ cmp r1, r3 @ compare high words
+ cmpeq r0, r2 @ if equal, compare low words instead
+ moveq r0, #0 @ res = 0 if eq
+ movhi r0, #1 @ res = 1 if unsigned higher
+ mvnlo r0, #0 @ res = -1 if unsigned lower
+ bx lr
+ .type __i64_ucmp, %function
+ .size __i64_ucmp, . - __i64_ucmp
+
+@@@ Signed comparison
+
+ .global __i64_scmp
+__i64_scmp:
+ cmp r0, r2 @ compare low words (unsigned)
+ moveq r0, #0 @ res = 0 if eq
+ movhi r0, #1 @ res = 1 if unsigned higher
+ mvnlo r0, #0 @ res = -1 if unsigned lower
+ cmp r1, r3 @ compare high words (signed)
+ addgt r0, r0, #2 @ res += 2 if signed greater
+ sublt r0, r0, #2 @ res -= 2 if signed less
+ @ here, r0 = 0 if X == Y
+ @ r0 = -3, -2, -1 if X < Y
+ @ r0 = 1, 2, 3 if X > Y
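+ @ in every case, comparing r0 (signed) against 0 gives the ordering of X and Y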
+ bx lr
+ .type __i64_scmp, %function
+ .size __i64_scmp, . - __i64_scmp
+
+@@@ Opposite
+
+ .global __i64_neg
+__i64_neg:
+ rsbs r0, r0, #0 @ low word: 0 - XL, sets borrow
+ rsc r1, r1, #0 @ high word: 0 - XH - borrow
+ bx lr
+ .type __i64_neg, %function
+ .size __i64_neg, . - __i64_neg
+
+@@@ Addition
+
+ .global __i64_add
+__i64_add:
+ adds r0, r0, r2 @ add low words, set carry
+ adc r1, r1, r3 @ add high words with carry
+ bx lr
+ .type __i64_add, %function
+ .size __i64_add, . - __i64_add
+
+@@@ Subtraction
+
+ .global __i64_sub
+__i64_sub:
+ subs r0, r0, r2 @ subtract low words, set borrow
+ sbc r1, r1, r3 @ subtract high words with borrow
+ bx lr
+ .type __i64_sub, %function
+ .size __i64_sub, . - __i64_sub
+
+@ Note on ARM shifts: the shift amount is taken modulo 256.
+@ Therefore, unsigned shifts by 32 bits or more produce 0.
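+@ For example, "mov r0, r0, lsr r2" with r2 = 33 yields 0, so the 64-bit
+@ shifts below handle amounts of 32 or more as a separate case.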
+
+@@@ Shift left
+
+ .global __i64_shl
+__i64_shl:
+ and r2, r2, #63 @ normalize amount to 0...63
+ rsbs r3, r2, #32 @ r3 = 32 - amount
+ ble 1f @ branch if <= 0, namely if amount >= 32
+ mov r1, r1, lsl r2
+ orr r1, r1, r0, lsr r3 @ RH |= XL >> (32 - amount)
+ mov r0, r0, lsl r2
+ bx lr
+1:
+ sub r2, r2, #32
+ mov r1, r0, lsl r2
+ mov r0, #0
+ bx lr
+ .type __i64_shl, %function
+ .size __i64_shl, . - __i64_shl
+
+@@@ Shift right unsigned
+
+ .global __i64_shr
+__i64_shr:
+ and r2, r2, #63 @ normalize amount to 0...63
+ rsbs r3, r2, #32 @ r3 = 32 - amount
+ ble 1f @ branch if <= 0, namely if amount >= 32
+ mov r0, r0, lsr r2
+ orr r0, r0, r1, lsl r3 @ RL |= XH << (32 - amount)
+ mov r1, r1, lsr r2
+ bx lr
+1:
+ sub r2, r2, #32
+ mov r0, r1, lsr r2
+ mov r1, #0
+ bx lr
+ .type __i64_shr, %function
+ .size __i64_shr, . - __i64_shr
+
+@@@ Shift right signed
+
+ .global __i64_sar
+__i64_sar:
+ and r2, r2, #63 @ normalize amount to 0...63
+ rsbs r3, r2, #32 @ r3 = 32 - amount
+ ble 1f @ branch if <= 0, namely if amount >= 32
+ mov r0, r0, lsr r2
+ orr r0, r0, r1, lsl r3 @ RL |= XH << (32 - amount)
+ mov r1, r1, asr r2
+ bx lr
+1:
+ sub r2, r2, #32
+ mov r0, r1, asr r2
+ mov r1, r1, asr #31
+ bx lr
+ .type __i64_sar, %function
+ .size __i64_sar, . - __i64_sar
+
+@@@ Multiplication
+
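+@ Computes the low 64 bits of X * Y via the identity
+@   (XL + 2^32 * XH) * (YL + 2^32 * YH)
+@     = XL*YL + 2^32 * (XL*YH + XH*YL) + 2^64 * XH*YH
+@ where the last term vanishes modulo 2^64, so one 32x32->64 multiply
+@ (umull) and two 32x32->32 multiply-accumulates (mla) suffice.
+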
+ .global __i64_mul
+__i64_mul:
+ push {r4, r5}
+ mov r4, r0 @ save first arg in r4,r5
+ mov r5, r1
+ umull r0, r1, r2, r4 @ 64-bit product of low halves
+ mla r1, r2, r5, r1 @ add cross product YL * XH to high half of result
+ mla r1, r3, r4, r1 @ add cross product YH * XL to high half of result
+ pop {r4, r5}
+ bx lr
+ .type __i64_mul, %function
+ .size __i64_mul, . - __i64_mul
+
+@@@ Auxiliary function for division and modulus. Not exported.
+
+@ On entry:  N = (r0, r1)  numerator     D = (r2, r3)  divisor
+@ On exit:   Q = (r4, r5)  quotient      R = (r0, r1)  remainder
+@ Locals:    M = (r6, r7)  mask          TMP = r8  temporary
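+@
+@ Algorithm: restoring shift-and-subtract division.  D and a one-bit
+@ mask M are doubled until D's top bit is set or D exceeds N; then, on
+@ the way back down, D is subtracted from N whenever N >= D and the
+@ corresponding bit of Q is set via M, halving D and M at each step
+@ until M reaches 0.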
+
+__i64_udivmod:
+ orrs r8, r2, r3 @ is D == 0?
+ bxeq lr @ if so, return with unspecified results
+ mov r4, #0 @ Q = 0
+ mov r5, #0
+ mov r6, #1 @ M = 1
+ mov r7, #0
+1: cmp r3, #0 @ while ((signed) D >= 0) ...
+ blt 2f
+ subs r8, r0, r2 @ ... and N >= D ...
+ sbcs r8, r1, r3
+ blo 2f
+ adds r2, r2, r2 @ D = D << 1
+ adc r3, r3, r3
+ adds r6, r6, r6 @ M = M << 1
+ adc r7, r7, r7
+ b 1b
+2: subs r0, r0, r2 @ N = N - D
+ sbcs r1, r1, r3
+ orr r4, r4, r6 @ Q = Q | M
+ orr r5, r5, r7
+ bhs 3f @ if N was >= D, continue
+ adds r0, r0, r2 @ otherwise, undo what we just did
+ adc r1, r1, r3 @ N = N + D
+ bic r4, r4, r6 @ Q = Q & ~M
+ bic r5, r5, r7
+3: movs r7, r7, lsr #1 @ M = M >> 1
+ mov r6, r6, rrx
+ movs r3, r3, lsr #1 @ D = D >> 1
+ mov r2, r2, rrx
+ orrs r8, r6, r7 @ repeat while (M != 0) ...
+ bne 2b
+ bx lr
+
+@@@ Unsigned division
+
+ .global __i64_udiv
+__i64_udiv:
+ push {r4, r5, r6, r7, r8, lr}
+ bl __i64_udivmod
+ mov r0, r4 @ return quotient in (r0, r1)
+ mov r1, r5
+ pop {r4, r5, r6, r7, r8, lr}
+ bx lr
+ .type __i64_udiv, %function
+ .size __i64_udiv, . - __i64_udiv
+
+@@@ Unsigned modulus
+
+ .global __i64_umod
+__i64_umod:
+ push {r4, r5, r6, r7, r8, lr}
+ bl __i64_udivmod @ remainder is already in r0,r1
+ pop {r4, r5, r6, r7, r8, lr}
+ bx lr
+ .type __i64_umod, %function
+ .size __i64_umod, . - __i64_umod
+
+@@@ Signed division
+
+ .global __i64_sdiv
+__i64_sdiv:
+ push {r4, r5, r6, r7, r8, r10, lr}
+ eor r10, r1, r3 @ sign bit of r10 = sign(N) ^ sign(D) = sign of result
+ mov r4, r1, asr #31 @ take absolute value of N
+ eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31)
+ eor r1, r1, r4
+ subs r0, r0, r4
+ sbc r1, r1, r4
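+ @ ((X ^ S) - S negates X when S = -1 and leaves X unchanged when S = 0)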
+ mov r4, r3, asr #31 @ take absolute value of D
+ eor r2, r2, r4
+ eor r3, r3, r4
+ subs r2, r2, r4
+ sbc r3, r3, r4
+ bl __i64_udivmod @ do unsigned division
+ mov r0, r4 @ move quotient to result registers
+ mov r1, r5
+ eor r0, r0, r10, asr #31 @ apply expected sign
+ eor r1, r1, r10, asr #31
+ subs r0, r0, r10, asr #31
+ sbc r1, r1, r10, asr #31
+ pop {r4, r5, r6, r7, r8, r10, lr}
+ bx lr
+ .type __i64_sdiv, %function
+ .size __i64_sdiv, . - __i64_sdiv
+
+@@@ Signed modulus
+
+ .global __i64_smod
+__i64_smod:
+ push {r4, r5, r6, r7, r8, r10, lr}
+ mov r10, r1 @ r10 = sign of result = sign of N (remainder follows the dividend)
+ mov r4, r1, asr #31 @ take absolute value of N
+ eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31)
+ eor r1, r1, r4
+ subs r0, r0, r4
+ sbc r1, r1, r4
+ mov r4, r3, asr #31 @ take absolute value of D
+ eor r2, r2, r4
+ eor r3, r3, r4
+ subs r2, r2, r4
+ sbc r3, r3, r4
+ bl __i64_udivmod @ do unsigned division
+ eor r0, r0, r10, asr #31 @ apply expected sign
+ eor r1, r1, r10, asr #31
+ subs r0, r0, r10, asr #31
+ sbc r1, r1, r10, asr #31
+ pop {r4, r5, r6, r7, r8, r10, lr}
+ bx lr
+ .type __i64_smod, %function
+ .size __i64_smod, . - __i64_smod
+
+@@@ Conversion from unsigned 64-bit integer to double float
+
+ .global __i64_utod
+__i64_utod:
+ fmsr s0, r0
+ fuitod d0, s0 @ convert low half to double (unsigned)
+ fmsr s2, r1
+ fuitod d1, s2 @ convert high half to double (unsigned)
+ fldd d2, .LC1 @ d2 = 2^32
+ fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
+ fmrrd r0, r1, d0 @ return result in r0, r1
+ bx lr
+ .type __i64_utod, %function
+ .size __i64_utod, . - __i64_utod
+
+ .balign 8
+.LC1: .quad 0x41f0000000000000 @ 2^32 in double precision
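+@ (sign 0, biased exponent 0x41F = 1023 + 32, mantissa 0, i.e. 1.0 * 2^32)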
+
+@@@ Conversion from signed 64-bit integer to double float
+
+ .global __i64_stod
+__i64_stod:
+ fmsr s0, r0
+ fuitod d0, s0 @ convert low half to double (unsigned)
+ fmsr s2, r1
+ fsitod d1, s2 @ convert high half to double (signed)
+ fldd d2, .LC1 @ d2 = 2^32
+ fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
+ fmrrd r0, r1, d0 @ return result in r0, r1
+ bx lr
+ .type __i64_stod, %function
+ .size __i64_stod, . - __i64_stod
+
+@@@ Conversion from double float to unsigned 64-bit integer
+
+ .global __i64_dtou
+__i64_dtou:
+ cmp r1, #0 @ is double < 0 ?
+ blt 1f @ then it converts to 0
+ @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
+ @ note: 1023 + 52 = 1075 = 1024 + 51
+ @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
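+ @ note: HI = sign bit | 11-bit biased exponent | top 20 bits of mantissa;
+ @ subtracting 52 more lets the 53-bit mantissa be treated as an integer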
+ mov r2, r1, lsl #1
+ mov r2, r2, lsr #21
+ sub r2, r2, #51
+ sub r2, r2, #1024
+ @ check range of exponent
+ cmn r2, #52 @ if EXP < -52, double is < 1.0
+ blt 1f
+ cmp r2, #12 @ if EXP >= 64 - 52, double is >= 2^64
+ bge 2f
+ @ extract true mantissa
+ bic r1, r1, #0xFF000000
+ bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+ orr r1, r1, #0x00100000 @ HI |= 0x00100000
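+ @ now (r0, r1) = 2^52 + fraction; the final result is this value shifted by EXP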
+ @ shift it appropriately
+ cmp r2, #0
+ bge __i64_shl @ if EXP >= 0, shift left by EXP
+ rsb r2, r2, #0
+ b __i64_shr @ otherwise, shift right by -EXP
+ @ special cases
+1: mov r0, #0 @ result is 0
+ mov r1, #0
+ bx lr
+2: mvn r0, #0 @ result is 0xFF....FF (MAX_UINT)
+ mvn r1, #0
+ bx lr
+ .type __i64_dtou, %function
+ .size __i64_dtou, . - __i64_dtou
+
+@@@ Conversion from double float to signed 64-bit integer
+
+ .global __i64_dtos
+__i64_dtos:
+ push {r4, lr}
+ mov r4, r1, asr #31 @ save sign in r4
+ @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
+ @ note: 1023 + 52 = 1075 = 1024 + 51
+ @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
+ mov r2, r1, lsl #1
+ mov r2, r2, lsr #21
+ sub r2, r2, #51
+ sub r2, r2, #1024
+ @ check range of exponent
+ cmn r2, #52 @ if EXP < -52, |double| is < 1.0
+ blt 1f
+ cmp r2, #11 @ if EXP >= 63 - 52, |double| is >= 2^63
+ bge 2f
+ @ extract true mantissa
+ bic r1, r1, #0xFF000000
+ bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+ orr r1, r1, #0x00100000 @ HI |= 0x00100000
+ @ shift it appropriately
+ cmp r2, #0
+ blt 3f
+ bl __i64_shl @ if EXP >= 0, shift left by EXP
+ b 4f
+3: rsb r2, r2, #0
+ bl __i64_shr @ otherwise, shift right by -EXP
+ @ apply sign to result
+4: eor r0, r0, r4
+ eor r1, r1, r4
+ subs r0, r0, r4
+ sbc r1, r1, r4
+ pop {r4, lr}
+ bx lr
+ @ special cases
+1: mov r0, #0 @ result is 0
+ mov r1, #0
+ pop {r4, lr}
+ bx lr
+2: cmp r4, #0
+ blt 6f
+ mvn r0, #0 @ result is 0x7F....FF (MAX_SINT)
+ mov r1, r0, lsr #1
+ pop {r4, lr}
+ bx lr
+6: mov r0, #0 @ result is 0x80....00 (MIN_SINT)
+ mov r1, #0x80000000
+ pop {r4, lr}
+ bx lr
+ .type __i64_dtos, %function
+ .size __i64_dtos, . - __i64_dtos
+