summaryrefslogtreecommitdiff
path: root/runtime
diff options
context:
space:
mode:
authorGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2013-05-05 16:29:20 +0000
committerGravatar xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>2013-05-05 16:29:20 +0000
commitb257a6d283f6f5784cb351856b5dbe8c645a1f6f (patch)
tree3e134e46c53a5e75a3b6c78878d07a554b396367 /runtime
parentbc24cf49659f91245d8f42ca06fbe7d21a5c06cd (diff)
ia32/i64_dtou: wrong play on rounding mode
arm, powerpc: expand shifts inline in dtos and dtou arm: branchless code for shl and shr test: more tests for double -> long long conversions. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2234 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'runtime')
-rw-r--r--runtime/arm/i64_dtos.S33
-rw-r--r--runtime/arm/i64_dtou.S19
-rw-r--r--runtime/arm/i64_shl.s30
-rw-r--r--runtime/arm/i64_shr.s31
-rw-r--r--runtime/ia32/i64_dtou.S28
-rw-r--r--runtime/powerpc/i64_dtos.s25
-rw-r--r--runtime/powerpc/i64_dtou.s21
-rw-r--r--runtime/test/test_int64.c10
8 files changed, 145 insertions, 52 deletions
diff --git a/runtime/arm/i64_dtos.S b/runtime/arm/i64_dtos.S
index d0ef037..4374868 100644
--- a/runtime/arm/i64_dtos.S
+++ b/runtime/arm/i64_dtos.S
@@ -40,8 +40,7 @@
.global __i64_dtos
__i64_dtos:
- push {r4, lr}
- mov r4, r1, asr #31 @ save sign in r4
+ mov r12, r1, asr #31 @ save sign of result in r12
@ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
@ note: 1023 + 52 = 1075 = 1024 + 51
@ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
@@ -61,31 +60,37 @@ __i64_dtos:
@ shift it appropriately
cmp r2, #0
blt 3f
- bl __i64_shl @ if EXP >= 0, shift left by EXP
- b 4f
-3: rsb r2, r2, #0
- bl __i64_shr @ otherwise, shift right by -EXP
+ @ EXP >= 0: shift left by EXP. Note that EXP < 12
+ rsb r3, r2, #32 @ r3 = 32 - amount
+ mov r1, r1, lsl r2
+ orr r1, r1, r0, lsr r3
+ mov r0, r0, lsl r2
+ b 4f
+ @ EXP < 0: shift right by -EXP. Note that -EXP <= 52 but can be >= 32
+3: rsb r2, r2, #0 @ r2 = -EXP - shift amount
+ rsb r3, r2, #32 @ r3 = 32 - amount
+ mov r0, r0, lsr r2
+ orr r0, r0, r1, lsl r3
+ sub r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s)
+ orr r0, r0, r1, lsr r3
+ mov r1, r1, lsr r2
@ apply sign to result
-4: eor r0, r0, r4
- eor r1, r1, r4
- subs r0, r0, r4
- sbc r1, r1, r4
- pop {r4, lr}
+4: eor r0, r0, r12
+ eor r1, r1, r12
+ subs r0, r0, r12
+ sbc r1, r1, r12
bx lr
@ special cases
1: mov r0, #0 @ result is 0
mov r1, #0
- pop {r4, lr}
bx lr
2: cmp r4, #0
blt 6f
mvn r0, #0 @ result is 0x7F....FF (MAX_SINT)
mov r1, r0, lsr #1
- pop {r4, lr}
bx lr
6: mov r0, #0 @ result is 0x80....00 (MIN_SINT)
mov r1, #0x80000000
- pop {r4, lr}
bx lr
.type __i64_dtos, %function
.size __i64_dtos, . - __i64_dtos
diff --git a/runtime/arm/i64_dtou.S b/runtime/arm/i64_dtou.S
index 7f6152e..50648a5 100644
--- a/runtime/arm/i64_dtou.S
+++ b/runtime/arm/i64_dtou.S
@@ -60,9 +60,22 @@ __i64_dtou:
orr r1, r1, #0x00100000 @ HI |= 0x00100000
@ shift it appropriately
cmp r2, #0
- bge __i64_shl @ if EXP >= 0, shift left by EXP
- rsb r2, r2, #0
- b __i64_shr @ otherwise, shift right by -EXP
+ blt 3f
+ @ EXP >= 0: shift left by EXP. Note that EXP < 12
+ rsb r3, r2, #32 @ r3 = 32 - amount
+ mov r1, r1, lsl r2
+ orr r1, r1, r0, lsr r3
+ mov r0, r0, lsl r2
+ bx lr
+ @ EXP < 0: shift right by -EXP. Note that -EXP <= 52 but can be >= 32
+3: rsb r2, r2, #0 @ r2 = -EXP - shift amount
+ rsb r3, r2, #32 @ r3 = 32 - amount
+ mov r0, r0, lsr r2
+ orr r0, r0, r1, lsl r3
+ sub r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s)
+ orr r0, r0, r1, lsr r3
+ mov r1, r1, lsr r2
+ bx lr
@ special cases
1: mov r0, #0 @ result is 0
mov r1, #0
diff --git a/runtime/arm/i64_shl.s b/runtime/arm/i64_shl.s
index 8014f88..afd92db 100644
--- a/runtime/arm/i64_shl.s
+++ b/runtime/arm/i64_shl.s
@@ -39,22 +39,34 @@
@@@ Shift left
@ Note on ARM shifts: the shift amount is taken modulo 256.
-@ Therefore, unsigned shifts by 32 bits or more produce 0.
+@ If shift amount mod 256 >= 32, the shift produces 0.
+
+@ Algorithm:
+@ RH = (XH << N) | (XL >> (32-N)) | (XL << (N-32))
+@ RL = XL << N
+@ If N = 0:
+@ RH = XH | 0 | 0
+@ RL = XL
+@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 225 <= N-32 mod 256 <= 255
+@ RH = (XH << N) | (XL >> (32-N)) | 0
+@ RL = XL << N
+@ If N = 32:
+@ RH = 0 | XL | 0
+@ RL = 0
+@ If 33 <= N <= 63: 225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31
+@ RH = 0 | 0 | (XL << (N-32))
+@ RL = 0
.global __i64_shl
__i64_shl:
and r2, r2, #63 @ normalize amount to 0...63
- rsbs r3, r2, #32 @ r3 = 32 - amount
- ble 1f @ branch if <= 0, namely if amount >= 32
+ rsb r3, r2, #32 @ r3 = 32 - amount
mov r1, r1, lsl r2
- orr r1, r0, lsr r3
+ orr r1, r1, r0, lsr r3
+ sub r3, r2, #32 @ r3 = amount - 32
+ orr r1, r1, r0, lsl r3
mov r0, r0, lsl r2
bx lr
-1:
- sub r2, r2, #32
- mov r1, r0, lsl r2
- mov r0, #0
- bx lr
.type __i64_shl, %function
.size __i64_shl, . - __i64_shl
diff --git a/runtime/arm/i64_shr.s b/runtime/arm/i64_shr.s
index f10b770..9d60441 100644
--- a/runtime/arm/i64_shr.s
+++ b/runtime/arm/i64_shr.s
@@ -38,20 +38,35 @@
@@@ Shift right unsigned
+@ Note on ARM shifts: the shift amount is taken modulo 256.
+@ If shift amount mod 256 >= 32, the shift produces 0.
+
+@ Algorithm:
+@ RL = (XL >> N) | (XH << (32-N)) | (XH >> (N-32))
+@ RH = XH >> N
+@ If N = 0:
+@ RL = XL | 0 | 0
+@ RH = XH
+@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 225 <= N-32 mod 256 <= 255
+@ RL = (XL >> N) | (XH << (32-N)) | 0
+@ RH = XH >> N
+@ If N = 32:
+@ RL = 0 | XH | 0
+@ RH = 0
+@ If 33 <= N <= 63: 225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31
+@ RL = 0 | 0 | (XH >> (N-32))
+@ RH = 0
+
.global __i64_shr
__i64_shr:
and r2, r2, #63 @ normalize amount to 0...63
- rsbs r3, r2, #32 @ r3 = 32 - amount
- ble 1f @ branch if <= 0, namely if amount >= 32
+ rsb r3, r2, #32 @ r3 = 32 - amount
mov r0, r0, lsr r2
- orr r0, r1, lsl r3
+ orr r0, r0, r1, lsl r3
+ sub r3, r2, #32 @ r3 = amount - 32
+ orr r0, r0, r1, lsr r3
mov r1, r1, lsr r2
bx lr
-1:
- sub r2, r2, #32
- mov r0, r1, lsr r2
- mov r1, #0
- bx lr
.type __i64_shr, %function
.size __i64_shr, . - __i64_shr
diff --git a/runtime/ia32/i64_dtou.S b/runtime/ia32/i64_dtou.S
index cdd2381..4903f84 100644
--- a/runtime/ia32/i64_dtou.S
+++ b/runtime/ia32/i64_dtou.S
@@ -40,12 +40,6 @@
FUNCTION(__i64_dtou)
subl $4, %esp
- // Change rounding mode to "round towards zero"
- fnstcw 0(%esp)
- movw 0(%esp), %ax
- movb $12, %ah
- movw %ax, 2(%esp)
- fldcw 2(%esp)
// Compare argument with 2^63
fldl 8(%esp)
flds LC1
@@ -54,18 +48,36 @@ FUNCTION(__i64_dtou)
sahf
jbe 1f // branch if not (ARG < 2^63)
// Argument < 2^63: convert as is
+ // Change rounding mode to "round towards zero"
+ fnstcw 0(%esp)
+ movw 0(%esp), %ax
+ movb $12, %ah
+ movw %ax, 2(%esp)
+ fldcw 2(%esp)
+ // Convert
fistpll 8(%esp)
movl 8(%esp), %eax
movl 12(%esp), %edx
- jmp 2f
+ // Restore rounding mode
+ fldcw 0(%esp)
+ addl $4, %esp
+ ret
// Argument > 2^63: offset ARG by -2^63, then convert, then offset RES by 2^63
1: fsubs LC1
+ // Change rounding mode to "round towards zero"
+ fnstcw 0(%esp)
+ movw 0(%esp), %ax
+ movb $12, %ah
+ movw %ax, 2(%esp)
+ fldcw 2(%esp)
+ // Convert
fistpll 8(%esp)
movl 8(%esp), %eax
movl 12(%esp), %edx
+ // Offset result by 2^63
addl $0x80000000, %edx
// Restore rounding mode
-2: fldcw 0(%esp)
+ fldcw 0(%esp)
addl $4, %esp
ret
diff --git a/runtime/powerpc/i64_dtos.s b/runtime/powerpc/i64_dtos.s
index 56c6e4b..9b1288f 100644
--- a/runtime/powerpc/i64_dtos.s
+++ b/runtime/powerpc/i64_dtos.s
@@ -58,16 +58,27 @@ __i64_dtos:
rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000
oris r3, r3, 0x10 # HI |= 0x00100000
# shift it appropriately
- mflr r9 # save retaddr in r9
cmpwi r5, 0
blt 3f
- bl __i64_shl # if EXP >= 0, shift left by EXP
+ # if EXP >= 0, shift left by EXP. Note that EXP < 11.
+ subfic r6, r5, 32 # r6 = 32 - EXP
+ slw r3, r3, r5
+ srw r0, r4, r6
+ or r3, r3, r0
+ slw r4, r4, r5
b 4f
-3: subfic r5, r5, 0
- bl __i64_shr # if EXP < 0, shift right by -EXP
+ # if EXP < 0, shift right by -EXP. Note that -EXP <= 52 but can be >= 32.
+3: subfic r5, r5, 0 # r5 = -EXP = shift amount
+ subfic r6, r5, 32 # r6 = 32 - amount
+ addi r7, r5, -32 # r7 = amount - 32 (see i64_shr.s)
+ srw r4, r4, r5
+ slw r0, r3, r6
+ or r4, r4, r0
+ srw r0, r3, r7
+ or r4, r4, r0
+ srw r3, r3, r5
# apply sign to result
-4: mtlr r9
- xor r4, r4, r10
+4: xor r4, r4, r10
xor r3, r3, r10
subfc r4, r10, r4
subfe r3, r10, r3
@@ -76,7 +87,7 @@ __i64_dtos:
1: li r3, 0 # result is 0
li r4, 0
blr
-2: cmpwi r10, 0 # result is MAX_SINT or MIN_SINT
+2: li r4, -1 # result is MAX_SINT or MIN_SINT
bge 5f # depending on sign
li r4, -1 # result is MAX_SINT = 0x7FFF_FFFF
srwi r3, r4, 1
diff --git a/runtime/powerpc/i64_dtou.s b/runtime/powerpc/i64_dtou.s
index d9638e6..78cd08b 100644
--- a/runtime/powerpc/i64_dtou.s
+++ b/runtime/powerpc/i64_dtou.s
@@ -61,9 +61,24 @@ __i64_dtou:
# shift it appropriately
cmpwi r5, 0
blt 3f
- b __i64_shl # if EXP >= 0, shift left by EXP
-3: subfic r5, r5, 0
- b __i64_shr # if EXP < 0, shift right by -EXP
+ # if EXP >= 0, shift left by EXP. Note that EXP < 12.
+ subfic r6, r5, 32 # r6 = 32 - EXP
+ slw r3, r3, r5
+ srw r0, r4, r6
+ or r3, r3, r0
+ slw r4, r4, r5
+ blr
+ # if EXP < 0, shift right by -EXP. Note that -EXP <= 52 but can be >= 32.
+3: subfic r5, r5, 0 # r5 = -EXP = shift amount
+ subfic r6, r5, 32 # r6 = 32 - amount
+ addi r7, r5, -32 # r7 = amount - 32 (see i64_shr.s)
+ srw r4, r4, r5
+ slw r0, r3, r6
+ or r4, r4, r0
+ srw r0, r3, r7
+ or r4, r4, r0
+ srw r3, r3, r5
+ blr
# Special cases
1: li r3, 0 # result is 0
li r4, 0
diff --git a/runtime/test/test_int64.c b/runtime/test/test_int64.c
index 0a7dfc4..4dbad70 100644
--- a/runtime/test/test_int64.c
+++ b/runtime/test/test_int64.c
@@ -147,6 +147,16 @@ static void test1(u64 x, u64 y)
if (f != g)
error++, printf("(double) %lld (s) = %a, expected %a\n", x, f, g);
+ f = (double) x;
+ z = __i64_dtou(f);
+ if (z != (u64) (double) f)
+ error++, printf("(u64) %a = %llu, expected %llu\n", f, z, (u64) f);
+
+ f = (double) ((s64) x);
+ t = __i64_dtos(f);
+ if (t != (s64) (double) f)
error++, printf("(s64) %a = %lld, expected %lld\n", f, t, (s64) f);
+
f = ((double) x) * 0.0001;
z = __i64_dtou(f);
if (z != (u64) f)