From b257a6d283f6f5784cb351856b5dbe8c645a1f6f Mon Sep 17 00:00:00 2001 From: xleroy Date: Sun, 5 May 2013 16:29:20 +0000 Subject: ia32/i64_dtou: wrong play on rounding mode arm, powerpc: expand shifts inline in dtos and dtou arm: branchless code for shl and shr test: more tests for double -> long long conversions. git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2234 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- runtime/arm/i64_dtos.S | 33 +++++++++++++++++++-------------- runtime/arm/i64_dtou.S | 19 ++++++++++++++++--- runtime/arm/i64_shl.s | 30 +++++++++++++++++++++--------- runtime/arm/i64_shr.s | 31 +++++++++++++++++++++++-------- runtime/ia32/i64_dtou.S | 28 ++++++++++++++++++++-------- runtime/powerpc/i64_dtos.s | 25 ++++++++++++++++++------- runtime/powerpc/i64_dtou.s | 21 ++++++++++++++++++--- runtime/test/test_int64.c | 10 ++++++++++ 8 files changed, 145 insertions(+), 52 deletions(-) (limited to 'runtime') diff --git a/runtime/arm/i64_dtos.S b/runtime/arm/i64_dtos.S index d0ef037..4374868 100644 --- a/runtime/arm/i64_dtos.S +++ b/runtime/arm/i64_dtos.S @@ -40,8 +40,7 @@ .global __i64_dtos __i64_dtos: - push {r4, lr} - mov r4, r1, asr #31 @ save sign in r4 + mov r12, r1, asr #31 @ save sign of result in r12 @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2 @ note: 1023 + 52 = 1075 = 1024 + 51 @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21 @@ -61,31 +60,37 @@ __i64_dtos: @ shift it appropriately cmp r2, #0 blt 3f - bl __i64_shl @ if EXP >= 0, shift left by EXP - b 4f -3: rsb r2, r2, #0 - bl __i64_shr @ otherwise, shift right by -EXP + @ EXP >= 0: shift left by EXP. Note that EXP < 12 + rsb r3, r2, #32 @ r3 = 32 - amount + mov r1, r1, lsl r2 + orr r1, r1, r0, lsr r3 + mov r0, r0, lsl r2 + b 4f + @ EXP < 0: shift right by -EXP. 
Note that -EXP <= 52 but can be >= 32 +3: rsb r2, r2, #0 @ r2 = -EXP = shift amount + rsb r3, r2, #32 @ r3 = 32 - amount + mov r0, r0, lsr r2 + orr r0, r0, r1, lsl r3 + sub r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s) + orr r0, r0, r1, lsr r3 + mov r1, r1, lsr r2 @ apply sign to result -4: eor r0, r0, r4 - eor r1, r1, r4 - subs r0, r0, r4 - sbc r1, r1, r4 - pop {r4, lr} +4: eor r0, r0, r12 + eor r1, r1, r12 + subs r0, r0, r12 + sbc r1, r1, r12 bx lr @ special cases 1: mov r0, #0 @ result is 0 mov r1, #0 - pop {r4, lr} bx lr 2: cmp r4, #0 blt 6f mvn r0, #0 @ result is 0x7F....FF (MAX_SINT) mov r1, r0, lsr #1 - pop {r4, lr} bx lr 6: mov r0, #0 @ result is 0x80....00 (MIN_SINT) mov r1, #0x80000000 - pop {r4, lr} bx lr .type __i64_dtos, %function .size __i64_dtos, . - __i64_dtos diff --git a/runtime/arm/i64_dtou.S b/runtime/arm/i64_dtou.S index 7f6152e..50648a5 100644 --- a/runtime/arm/i64_dtou.S +++ b/runtime/arm/i64_dtou.S @@ -60,9 +60,22 @@ __i64_dtou: orr r1, r1, #0x00100000 @ HI |= 0x00100000 @ shift it appropriately cmp r2, #0 - bge __i64_shl @ if EXP >= 0, shift left by EXP - rsb r2, r2, #0 - b __i64_shr @ otherwise, shift right by -EXP + blt 3f + @ EXP >= 0: shift left by EXP. Note that EXP < 12 + rsb r3, r2, #32 @ r3 = 32 - amount + mov r1, r1, lsl r2 + orr r1, r1, r0, lsr r3 + mov r0, r0, lsl r2 + bx lr + @ EXP < 0: shift right by -EXP. Note that -EXP <= 52 but can be >= 32 +3: rsb r2, r2, #0 @ r2 = -EXP = shift amount + rsb r3, r2, #32 @ r3 = 32 - amount + mov r0, r0, lsr r2 + orr r0, r0, r1, lsl r3 + sub r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s) + orr r0, r0, r1, lsr r3 + mov r1, r1, lsr r2 + bx lr @ special cases 1: mov r0, #0 @ result is 0 mov r1, #0 diff --git a/runtime/arm/i64_shl.s b/runtime/arm/i64_shl.s index 8014f88..afd92db 100644 --- a/runtime/arm/i64_shl.s +++ b/runtime/arm/i64_shl.s @@ -39,22 +39,34 @@ @@@ Shift left @ Note on ARM shifts: the shift amount is taken modulo 256. -@ Therefore, unsigned shifts by 32 bits or more produce 0. 
+@ If shift amount mod 256 >= 32, the shift produces 0. + +@ Algorithm: +@ RH = (XH << N) | (XL >> (32-N)) | (XL << (N-32)) +@ RL = XL << N +@ If N = 0: +@ RH = XH | 0 | 0 +@ RL = XL +@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 225 <= N-32 mod 256 <= 255 +@ RH = (XH << N) | (XL >> (32-N)) | 0 +@ RL = XL << N +@ If N = 32: +@ RH = 0 | XL | XL +@ RL = 0 +@ If 33 <= N <= 63: 225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31 +@ RH = 0 | 0 | (XL << (N-32)) +@ RL = 0 .global __i64_shl __i64_shl: and r2, r2, #63 @ normalize amount to 0...63 - rsbs r3, r2, #32 @ r3 = 32 - amount - ble 1f @ branch if <= 0, namely if amount >= 32 + rsb r3, r2, #32 @ r3 = 32 - amount mov r1, r1, lsl r2 - orr r1, r0, lsr r3 + orr r1, r1, r0, lsr r3 + sub r3, r2, #32 @ r3 = amount - 32 + orr r1, r1, r0, lsl r3 mov r0, r0, lsl r2 bx lr -1: - sub r2, r2, #32 - mov r1, r0, lsl r2 - mov r0, #0 - bx lr .type __i64_shl, %function .size __i64_shl, . - __i64_shl diff --git a/runtime/arm/i64_shr.s b/runtime/arm/i64_shr.s index f10b770..9d60441 100644 --- a/runtime/arm/i64_shr.s +++ b/runtime/arm/i64_shr.s @@ -38,20 +38,35 @@ @@@ Shift right unsigned +@ Note on ARM shifts: the shift amount is taken modulo 256. +@ If shift amount mod 256 >= 32, the shift produces 0. 
+ +@ Algorithm: +@ RL = (XL >> N) | (XH << (32-N)) | (XH >> (N-32)) +@ RH = XH >> N +@ If N = 0: +@ RL = XL | 0 | 0 +@ RH = XH +@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 225 <= N-32 mod 256 <= 255 +@ RL = (XL >> N) | (XH << (32-N)) | 0 +@ RH = XH >> N +@ If N = 32: +@ RL = 0 | XH | XH +@ RH = 0 +@ If 33 <= N <= 63: 225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31 +@ RL = 0 | 0 | (XH >> (N-32)) +@ RH = 0 + .global __i64_shr __i64_shr: and r2, r2, #63 @ normalize amount to 0...63 - rsbs r3, r2, #32 @ r3 = 32 - amount - ble 1f @ branch if <= 0, namely if amount >= 32 + rsb r3, r2, #32 @ r3 = 32 - amount mov r0, r0, lsr r2 - orr r0, r1, lsl r3 + orr r0, r0, r1, lsl r3 + sub r3, r2, #32 @ r3 = amount - 32 + orr r0, r0, r1, lsr r3 mov r1, r1, lsr r2 bx lr -1: - sub r2, r2, #32 - mov r0, r1, lsr r2 - mov r1, #0 - bx lr .type __i64_shr, %function .size __i64_shr, . - __i64_shr diff --git a/runtime/ia32/i64_dtou.S b/runtime/ia32/i64_dtou.S index cdd2381..4903f84 100644 --- a/runtime/ia32/i64_dtou.S +++ b/runtime/ia32/i64_dtou.S @@ -40,12 +40,6 @@ FUNCTION(__i64_dtou) subl $4, %esp - // Change rounding mode to "round towards zero" - fnstcw 0(%esp) - movw 0(%esp), %ax - movb $12, %ah - movw %ax, 2(%esp) - fldcw 2(%esp) // Compare argument with 2^63 fldl 8(%esp) flds LC1 @@ -54,18 +48,36 @@ FUNCTION(__i64_dtou) sahf jbe 1f // branch if not (ARG < 2^63) // Argument < 2^63: convert as is + // Change rounding mode to "round towards zero" + fnstcw 0(%esp) + movw 0(%esp), %ax + movb $12, %ah + movw %ax, 2(%esp) + fldcw 2(%esp) + // Convert fistpll 8(%esp) movl 8(%esp), %eax movl 12(%esp), %edx - jmp 2f + // Restore rounding mode + fldcw 0(%esp) + addl $4, %esp + ret // Argument > 2^63: offset ARG by -2^63, then convert, then offset RES by 2^63 1: fsubs LC1 + // Change rounding mode to "round towards zero" + fnstcw 0(%esp) + movw 0(%esp), %ax + movb $12, %ah + movw %ax, 2(%esp) + fldcw 2(%esp) + // Convert fistpll 8(%esp) movl 8(%esp), %eax movl 12(%esp), %edx + // Offset result by 2^63 
addl $0x80000000, %edx // Restore rounding mode -2: fldcw 0(%esp) + fldcw 0(%esp) addl $4, %esp ret diff --git a/runtime/powerpc/i64_dtos.s b/runtime/powerpc/i64_dtos.s index 56c6e4b..9b1288f 100644 --- a/runtime/powerpc/i64_dtos.s +++ b/runtime/powerpc/i64_dtos.s @@ -58,16 +58,27 @@ __i64_dtos: rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000 oris r3, r3, 0x10 # HI |= 0x00100000 # shift it appropriately - mflr r9 # save retaddr in r9 cmpwi r5, 0 blt 3f - bl __i64_shl # if EXP >= 0, shift left by EXP + # if EXP >= 0, shift left by EXP. Note that EXP < 11. + subfic r6, r5, 32 # r6 = 32 - EXP + slw r3, r3, r5 + srw r0, r4, r6 + or r3, r3, r0 + slw r4, r4, r5 b 4f -3: subfic r5, r5, 0 - bl __i64_shr # if EXP < 0, shift right by -EXP + # if EXP < 0, shift right by -EXP. Note that -EXP <= 52 but can be >= 32. +3: subfic r5, r5, 0 # r5 = -EXP = shift amount + subfic r6, r5, 32 # r6 = 32 - amount + addi r7, r5, -32 # r7 = amount - 32 (see i64_shr.s) + srw r4, r4, r5 + slw r0, r3, r6 + or r4, r4, r0 + srw r0, r3, r7 + or r4, r4, r0 + srw r3, r3, r5 # apply sign to result -4: mtlr r9 - xor r4, r4, r10 +4: xor r4, r4, r10 xor r3, r3, r10 subfc r4, r10, r4 subfe r3, r10, r3 @@ -76,7 +87,7 @@ __i64_dtos: 1: li r3, 0 # result is 0 li r4, 0 blr -2: cmpwi r10, 0 # result is MAX_SINT or MIN_SINT +2: li r4, -1 # result is MAX_SINT or MIN_SINT bge 5f # depending on sign li r4, -1 # result is MAX_SINT = 0x7FFF_FFFF srwi r3, r4, 1 diff --git a/runtime/powerpc/i64_dtou.s b/runtime/powerpc/i64_dtou.s index d9638e6..78cd08b 100644 --- a/runtime/powerpc/i64_dtou.s +++ b/runtime/powerpc/i64_dtou.s @@ -61,9 +61,24 @@ __i64_dtou: # shift it appropriately cmpwi r5, 0 blt 3f - b __i64_shl # if EXP >= 0, shift left by EXP -3: subfic r5, r5, 0 - b __i64_shr # if EXP < 0, shift right by -EXP + # if EXP >= 0, shift left by EXP. Note that EXP < 12. + subfic r6, r5, 32 # r6 = 32 - EXP + slw r3, r3, r5 + srw r0, r4, r6 + or r3, r3, r0 + slw r4, r4, r5 + blr + # if EXP < 0, shift right by -EXP. 
Note that -EXP <= 52 but can be >= 32. +3: subfic r5, r5, 0 # r5 = -EXP = shift amount + subfic r6, r5, 32 # r6 = 32 - amount + addi r7, r5, -32 # r7 = amount - 32 (see i64_shr.s) + srw r4, r4, r5 + slw r0, r3, r6 + or r4, r4, r0 + srw r0, r3, r7 + or r4, r4, r0 + srw r3, r3, r5 + blr # Special cases 1: li r3, 0 # result is 0 li r4, 0 diff --git a/runtime/test/test_int64.c b/runtime/test/test_int64.c index 0a7dfc4..4dbad70 100644 --- a/runtime/test/test_int64.c +++ b/runtime/test/test_int64.c @@ -147,6 +147,16 @@ static void test1(u64 x, u64 y) if (f != g) error++, printf("(double) %lld (s) = %a, expected %a\n", x, f, g); + f = (double) x; + z = __i64_dtou(f); + if (z != (u64) (double) f) + error++, printf("(u64) %a = %llu, expected %llu\n", f, z, (u64) f); + + f = (double) ((s64) x); + t = __i64_dtos(f); + if (t != (s64) (double) f) + error++, printf("(s64) %a = %lld, expected %lld\n", f, z, (s64) f); + f = ((double) x) * 0.0001; z = __i64_dtou(f); if (z != (u64) f) -- cgit v1.2.3