From 255cee09b71255051c2b40eae0c88bffce1f6f32 Mon Sep 17 00:00:00 2001 From: xleroy Date: Sat, 20 Apr 2013 07:54:52 +0000 Subject: Big merge of the newregalloc-int64 branch. Lots of changes in two directions: 1- new register allocator (+ live range splitting, spilling&reloading, etc) based on a posteriori validation using the Rideau-Leroy algorithm 2- support for 64-bit integer arithmetic (type "long long"). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- runtime/Makefile | 28 ++- runtime/arm/int64.s | 424 +++++++++++++++++++++++++++++++++++++++ runtime/ia32/int64.s | 471 ++++++++++++++++++++++++++++++++++++++++++++ runtime/powerpc/int64.s | 492 ++++++++++++++++++++++++++++++++++++++++++++++ runtime/test/test_int64.c | 238 ++++++++++++++++++++++ 5 files changed, 1643 insertions(+), 10 deletions(-) create mode 100644 runtime/arm/int64.s create mode 100644 runtime/ia32/int64.s create mode 100644 runtime/powerpc/int64.s create mode 100644 runtime/test/test_int64.c (limited to 'runtime') diff --git a/runtime/Makefile b/runtime/Makefile index c128ba2..5550e6b 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -2,26 +2,34 @@ include ../Makefile.config CFLAGS=-O1 -g -Wall INCLUDES= -OBJS= +OBJS=int64.o LIB=libcompcert.a -ifeq ($(strip $(HAS_RUNTIME_LIB)),true) all: $(LIB) $(INCLUDES) -else -all: -endif $(LIB): $(OBJS) rm -f $(LIB) ar rcs $(LIB) $(OBJS) -clean: +%.o: $(ARCH)/%.s + $(CASM) $^ + +clean:: rm -f *.o $(LIB) -ifeq ($(strip $(HAS_RUNTIME_LIB)),true) install: install -d $(LIBDIR) install -c $(LIB) $(INCLUDES) $(LIBDIR) -else -install: -endif + +test/test_int64: test/test_int64.c $(LIB) + $(CC) -o $@ test/test_int64.c $(LIB) + +clean:: + rm -f test/test_int64 + +.PHONY: test + +test: FORCE test/test_int64 + test/test_int64 + +FORCE: diff --git a/runtime/arm/int64.s b/runtime/arm/int64.s new file mode 100644 index 0000000..6b03351 --- /dev/null +++ b/runtime/arm/int64.s @@ -0,0 +1,424 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +@ Calling conventions for R = F(X) or R = F(X,Y): +@ one or two long arguments: XL in r0, XH in r1, YL in r2, YH in r3 +@ one long argument, one int: XL in r0, XH in r1, Y in r2 +@ one float argument: X in r0, r1 +@ one long result: RL in r0, RH in r1 +@ one float result: R in r0, r1 +@ This is a little-endian convention: the low word is in the +@ low-numbered register. +@ Can use r0...r3 and f0...f7 as temporary registers (caller-save) + + .text + +@@@ Unsigned comparison + + .global __i64_ucmp +__i64_ucmp: + cmp r1, r3 @ compare high words + cmpeq r0, r2 @ if equal, compare low words instead + moveq r0, #0 @ res = 0 if eq + movhi r0, #1 @ res = 1 if unsigned higher + mvnlo r0, #0 @ res = -1 if unsigned lower + bx lr + .type __i64_ucmp, %function + .size __i64_ucmp, . - __i64_ucmp + +@@@ Signed comparison + + .global __i64_scmp +__i64_scmp: + cmp r0, r2 @ compare low words (unsigned) + moveq r0, #0 @ res = 0 if eq + movhi r0, #1 @ res = 1 if unsigned higher + mvnlo r0, #0 @ res = -1 if unsigned lower + cmp r1, r3 @ compare high words (signed) + addgt r0, r0, #2 @ res += 2 if signed greater + sublt r0, r0, #2 @ res -= 2 if signed less + @ here, r0 = 0 if X == Y + @ r0 = -3, -2, -1 if X < Y + @ r0 = 1, 2, 3 if X > Y + bx lr + .type __i64_scmp, %function + .size __i64_scmp, . - __i64_scmp + +@@@ Opposite + + .global __i64_neg +__i64_neg: + rsbs r0, r0, #0 + rsc r1, r1, #0 + bx lr + .type __i64_neg, %function + .size __i64_neg, . - __i64_neg + +@@@ Addition + + .global __i64_add +__i64_add: + adds r0, r0, r2 + adc r1, r1, r3 + bx lr + .type __i64_add, %function + .size __i64_add, . - __i64_add + +@@@ Subtraction + + .global __i64_sub +__i64_sub: + subs r0, r0, r2 + sbc r1, r1, r3 + bx lr + .type __i64_sub, %function + .size __i64_sub, . - __i64_sub + +@ Note on ARM shifts: the shift amount is taken modulo 256. +@ Therefore, unsigned shifts by 32 bits or more produce 0. + +@@@ Shift left + + .global __i64_shl +__i64_shl: + and r2, r2, #63 @ normalize amount to 0...63 + rsbs r3, r2, #32 @ r3 = 32 - amount + ble 1f @ branch if <= 0, namely if amount >= 32 + mov r1, r1, lsl r2 + orr r1, r0, lsr r3 + mov r0, r0, lsl r2 + bx lr +1: + sub r2, r2, #32 + mov r1, r0, lsl r2 + mov r0, #0 + bx lr + .type __i64_shl, %function + .size __i64_shl, . - __i64_shl + +@@@ Shift right unsigned + + .global __i64_shr +__i64_shr: + and r2, r2, #63 @ normalize amount to 0...63 + rsbs r3, r2, #32 @ r3 = 32 - amount + ble 1f @ branch if <= 0, namely if amount >= 32 + mov r0, r0, lsr r2 + orr r0, r1, lsl r3 + mov r1, r1, lsr r2 + bx lr +1: + sub r2, r2, #32 + mov r0, r1, lsr r2 + mov r1, #0 + bx lr + .type __i64_shr, %function + .size __i64_shr, . 
- __i64_shr + +@@@ Shift right signed + + .global __i64_sar +__i64_sar: + and r2, r2, #63 @ normalize amount to 0...63 + rsbs r3, r2, #32 @ r3 = 32 - amount + ble 1f @ branch if <= 0, namely if amount >= 32 + mov r0, r0, lsr r2 + orr r0, r1, lsl r3 + mov r1, r1, asr r2 + bx lr +1: + sub r2, r2, #32 + mov r0, r1, asr r2 + mov r1, r1, asr #31 + bx lr + .type __i64_sar, %function + .size __i64_sar, . - __i64_sar + +@@@ Multiplication + + .global __i64_mul +__i64_mul: + push {r4, r5} + mov r4, r0 @ save first arg in r4,r5 + mov r5, r1 + umull r0, r1, r2, r4 @ 64-bit product of low halves + mla r1, r2, r5, r1 @ add 32-bit products low half * high half + mla r1, r3, r4, r1 @ to high half of result + pop {r4, r5} + bx lr + .type __i64_mul, %function + .size __i64_mul, . - __i64_mul + +@@@ Auxiliary function for division and modulus. Not exported. + +@ On entry: N = (r0, r1) numerator D = (r2, r3) divisor +@ On exit: Q = (r4, r5) quotient R = (r0, r1) remainder +@ Locals: M = (r6, r7) mask TMP = r8 temporary + +__i64_udivmod: + orrs r8, r2, r3 @ is D == 0? + bxeq lr @ if so, return with unspecified results + mov r4, #0 @ Q = 0 + mov r5, #0 + mov r6, #1 @ M = 1 + mov r7, #0 +1: cmp r3, #0 @ while ((signed) D >= 0) ... + blt 2f + subs r8, r0, r2 @ ... and N >= D ... + sbcs r8, r1, r3 + blo 2f + adds r2, r2, r2 @ D = D << 1 + adc r3, r3, r3 + adds r6, r6, r6 @ M = M << 1 + adc r7, r7, r7 + b 1b +2: subs r0, r0, r2 @ N = N - D + sbcs r1, r1, r3 + orr r4, r4, r6 @ Q = Q | M + orr r5, r5, r7 + bhs 3f @ if N was >= D, continue + adds r0, r0, r2 @ otherwise, undo what we just did + adc r1, r1, r3 @ N = N + D + bic r4, r4, r6 @ Q = Q & ~M + bic r5, r5, r7 +3: movs r7, r7, lsr #1 @ M = M >> 1 + mov r6, r6, rrx + movs r3, r3, lsr #1 @ D = D >> 1 + mov r2, r2, rrx + orrs r8, r6, r7 @ repeat while (M != 0) ... + bne 2b + bx lr + +@@@ Unsigned division + + .global __i64_udiv +__i64_udiv: + push {r4, r5, r6, r7, r8, lr} + bl __i64_udivmod + mov r0, r4 + mov r1, r5 + pop {r4, r5, r6, r7, r8, lr} + bx lr + .type __i64_udiv, %function + .size __i64_udiv, . - __i64_udiv + +@@@ Unsigned modulus + + .global __i64_umod +__i64_umod: + push {r4, r5, r6, r7, r8, lr} + bl __i64_udivmod @ remainder is already in r0,r1 + pop {r4, r5, r6, r7, r8, lr} + bx lr + .type __i64_umod, %function + .size __i64_umod, . - __i64_umod + + .global __i64_sdiv +__i64_sdiv: + push {r4, r5, r6, r7, r8, r10, lr} + eor r10, r1, r3 @ r10 = sign of result + mov r4, r1, asr #31 @ take absolute value of N + eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31) + eor r1, r1, r4 + subs r0, r0, r4 + sbc r1, r1, r4 + mov r4, r3, asr #31 @ take absolute value of D + eor r2, r2, r4 + eor r3, r3, r4 + subs r2, r2, r4 + sbc r3, r3, r4 + bl __i64_udivmod @ do unsigned division + mov r0, r4 + mov r1, r5 + eor r0, r0, r10, asr#31 @ apply expected sign + eor r1, r1, r10, asr#31 + subs r0, r0, r10, asr#31 + sbc r1, r1, r10, asr#31 + pop {r4, r5, r6, r7, r8, r10, lr} + bx lr + .type __i64_sdiv, %function + .size __i64_sdiv, . 
- __i64_sdiv + +@@@ Signed modulus + + .global __i64_smod +__i64_smod: + push {r4, r5, r6, r7, r8, r10, lr} + mov r10, r1 @ r10 = sign of result + mov r4, r1, asr#31 @ take absolute value of N + eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31) + eor r1, r1, r4 + subs r0, r0, r4 + sbc r1, r1, r4 + mov r4, r3, asr #31 @ take absolute value of D + eor r2, r2, r4 + eor r3, r3, r4 + subs r2, r2, r4 + sbc r3, r3, r4 + bl __i64_udivmod @ do unsigned division + eor r0, r0, r10, asr#31 @ apply expected sign + eor r1, r1, r10, asr#31 + subs r0, r0, r10, asr#31 + sbc r1, r1, r10, asr#31 + pop {r4, r5, r6, r7, r8, r10, lr} + bx lr + .type __i64_smod, %function + .size __i64_smod, . - __i64_smod + +@@@ Conversion from unsigned 64-bit integer to double float + + .global __i64_utod +__i64_utod: + fmsr s0, r0 + fuitod d0, s0 @ convert low half to double (unsigned) + fmsr s2, r1 + fuitod d1, s2 @ convert high half to double (unsigned) + fldd d2, .LC1 @ d2 = 2^32 + fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 + fmrrd r0, r1, d0 @ return result in r0, r1 + bx lr + .type __i64_utod, %function + .size __i64_utod, . - __i64_utod + + .balign 8 +.LC1: .quad 0x41f0000000000000 @ 2^32 in double precision + +@@@ Conversion from signed 64-bit integer to double float + + .global __i64_stod +__i64_stod: + fmsr s0, r0 + fuitod d0, s0 @ convert low half to double (unsigned) + fmsr s2, r1 + fsitod d1, s2 @ convert high half to double (signed) + fldd d2, .LC1 @ d2 = 2^32 + fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 + fmrrd r0, r1, d0 @ return result in r0, r1 + bx lr + .type __i64_stod, %function + .size __i64_stod, . - __i64_stod + +@@@ Conversion from double float to unsigned 64-bit integer + + .global __i64_dtou +__i64_dtou: + cmp r1, #0 @ is double < 0 ? + blt 1f @ then it converts to 0 + @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2 + @ note: 1023 + 52 = 1075 = 1024 + 51 + @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21 + mov r2, r1, lsl #1 + mov r2, r2, lsr #21 + sub r2, r2, #51 + sub r2, r2, #1024 + @ check range of exponent + cmn r2, #52 @ if EXP < -52, double is < 1.0 + blt 1f + cmp r2, #12 @ if EXP >= 64 - 52, double is >= 2^64 + bge 2f + @ extract true mantissa + bic r1, r1, #0xFF000000 + bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000 + orr r1, r1, #0x00100000 @ HI |= 0x00100000 + @ shift it appropriately + cmp r2, #0 + bge __i64_shl @ if EXP >= 0, shift left by EXP + rsb r2, r2, #0 + b __i64_shr @ otherwise, shift right by -EXP + @ special cases +1: mov r0, #0 @ result is 0 + mov r1, #0 + bx lr +2: mvn r0, #0 @ result is 0xFF....FF (MAX_UINT) + mvn r1, #0 + bx lr + .type __i64_dtou, %function + .size __i64_dtou, . 
- __i64_dtou + +@@@ Conversion from double float to signed 64-bit integer + + .global __i64_dtos +__i64_dtos: + push {r4, lr} + mov r4, r1, asr #31 @ save sign in r4 + @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2 + @ note: 1023 + 52 = 1075 = 1024 + 51 + @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21 + mov r2, r1, lsl #1 + mov r2, r2, lsr #21 + sub r2, r2, #51 + sub r2, r2, #1024 + @ check range of exponent + cmn r2, #52 @ if EXP < -52, |double| is < 1.0 + blt 1f + cmp r2, #11 @ if EXP >= 63 - 52, |double| is >= 2^63 + bge 2f + @ extract true mantissa + bic r1, r1, #0xFF000000 + bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000 + orr r1, r1, #0x00100000 @ HI |= 0x00100000 + @ shift it appropriately + cmp r2, #0 + blt 3f + bl __i64_shl @ if EXP >= 0, shift left by EXP + b 4f +3: rsb r2, r2, #0 + bl __i64_shr @ otherwise, shift right by -EXP + @ apply sign to result +4: eor r0, r0, r4 + eor r1, r1, r4 + subs r0, r0, r4 + sbc r1, r1, r4 + pop {r4, lr} + bx lr + @ special cases +1: mov r0, #0 @ result is 0 + mov r1, #0 + pop {r4, lr} + bx lr +2: cmp r4, #0 + blt 6f + mvn r0, #0 @ result is 0x7F....FF (MAX_SINT) + mov r1, r0, lsr #1 + pop {r4, lr} + bx lr +6: mov r0, #0 @ result is 0x80....00 (MIN_SINT) + mov r1, #0x80000000 + pop {r4, lr} + bx lr + .type __i64_dtos, %function + .size __i64_dtos, . - __i64_dtos + diff --git a/runtime/ia32/int64.s b/runtime/ia32/int64.s new file mode 100644 index 0000000..8fd8151 --- /dev/null +++ b/runtime/ia32/int64.s @@ -0,0 +1,471 @@ +# ***************************************************************** +# +# The Compcert verified compiler +# +# Xavier Leroy, INRIA Paris-Rocquencourt +# +# Copyright (c) 2013 Institut National de Recherche en Informatique et +# en Automatique. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# ********************************************************************* + +# Helper functions for 64-bit integer arithmetic. IA32 version. + + .text + +# Opposite + + .globl __i64_neg + .balign 16 +__i64_neg: + movl 4(%esp), %eax + movl 8(%esp), %edx + negl %eax + adcl $0, %edx + negl %edx + ret + .type __i64_neg, @function + .size __i64_neg, . 
- __i64_neg + +# Addition + + .globl __i64_add + .balign 16 +__i64_add: + movl 4(%esp), %eax + movl 8(%esp), %edx + addl 12(%esp), %eax + adcl 16(%esp), %edx + ret + .type __i64_add, @function + .size __i64_add, . - __i64_add + +# Subtraction + + .globl __i64_sub + .balign 16 +__i64_sub: + movl 4(%esp), %eax + movl 8(%esp), %edx + subl 12(%esp), %eax + sbbl 16(%esp), %edx + ret + .type __i64_sub, @function + .size __i64_sub, . - __i64_sub + +# Multiplication + + .globl __i64_mul + .balign 16 +__i64_mul: + movl 4(%esp), %eax + mull 12(%esp) # edx:eax = xlo * ylo + movl 4(%esp), %ecx + imull 16(%esp), %ecx # ecx = xlo * yhi + addl %ecx, %edx + movl 12(%esp), %ecx # ecx = xhi * ylo + imull 8(%esp), %ecx + addl %ecx, %edx + ret + .type __i64_mul, @function + .size __i64_mul, . - __i64_mul + +# Division and remainder + +# Auxiliary function, not exported. +# Input: 20(esp), 24(esp) is dividend N +# 28(esp), 32(esp) is divisor D +# Output: esi:edi is quotient Q +# eax:edx is remainder R +# ebp is preserved + + .balign 16 +__i64_udivmod: + cmpl $0, 32(%esp) # single-word divisor? (DH = 0) + jne 1f + # Special case 64 bits divided by 32 bits + movl 28(%esp), %ecx # divide NH by DL + movl 24(%esp), %eax # (will trap if D = 0) + xorl %edx, %edx + divl %ecx # eax = quotient, edx = remainder + movl %eax, %edi # high word of quotient in edi + movl 20(%esp), %eax # divide rem : NL by DL + divl %ecx # eax = quotient, edx = remainder + movl %eax, %esi # low word of quotient in esi */ + movl %edx, %eax # low word of remainder in eax + xorl %edx, %edx # high word of remainder is 0, in edx + ret + # The general case +1: movl 28(%esp), %ecx # esi:ecx = D + movl 32(%esp), %esi + movl 20(%esp), %eax # edx:eax = N + movl 24(%esp), %edx + # Scale D and N down, giving D' and N', until D' fits in 32 bits +2: shrl $1, %esi # shift D' right by one + rcrl $1, %ecx + shrl $1, %edx # shift N' right by one + rcrl $1, %eax + testl %esi, %esi # repeat until D'H = 0 + jnz 2b + # Divide N' by D' to get an approximate quotient + divl %ecx # eax = quotient, edx = remainder + movl %eax, %esi # save tentative quotient Q in esi + # Check for off by one quotient + # Compute Q * D +3: movl 32(%esp), %ecx + imull %esi, %ecx # ecx = Q * DH + movl 28(%esp), %eax + mull %esi # edx:eax = Q * DL + add %ecx, %edx # edx:eax = Q * D + jc 5f # overflow in addition means Q is too high + # Compare Q * D with N, computing the remainder in the process + movl %eax, %ecx + movl 20(%esp), %eax + subl %ecx, %eax + movl %edx, %ecx + movl 24(%esp), %edx + sbbl %ecx, %edx # edx:eax = N - Q * D + jnc 4f # no carry: N >= Q * D, we are fine + decl %esi # carry: N < Q * D, adjust Q down by 1 + addl 28(%esp), %eax # and remainder up by D + adcl 32(%esp), %edx + # Finished +4: xorl %edi, %edi # high half of quotient is 0 + ret + # Special case when Q * D overflows +5: decl %esi # adjust Q down by 1 + jmp 3b # and redo check & computation of remainder + +# Unsigned division + + .globl __i64_udiv + .balign 16 +__i64_udiv: + pushl %ebp + pushl %esi + pushl %edi + call __i64_udivmod + movl %esi, %eax + movl %edi, %edx + popl %edi + popl %esi + popl %ebp + ret + .type __i64_udiv, @function + .size __i64_udiv, . - __i64_udiv + +# Unsigned remainder + + .globl __i64_umod + .balign 16 +__i64_umod: + pushl %ebp + pushl %esi + pushl %edi + call __i64_udivmod + popl %edi + popl %esi + popl %ebp + ret + .type __i64_umod, @function + .size __i64_umod, . 
- __i64_umod + +# Signed division + + .globl __i64_sdiv + .balign 16 +__i64_sdiv: + pushl %ebp + pushl %esi + pushl %edi + movl 20(%esp), %esi # esi = NH + movl %esi, %ebp # save sign of N in ebp + testl %esi, %esi + jge 1f # if N < 0, + negl 16(%esp) # N = -N + adcl $0, %esi + negl %esi + movl %esi, 20(%esp) +1: movl 28(%esp), %esi # esi = DH + xorl %esi, %ebp # sign of result in ebp + testl %esi, %esi + jge 2f # if D < 0, + negl 24(%esp) # D = -D + adcl $0, %esi + negl %esi + movl %esi, 28(%esp) +2: call __i64_udivmod + testl %ebp, %ebp # apply sign to result + jge 3f + negl %esi + adcl $0, %edi + negl %edi +3: movl %esi, %eax + movl %edi, %edx + popl %edi + popl %esi + popl %ebp + ret + .type __i64_sdiv, @function + .size __i64_sdiv, . - __i64_sdiv + +# Signed remainder + + .globl __i64_smod + .balign 16 +__i64_smod: + pushl %ebp + pushl %esi + pushl %edi + movl 20(%esp), %esi # esi = NH + movl %esi, %ebp # save sign of result in ebp + testl %esi, %esi + jge 1f # if N < 0, + negl 16(%esp) # N = -N + adcl $0, %esi + negl %esi + movl %esi, 20(%esp) +1: movl 28(%esp), %esi # esi = DH + testl %esi, %esi + jge 2f # if D < 0, + negl 24(%esp) # D = -D + adcl $0, %esi + negl %esi + movl %esi, 28(%esp) +2: call __i64_udivmod + testl %ebp, %ebp # apply sign to result + jge 3f + negl %eax + adcl $0, %edx + negl %edx +3: popl %edi + popl %esi + popl %ebp + ret + .type __i64_sdiv, @function + .size __i64_sdiv, . - __i64_sdiv + +# Note on shifts: +# IA32 shift instructions treat their amount (in %cl) modulo 32 + +# Shift left + + .globl __i64_shl + .balign 16 +__i64_shl: + movl 12(%esp), %ecx # ecx = shift amount, treated mod 64 + testb $32, %cl + jne 1f + # shift amount < 32 + movl 4(%esp), %eax + movl 8(%esp), %edx + shldl %cl, %eax, %edx # edx = high(XH:XL << amount) + shll %cl, %eax # eax = XL << amount + ret + # shift amount >= 32 +1: movl 4(%esp), %edx + shll %cl, %edx # edx = XL << (amount - 32) + xorl %eax, %eax # eax = 0 + ret + .type __i64_shl, @function + .size __i64_shl, . - __i64_shl + +# Shift right unsigned + + .globl __i64_shr + .balign 16 +__i64_shr: + movl 12(%esp), %ecx # ecx = shift amount, treated mod 64 + testb $32, %cl + jne 1f + # shift amount < 32 + movl 4(%esp), %eax + movl 8(%esp), %edx + shrdl %cl, %edx, %eax # eax = low(XH:XL >> amount) + shrl %cl, %edx # edx = XH >> amount + ret + # shift amount >= 32 +1: movl 8(%esp), %eax + shrl %cl, %eax # eax = XH >> (amount - 32) + xorl %edx, %edx # edx = 0 + ret + .type __i64_shr, @function + .size __i64_shr, . - __i64_shr + +# Shift right signed + + .globl __i64_sar + .balign 16 +__i64_sar: + movl 12(%esp), %ecx # ecx = shift amount, treated mod 64 + testb $32, %cl + jne 1f + # shift amount < 32 + movl 4(%esp), %eax + movl 8(%esp), %edx + shrdl %cl, %edx, %eax # eax = low(XH:XL >> amount) + sarl %cl, %edx # edx = XH >> amount (signed) + ret + # shift amount >= 32 +1: movl 8(%esp), %eax + movl %eax, %edx + sarl %cl, %eax # eax = XH >> (amount - 32) + sarl $31, %edx # edx = sign of X + ret + .type __i64_sar, @function + .size __i64_sar, . - __i64_sar + +# Unsigned comparison + + .globl __i64_ucmp + .balign 16 +__i64_ucmp: + movl 8(%esp), %eax # compare high words + cmpl 16(%esp), %eax + jne 1f # if high words equal, + movl 4(%esp), %eax # compare low words + cmpl 12(%esp), %eax +1: seta %al # AL = 1 if >, 0 if <= + setb %dl # DL = 1 if <, 0 if >= + subb %dl, %al # AL = 0 if same, 1 if >, -1 if < + movsbl %al, %eax + ret + .type __i64_ucmp, @function + .size __i64_ucmp, . 
- __i64_ucmp + +# Signed comparison + + .globl __i64_scmp + .balign 16 +__i64_scmp: + movl 8(%esp), %eax # compare high words (signed) + cmpl 16(%esp), %eax + je 1f # if different, + setg %al # extract result + setl %dl + subb %dl, %al + movsbl %al, %eax + ret +1: movl 4(%esp), %eax # if high words equal, + cmpl 12(%esp), %eax # compare low words (unsigned) + seta %al # and extract result + setb %dl + subb %dl, %al + movsbl %al, %eax + ret + .type __i64_scmp, @function + .size __i64_scmp, . - __i64_scmp + +# Conversion signed long -> float + + .globl __i64_stod + .balign 16 +__i64_stod: + fildll 4(%esp) + ret + .type __i64_stod, @function + .size __i64_stod, . - __i64_stod + +# Conversion unsigned long -> float + + .globl __i64_utod + .balign 16 +__i64_utod: + fildll 4(%esp) # convert as if signed + cmpl $0, 8(%esp) # is argument >= 2^63? + jns 1f + fadds LC1 # adjust by 2^64 +1: ret + .type __i64_stod, @function + .size __i64_stod, . - __i64_stod + + .balign 4 +LC1: .long 0x5f800000 # 2^64 in single precision + +# Conversion float -> signed long + + .globl __i64_dtos + .balign 16 +__i64_dtos: + subl $4, %esp + # Change rounding mode to "round towards zero" + fnstcw 0(%esp) + movw 0(%esp), %ax + movb $12, %ah + movw %ax, 2(%esp) + fldcw 2(%esp) + # Convert + fldl 8(%esp) + fistpll 8(%esp) + # Restore rounding mode + fldcw 0(%esp) + # Load result in edx:eax + movl 8(%esp), %eax + movl 12(%esp), %edx + addl $4, %esp + ret + .type __i64_dtos, @function + .size __i64_dtos, . - __i64_dtos + +# Conversion float -> unsigned long + + .globl __i64_dtou + .balign 16 +__i64_dtou: + subl $4, %esp + # Change rounding mode to "round towards zero" + fnstcw 0(%esp) + movw 0(%esp), %ax + movb $12, %ah + movw %ax, 2(%esp) + fldcw 2(%esp) + # Compare argument with 2^63 + fldl (4+4)(%esp) + flds LC2 + fucomp + fnstsw %ax + sahf + jbe 1f # branch if not (ARG < 2^63) + # Argument < 2^63: convert as is + fistpll 8(%esp) + movl 8(%esp), %eax + movl 12(%esp), %edx + jmp 2f + # Argument > 2^63: offset ARG by -2^63, then convert, then offset RES by 2^63 +1: fsubs LC2 + fistpll 8(%esp) + movl 8(%esp), %eax + movl 12(%esp), %edx + addl $0x80000000, %edx + # Restore rounding mode +2: fldcw 0(%esp) + addl $4, %esp + ret + .type __i64_dtou, @function + .size __i64_dtou, . - __i64_dtou + + .balign 4 +LC2: .long 0x5f000000 # 2^63 in single precision diff --git a/runtime/powerpc/int64.s b/runtime/powerpc/int64.s new file mode 100644 index 0000000..34b65b5 --- /dev/null +++ b/runtime/powerpc/int64.s @@ -0,0 +1,492 @@ +# ***************************************************************** +# +# The Compcert verified compiler +# +# Xavier Leroy, INRIA Paris-Rocquencourt +# +# Copyright (c) 2013 Institut National de Recherche en Informatique et +# en Automatique. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# ********************************************************************* + +# Helper functions for 64-bit integer arithmetic. PowerPC version. + +# Calling conventions for R = F(X) or R = F(X,Y): +# one or two long arguments: XH in r3, XL in r4, YH in r5, YL in r6 +# one long argument, one int: XH in r3, XL in r4, Y in r5 +# one float argument: X in f1 +# one long result: RH in r3, RL in r4 +# one float result: R in f1 +# This is a big-endian convention: the high word is in the low-numbered register +# Can use r3...r12 and f0...r13 as temporary registers (caller-save) + + .text + +### Opposite + + .balign 16 + .globl __i64_neg +__i64_neg: + subfic r4, r4, 0 # RL = -XL and set borrow iff XL != 0 + subfze r3, r3 # RH = -XH - borrow + blr + .type __i64_neg, @function + .size __i64_neg, .-__i64_neg + +### Addition + + .balign 16 + .globl __i64_add +__i64_add: + addc r4, r4, r6 # RL = XL + YL and set carry if overflow + adde r3, r3, r5 # RH = XH + YH + carry + blr + .type __i64_add, @function + .size __i64_add, .-__i64_add + +### Subtraction + + .balign 16 + .globl __i64_sub +__i64_sub: + subfc r4, r6, r4 # RL = XL - YL and set borrow if underflow + subfe r3, r5, r3 # RH = XH - YH - borrow + blr + .type __i64_sub, @function + .size __i64_sub, .-__i64_sub + +### Multiplication + + .balign 16 + .globl __i64_mul +__i64_mul: + # Form intermediate products + mulhwu r7, r4, r6 # r7 = high half of XL * YL + mullw r8, r3, r6 # r8 = low half of XH * YL + mullw r9, r4, r5 # r9 = low half of XL * YH + mullw r4, r4, r6 # r4 = low half of XL * YL = low half of result + # Reconstruct high half of result + add r3, r7, r8 + add r3, r3, r9 + blr + .type __i64_mul, @function + .size __i64_mul, .-__i64_mul + +### Helper function for division and modulus. Not exported. +# Input: numerator N in (r3,r4), divisor D in (r5,r6) +# Output: quotient Q in (r7,r8), remainder R in (r3,r4) + .balign 16 +__i64_udivmod: + # Set up quotient and mask + li r8, 0 # Q = 0 + li r7, 0 + li r10, 1 # M = 1 + li r9, 0 + # Check for zero divisor + or. r0, r6, r5 + beqlr # return with unspecified quotient & remainder + # Scale divisor and mask +1: cmpwi r5, 0 # while top bit of D is zero... + blt 2f + subfc r0, r6, r4 # compute borrow out of N - D + subfe r0, r5, r3 + subfe. r0, r0, r0 # EQ iff no borrow iff N >= D + bne 2f # ... and while N >= D ... + addc r6, r6, r6 # scale divisor: D = D << 1 + adde r5, r5, r5 + addc r10, r10, r10 # scale mask: M = M << 1 + adde r9, r9, r9 + b 1b # end while + # Long division +2: subfc r4, r6, r4 # Q = Q | M, N = N - D, and compute borrow + or r8, r8, r10 + subfe r3, r5, r3 + or r7, r7, r9 + subfe. 
r0, r0, r0 # test borrow + beq 3f # no borrow: N >= D, continue + addc r4, r4, r6 # borrow: undo what we just did to N and Q + andc r8, r8, r10 + adde r3, r3, r5 + andc r7, r7, r9 +3: slwi r0, r9, 31 # unscale mask: M = M >> 1 + srwi r10, r10, 1 + or r10, r10, r0 + srwi r9, r9, 1 + slwi r0, r5, 31 # unscale divisor: D = D >> 1 + srwi r6, r6, 1 + or r6, r6, r0 + srwi r5, r5, 1 + or. r0, r10, r9 # iterate while M != 0 + bne 2b + blr + +### Unsigned division + + .balign 16 + .globl __i64_udiv +__i64_udiv: + mflr r11 # save return address in r11 + bl __i64_udivmod # unsigned divide + mtlr r11 # restore return address + mr r3, r7 # R = quotient + mr r4, r8 + blr + .type __i64_udiv, @function + .size __i64_udiv, .-__i64_udiv + +### Unsigned remainder + + .balign 16 + .globl __i64_umod +__i64_umod: + mflr r11 + bl __i64_udivmod + mtlr r11 + blr # remainder is already in R + .type __i64_umod, @function + .size __i64_umod, .-__i64_umod + +### Signed division + + .balign 16 + .globl __i64_sdiv +__i64_sdiv: + mflr r11 # save return address + xor r12, r3, r5 # save sign of result in r12 (top bit) + srawi r0, r3, 31 # take absolute value of N + xor r4, r4, r0 # (i.e. N = N ^ r0 - r0, + xor r3, r3, r0 # where r0 = 0 if N >= 0 and r0 = -1 if N < 0) + subfc r4, r0, r4 + subfe r3, r0, r3 + srawi r0, r5, 31 # take absolute value of D + xor r6, r6, r0 # (same trick) + xor r5, r5, r0 + subfc r6, r0, r6 + subfe r5, r0, r5 + bl __i64_udivmod # do unsigned division + mtlr r11 # restore return address + srawi r0, r12, 31 # apply expected sign to quotient + xor r8, r8, r0 # RES = Q if r12 >= 0, -Q if r12 < 0 + xor r7, r7, r0 + subfc r4, r0, r8 + subfe r3, r0, r7 + blr + .type __i64_sdiv, @function + .size __i64_sdiv, .-__i64_sdiv + +## Signed remainder + + .balign 16 + .globl __i64_smod +__i64_smod: + mflr r11 # save return address + srawi r12, r3, 31 # save sign of result in r12 (sign of N) + xor r4, r4, r12 # and take absolute value of N + xor r3, r3, r12 + subfc r4, r12, r4 + subfe r3, r12, r3 + srawi r0, r5, 31 # take absolute value of D + xor r6, r6, r0 + xor r5, r5, r0 + subfc r6, r0, r6 + subfe r5, r0, r5 + bl __i64_udivmod # do unsigned division + mtlr r11 # restore return address + xor r4, r4, r12 # apply expected sign to remainder + xor r3, r3, r12 # RES = R if r12 == 0, -R if r12 == -1 + subfc r4, r12, r4 + subfe r3, r12, r3 + blr + .type __i64_smod, @function + .size __i64_smod, .-__i64_smod + +### Unsigned comparison + + .balign 16 + .globl __i64_ucmp +__i64_ucmp: + cmplw cr0, r3, r5 # compare high words (unsigned) + cmplw cr1, r4, r6 # compare low words (unsigned) + mfcr r0 +# At this point, the bits of r0 are as follow: +# bit 31: XH < YH +# bit 30: XH > YH +# bit 27: XL > YL +# bit 26: XL < YL + rlwinm r3, r0, 0, 0, 1 # r3 = r0 & 0xC000_0000 + srawi r3, r3, 24 # r4 = r4 >>s 28 +# r3 = -0x80 if XH < YH +# = 0x40 if XH > YH +# = 0 if XH = YH + rlwinm r4, r0, 4, 0, 1 # r4 = (r0 << 4) & 0xC000_0000 + srawi r4, r4, 28 # r4 = r4 >>s 28 +# r4 = -8 if XL < YL +# = 4 if XL > YL +# = 0 if XL = YL + add r3, r3, r4 +# r3 = -0x80 or -0x80 - 8 or -0x80 + 4 or -8 if X < Y +# (in all cases, r3 < 0 if X < Y) +# = 0x40 or 0x40 - 8 or 0x40 + 4 or 4 if X > Y +# (in all cases, r3 > 0 if X > Y) +# = 0 if X = Y + blr + .type __i64_ucmp, @function + .size __i64_ucmp, .-__i64_ucmp + +### Signed comparison + + .balign 16 + .globl __i64_scmp +__i64_scmp: + cmpw cr0, r3, r5 # compare high words (signed) + cmplw cr1, r4, r6 # compare low words (unsigned) + mfcr r0 +# Same trick as in __i64_ucmp + rlwinm r3, r0, 0, 0, 1 + srawi 
r3, r3, 24 + rlwinm r4, r0, 4, 0, 1 + srawi r4, r4, 28 + add r3, r3, r4 + blr + .type __i64_scmp, @function + .size __i64_scmp, .-__i64_scmp + +### Shifts + +# On PowerPC, shift instructions with amount mod 64 >= 32 return 0 + + .balign 16 + .globl __i64_shl +__i64_shl: +# hi = (hi << amount) | (lo >> (32 - amount)) | (lo << (amount - 32)) +# lo = lo << amount +# if 0 <= amount < 32: +# (amount - 32) mod 64 >= 32, hence lo << (amount - 32) == 0 +# if 32 <= amount < 64: +# lo << amount == 0 +# (32 - amount) mod 64 >= 32, hence lo >> (32 - amount) == 0 + andi. r5, r5, 63 # take amount modulo 64 + subfic r6, r5, 32 # r6 = 32 - amount + addi r7, r5, -32 # r7 = amount - 32 + slw r3, r3, r5 + srw r0, r4, r6 + or r3, r3, r0 + slw r0, r4, r7 + or r3, r3, r0 + slw r4, r4, r5 + blr + .type __i64_shl, @function + .size __i64_shl, .-__i64_shl + + .balign 16 + .globl __i64_shr +__i64_shr: +# lo = (lo >> amount) | (hi << (32 - amount)) | (hi >> (amount - 32)) +# hi = hi >> amount +# if 0 <= amount < 32: +# (amount - 32) mod 64 >= 32, hence hi >> (amount - 32) == 0 +# if 32 <= amount < 64: +# hi >> amount == 0 +# (32 - amount) mod 64 >= 32, hence hi << (32 - amount) == 0 + andi. r5, r5, 63 # take amount modulo 64 + subfic r6, r5, 32 # r6 = 32 - amount + addi r7, r5, -32 # r7 = amount - 32 + srw r4, r4, r5 + slw r0, r3, r6 + or r4, r4, r0 + srw r0, r3, r7 + or r4, r4, r0 + srw r3, r3, r5 + blr + .type __i64_shr, @function + .size __i64_shr, .-__i64_shr + + .balign 16 + .globl __i64_sar +__i64_sar: + andi. r5, r5, 63 # take amount modulo 64 + cmpwi r5, 32 + bge 1f # amount < 32? + subfic r6, r5, 32 # r6 = 32 - amount + srw r4, r4, r5 # RH = XH >>s amount + slw r0, r3, r6 # RL = XL >>u amount | XH << (32 - amount) + or r4, r4, r0 + sraw r3, r3, r5 + blr +1: addi r6, r5, -32 # amount >= 32 + sraw r4, r3, r6 # RL = XH >>s (amount - 32) + srawi r3, r3, 31 # RL = sign extension of XH + blr + .type __i64_sar, @function + .size __i64_sar, .-__i64_sar + +### Conversion from unsigned long to double float + + .balign 16 + .globl __i64_utod +__i64_utod: + addi r1, r1, -16 + lis r5, 0x4330 + li r6, 0 + stw r5, 0(r1) + stw r4, 4(r1) + stw r5, 8(r1) + stw r6, 12(r1) + lfd f1, 0(r1) + lfd f2, 8(r1) + fsub f1, f1, f2 # f1 is XL as a double + lis r5, 0x4530 + stw r5, 0(r1) + stw r3, 4(r1) + stw r5, 8(r1) + lfd f2, 0(r1) + lfd f3, 8(r1) + fsub f2, f2, f3 # f2 is XH * 2^32 as a double + fadd f1, f1, f2 # add both to get result + addi r1, r1, 16 + blr + .type __i64_utod, @function + .size __i64_utod, .-__i64_utod + +### Conversion from signed long to double float + + .balign 16 + .globl __i64_stod +__i64_stod: + addi r1, r1, -16 + lis r5, 0x4330 + li r6, 0 + stw r5, 0(r1) + stw r4, 4(r1) + stw r5, 8(r1) + stw r6, 12(r1) + lfd f1, 0(r1) + lfd f2, 8(r1) + fsub f1, f1, f2 # f1 is XL (unsigned) as a double + lis r5, 0x4530 + lis r6, 0x8000 + stw r5, 0(r1) + add r3, r3, r6 + stw r3, 4(r1) + stw r5, 8(r1) + stw r6, 12(r1) + lfd f2, 0(r1) + lfd f3, 8(r1) + fsub f2, f2, f3 # f2 is XH (signed) * 2^32 as a double + fadd f1, f1, f2 # add both to get result + addi r1, r1, 16 + blr + .type __i64_stod, @function + .size __i64_stod, .-__i64_stod + +### Conversion from double float to unsigned long + + .balign 16 + .globl __i64_dtou +__i64_dtou: + stfdu f1, -16(r1) # extract LO (r4) and HI (r3) halves of double + lwz r3, 0(r1) + lwz r4, 4(r1) + addi r1, r1, 16 + cmpwi r3, 0 # is double < 0? 
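+	# Strategy note (added comment): a finite double is M * 2^EXP, where
+	# M is the 53-bit mantissa (implicit leading 1 included) and EXP is
+	# the unbiased exponent minus 52.  The code below materializes M in
+	# (r3,r4) and shifts it by EXP, saturating to 0 (negative or < 1.0)
+	# or to MAX_UINT (>= 2^64).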
+ blt 1f # then it converts to 0 + # extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) + rlwinm r5, r3, 12, 21, 31 + addi r5, r5, -1075 + # check range of exponent + cmpwi r5, -52 # if EXP < -52, double is < 1.0 + blt 1f + cmpwi r5, 12 # if EXP >= 64 - 52, double is >= 2^64 + bge 2f + # extract true mantissa + rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000 + oris r3, r3, 0x10 # HI |= 0x00100000 + # shift it appropriately + cmpwi r5, 0 + blt 3f + b __i64_shl # if EXP >= 0, shift left by EXP +3: subfic r5, r5, 0 + b __i64_shr # if EXP < 0, shift right by -EXP + # Special cases +1: li r3, 0 # result is 0 + li r4, 0 + blr +2: li r3, -1 # result is MAX_UINT + li r4, -1 + blr + .type __i64_dtou, @function + .size __i64_dtou, .-__i64_dtou + +### Conversion from double float to signed long + + .balign 16 + .globl __i64_dtos +__i64_dtos: + stfdu f1, -16(r1) # extract LO (r4) and HI (r3) halves of double + lwz r3, 0(r1) + lwz r4, 4(r1) + addi r1, r1, 16 + srawi r10, r3, 31 # save sign of double in r10 + # extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) + rlwinm r5, r3, 12, 21, 31 + addi r5, r5, -1075 + # check range of exponent + cmpwi r5, -52 # if EXP < -52, abs(double) is < 1.0 + blt 1f + cmpwi r5, 11 # if EXP >= 63 - 52, abs(double) is >= 2^63 + bge 2f + # extract true mantissa + rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000 + oris r3, r3, 0x10 # HI |= 0x00100000 + # shift it appropriately + mflr r9 # save retaddr in r9 + cmpwi r5, 0 + blt 3f + bl __i64_shl # if EXP >= 0, shift left by EXP + b 4f +3: subfic r5, r5, 0 + bl __i64_shr # if EXP < 0, shift right by -EXP + # apply sign to result +4: mtlr r9 + xor r4, r4, r10 + xor r3, r3, r10 + subfc r4, r10, r4 + subfe r3, r10, r3 + blr + # Special cases +1: li r3, 0 # result is 0 + li r4, 0 + blr +2: cmpwi r10, 0 # result is MAX_SINT or MIN_SINT + bge 5f # depending on sign + li r4, -1 # result is MAX_SINT = 0x7FFF_FFFF + srwi r3, r4, 1 + blr +5: lis r3, 0x8000 # result is MIN_SINT = 0x8000_0000 + li r4, 0 + blr + .type __i64_dtos, @function + .size __i64_dtos, .-__i64_dtos diff --git a/runtime/test/test_int64.c b/runtime/test/test_int64.c new file mode 100644 index 0000000..11adce3 --- /dev/null +++ b/runtime/test/test_int64.c @@ -0,0 +1,238 @@ +/* ************************************************************************** */ +/* */ +/* The Compcert verified compiler */ +/* */ +/* Xavier Leroy, INRIA Paris-Rocquencourt */ +/* */ +/* Copyright (c) 2013 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* Redistribution and use in source and binary forms, with or without */ +/* modification, are permitted provided that the following conditions are met:*/ +/* * Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* * Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution. */ +/* * Neither the name of the nor the */ +/* names of its contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written permission.*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ +/* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */ +/* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR */ +/* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ +/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING */ +/* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS */ +/* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* ************************************************************************** */ + +/* Differential testing of 64-bit integer operations */ +/* This file is to be compiled by a C compiler other than CompCert C */ + +#include +#include + +typedef unsigned long long u64; +typedef signed long long s64; + +extern s64 __i64_neg(s64 x); +extern s64 __i64_add(s64 x, s64 y); +extern s64 __i64_sub(s64 x, s64 y); +extern s64 __i64_mul(s64 x, s64 y); +extern u64 __i64_udiv(u64 x, u64 y); +extern u64 __i64_umod(u64 x, u64 y); +extern s64 __i64_sdiv(s64 x, s64 y); +extern s64 __i64_smod(s64 x, s64 y); + +extern u64 __i64_shl(u64 x, unsigned amount); +extern u64 __i64_shr(u64 x, unsigned amount); +extern s64 __i64_sar(s64 x, unsigned amount); + +extern int __i64_ucmp(u64 x, u64 y); +extern int __i64_scmp(s64 x, s64 y); + +extern double __i64_utod(u64 x); +extern double __i64_stod(s64 x); +extern u64 __i64_dtou(double d); +extern s64 __i64_dtos(double d); + +static u64 rnd64(void) +{ + static u64 seed = 0; + seed = seed * 6364136223846793005ULL + 1442695040888963407ULL; + return seed; +} + +static int error = 0; + +static void test1(u64 x, u64 y) +{ + u64 z, uy; + s64 t, sy; + int i; + double f, g; + + z = __i64_neg(x); + if (z != -x) + error++, printf("- %016llx = %016llx, expected %016llx\n", x, z, -x); + z = __i64_add(x, y); + if (z != x + y) + error++, printf("%016llx + %016llx = %016llx, expected %016llx\n", x, y, z, x + y); + + z = __i64_sub(x, y); + if (z != x - y) + error++, printf("%016llx - %016llx = %016llx, expected %016llx\n", x, y, z, x - y); + + z = __i64_mul(x, y); + if (z != x * y) + error++, printf("%016llx * %016llx = %016llx, expected %016llx\n", x, y, z, x * y); + + if (y != 0) { + + z = __i64_udiv(x, y); + if (z != x / y) + error++, printf("%llu /u %llu = %llu, expected %llu\n", x, y, z, x / y); + + z = __i64_umod(x, y); + if (z != x % y) + error++, printf("%llu %%u %llu = %llu, expected %llu\n", x, y, z, x % y); + + } + + if (y != 0 && !(x == 0x800000000000LLU && y == -1)) { + + t = __i64_sdiv(x, y); + if (t != (s64) x / (s64) y) + error++, printf("%lld /s %lld = %lld, expected %lld\n", x, y, t, (s64) x / (s64) y); + + t = __i64_smod(x, y); + if (t != (s64) x % (s64) y) + error++, printf("%lld %%s %lld = %lld, expected %lld\n", x, y, t, (s64) x % (s64) y); + + } + + /* Test division with small (32-bit) divisors */ + uy = y >> 32; + sy = (s64)y >> 32; + + if (uy != 0) { + + z = __i64_udiv(x, uy); + if (z != x / uy) + error++, printf("%llu /u %llu = %llu, expected %llu\n", x, uy, z, x / uy); + + z = __i64_umod(x, uy); + if (z != x % uy) + error++, printf("%llu %%u %llu = %llu, expected %llu\n", x, uy, z, x % uy); + + } + + if (sy != 0 && !(x == 0x800000000000LLU && sy == -1)) { + + t = __i64_sdiv(x, sy); + if (t != (s64) x / sy) + error++, printf("%lld /s %lld = %lld, expected %lld\n", x, sy, t, (s64) x / sy); + + t = __i64_smod(x, sy); + if (t != (s64) x % sy) + error++, printf("%lld %%s %lld = %lld, expected %lld\n", x, sy, t, (s64) x % sy); + + } + + i = y & 
63; + + z = __i64_shl(x, i); + if (z != x << i) + error++, printf("%016llx << %d = %016llx, expected %016llx\n", x, i, z, x << i); + + z = __i64_shr(x, i); + if (z != x >> i) + error++, printf("%016llx >>u %d = %016llx, expected %016llx\n", x, i, z, x >> i); + + t = __i64_sar(x, i); + if (t != (s64) x >> i) + error++, printf("%016llx >>s %d = %016llx, expected %016llx\n", x, i, t, (s64) x >> i); + + i = __i64_ucmp(x, y); + if (x == y) { + if (! (i == 0)) + error++, printf("ucmp(%016llx, %016llx) = %d, expected 0\n", x, y, i); + } + else if (x < y) { + if (! (i < 0)) + error++, printf("ucmp(%016llx, %016llx) = %d, expected < 0\n", x, y, i); + } else { + if (! (i > 0)) + error++, printf("ucmp(%016llx, %016llx) = %d, expected > 0\n", x, y, i); + } + + i = __i64_scmp(x, y); + if (x == y) { + if (! (i == 0)) + error++, printf("scmp(%016llx, %016llx) = %d, expected 0\n", x, y, i); + } + else if ((s64)x < (s64)y) { + if (! (i < 0)) + error++, printf("scmp(%016llx, %016llx) = %d, expected < 0\n", x, y, i); + } else { + if (! (i > 0)) + error++, printf("scmp(%016llx, %016llx) = %d, expected > 0\n", x, y, i); + } + + f = __i64_utod(x); + g = (double) x; + if (f != g) + error++, printf("(double) %llu (u) = %a, expected %a\n", x, f, g); + + f = __i64_stod(x); + g = (double) (s64) x; + if (f != g) + error++, printf("(double) %lld (s) = %a, expected %a\n", x, f, g); + + f = ((double) x) * 0.0001; + z = __i64_dtou(f); + if (z != (u64) f) + error++, printf("(u64) %a = %llu, expected %llu\n", f, z, (u64) f); + + f = ((double) (s64) x) * 0.0001; + t = __i64_dtos(f); + if (t != (s64) f) + error++, printf("(s64) %a = %lld, expected %lld\n", f, z, (s64) f); +} + +#define NSPECIFIC 8 + +unsigned long long specific[NSPECIFIC] = { + 0, 1, -1, 0x7FFFFFFFULL, 0x80000000ULL, 0xFFFFFFFFULL, + 0x7FFFFFFFFFFFULL, 0x8000000000000000ULL +}; + +int main() +{ + int i, j; + + /* Some specific values */ + for (i = 0; i < NSPECIFIC; i++) + for (j = 0; j < NSPECIFIC; j++) + test1(specific[i], specific[j]); + + /* Random testing */ + for (i = 0; i < 50; i++) { + for (j = 0; j < 1000000; j++) + test1(rnd64(), rnd64()); + printf("."); fflush(stdout); + } + printf("\n"); + if (error == 0) + printf ("Test passed\n"); + else + printf ("TEST FAILED, %d error(s) detected\n", error); + return 0; +} + -- cgit v1.2.3
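Note on the division helpers: the ARM and PowerPC __i64_udivmod routines above implement the same restoring (shift-and-subtract) division. The following standalone C sketch of that algorithm is offered only as a reading aid; it is not part of the patch, and the function name udivmod64 is illustrative. Like the assembly versions, it treats a zero divisor as yielding an unspecified result (here it simply returns quotient 0 and remainder n).

    #include <stdint.h>
    #include <stdio.h>

    /* Restoring division: returns n / d and stores n % d in *rem.
       Mirrors the scale-up / subtract / scale-down loop of the ARM and
       PowerPC __i64_udivmod helpers. */
    static uint64_t udivmod64(uint64_t n, uint64_t d, uint64_t *rem)
    {
      uint64_t q = 0, m = 1;
      if (d == 0) { *rem = n; return 0; }   /* assembly: unspecified result */
      /* Scale D and the mask M up while D's top bit is clear and D <= N. */
      while ((int64_t) d >= 0 && d <= n) { d <<= 1; m <<= 1; }
      /* Try to subtract D at each scale, from the largest down to 1. */
      while (m != 0) {
        if (n >= d) { n -= d; q |= m; }
        d >>= 1; m >>= 1;
      }
      *rem = n;
      return q;
    }

    int main(void)
    {
      /* Cross-check against the compiler's own 64-bit division, in the
         same spirit as runtime/test/test_int64.c. */
      uint64_t n = 0x123456789ABCDEF0ULL, d = 0x12345ULL, r;
      uint64_t q = udivmod64(n, d, &r);
      printf("q = %llx, r = %llx, %s\n",
             (unsigned long long) q, (unsigned long long) r,
             (q == n / d && r == n % d) ? "ok" : "MISMATCH");
      return 0;
    }

The IA32 version takes a different route: it scales the operands until the divisor fits in 32 bits, uses the hardware divl instruction to obtain a quotient estimate, and then corrects that estimate against N, so it needs no bit-by-bit loop.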