author    xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>  2013-04-20 07:54:52 +0000
committer xleroy <xleroy@fca1b0fc-160b-0410-b1d3-a4f43f01ea2e>  2013-04-20 07:54:52 +0000
commit  255cee09b71255051c2b40eae0c88bffce1f6f32 (patch)
tree    7951b1b13e8fd5e525b9223e8be0580e83550f55 /runtime
parent  6e5041958df01c56762e90770abd704b95a36e5d (diff)
Big merge of the newregalloc-int64 branch. Lots of changes in two directions:
1- new register allocator (+ live range splitting, spilling & reloading, etc.) based on a posteriori validation using the Rideau-Leroy algorithm
2- support for 64-bit integer arithmetic (type "long long").

git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e
Diffstat (limited to 'runtime')
-rw-r--r--  runtime/Makefile           |  28
-rw-r--r--  runtime/arm/int64.s        | 424
-rw-r--r--  runtime/ia32/int64.s       | 471
-rw-r--r--  runtime/powerpc/int64.s    | 492
-rw-r--r--  runtime/test/test_int64.c  | 238
5 files changed, 1643 insertions, 10 deletions
diff --git a/runtime/Makefile b/runtime/Makefile
index c128ba2..5550e6b 100644
--- a/runtime/Makefile
+++ b/runtime/Makefile
@@ -2,26 +2,34 @@ include ../Makefile.config
CFLAGS=-O1 -g -Wall
INCLUDES=
-OBJS=
+OBJS=int64.o
LIB=libcompcert.a
-ifeq ($(strip $(HAS_RUNTIME_LIB)),true)
all: $(LIB) $(INCLUDES)
-else
-all:
-endif
$(LIB): $(OBJS)
rm -f $(LIB)
ar rcs $(LIB) $(OBJS)
-clean:
+%.o: $(ARCH)/%.s
+ $(CASM) $^
+
+clean::
rm -f *.o $(LIB)
-ifeq ($(strip $(HAS_RUNTIME_LIB)),true)
install:
install -d $(LIBDIR)
install -c $(LIB) $(INCLUDES) $(LIBDIR)
-else
-install:
-endif
+
+test/test_int64: test/test_int64.c $(LIB)
+ $(CC) -o $@ test/test_int64.c $(LIB)
+
+clean::
+ rm -f test/test_int64
+
+.PHONY: test
+
+test: FORCE test/test_int64
+ test/test_int64
+
+FORCE:
diff --git a/runtime/arm/int64.s b/runtime/arm/int64.s
new file mode 100644
index 0000000..6b03351
--- /dev/null
+++ b/runtime/arm/int64.s
@@ -0,0 +1,424 @@
+@ *****************************************************************
+@
+@ The Compcert verified compiler
+@
+@ Xavier Leroy, INRIA Paris-Rocquencourt
+@
+@ Copyright (c) 2013 Institut National de Recherche en Informatique et
+@ en Automatique.
+@
+@ Redistribution and use in source and binary forms, with or without
+@ modification, are permitted provided that the following conditions are met:
+@ * Redistributions of source code must retain the above copyright
+@ notice, this list of conditions and the following disclaimer.
+@ * Redistributions in binary form must reproduce the above copyright
+@ notice, this list of conditions and the following disclaimer in the
+@ documentation and/or other materials provided with the distribution.
+@ * Neither the name of the <organization> nor the
+@ names of its contributors may be used to endorse or promote products
+@ derived from this software without specific prior written permission.
+@
+@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+@ HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+@
+@ *********************************************************************
+
+@ Helper functions for 64-bit integer arithmetic. ARM version.
+
+@ Calling conventions for R = F(X) or R = F(X,Y):
+@ one or two long arguments: XL in r0, XH in r1, YL in r2, YH in r3
+@ one long argument, one int: XL in r0, XH in r1, Y in r2
+@ one float argument: X in r0, r1
+@ one long result: RL in r0, RH in r1
+@ one float result: R in r0, r1
+@ This is a little-endian convention: the low word is in the
+@ low-numbered register.
+@ Can use r0...r3 and f0...f7 as temporary registers (caller-save)
+
+ .text
+
+@@@ Unsigned comparison
+
+ .global __i64_ucmp
+__i64_ucmp:
+ cmp r1, r3 @ compare high words
+ cmpeq r0, r2 @ if equal, compare low words instead
+ moveq r0, #0 @ res = 0 if eq
+ movhi r0, #1 @ res = 1 if unsigned higher
+ mvnlo r0, #0 @ res = -1 if unsigned lower
+ bx lr
+ .type __i64_ucmp, %function
+ .size __i64_ucmp, . - __i64_ucmp
+
+@@@ Signed comparison
+
+ .global __i64_scmp
+__i64_scmp:
+ cmp r0, r2 @ compare low words (unsigned)
+ moveq r0, #0 @ res = 0 if eq
+ movhi r0, #1 @ res = 1 if unsigned higher
+ mvnlo r0, #0 @ res = -1 if unsigned lower
+ cmp r1, r3 @ compare high words (signed)
+ addgt r0, r0, #2 @ res += 2 if signed greater
+ sublt r0, r0, #2 @ res -= 2 if signed less
+ @ here, r0 = 0 if X == Y
+ @ r0 = -3, -2, -1 if X < Y
+ @ r0 = 1, 2, 3 if X > Y
+ bx lr
+ .type __i64_scmp, %function
+ .size __i64_scmp, . - __i64_scmp
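
For reference, the returned value encodes both comparisons at once: the unsigned low-word comparison contributes -1/0/+1 and the signed high-word comparison contributes -2/0/+2, so the sign of the sum is decided by the high words unless they are equal. A rough C model of this encoding (illustrative only; the helper name is made up, and the usual arithmetic behaviour of >> on negative values is assumed):

int i64_scmp_model(long long x, long long y)
{
    unsigned int xl = (unsigned int) x, yl = (unsigned int) y;
    int xh = (int) (x >> 32), yh = (int) (y >> 32);
    int res = (xl == yl) ? 0 : (xl > yl) ? 1 : -1;   /* unsigned low-word compare */
    if (xh > yh) res += 2;                           /* signed high-word compare */
    if (xh < yh) res -= 2;
    return res;        /* 0 if x == y, negative if x < y, positive if x > y */
}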
+
+@@@ Opposite
+
+ .global __i64_neg
+__i64_neg:
+ rsbs r0, r0, #0
+ rsc r1, r1, #0
+ bx lr
+ .type __i64_neg, %function
+ .size __i64_neg, . - __i64_neg
+
+@@@ Addition
+
+ .global __i64_add
+__i64_add:
+ adds r0, r0, r2
+ adc r1, r1, r3
+ bx lr
+ .type __i64_add, %function
+ .size __i64_add, . - __i64_add
+
+@@@ Subtraction
+
+ .global __i64_sub
+__i64_sub:
+ subs r0, r0, r2
+ sbc r1, r1, r3
+ bx lr
+ .type __i64_sub, %function
+ .size __i64_sub, . - __i64_sub
+
+@ Note on ARM shifts: the shift amount is taken modulo 256.
+@ Therefore, unsigned shifts by 32 bits or more produce 0.
+
+@@@ Shift left
+
+ .global __i64_shl
+__i64_shl:
+ and r2, r2, #63 @ normalize amount to 0...63
+ rsbs r3, r2, #32 @ r3 = 32 - amount
+ ble 1f @ branch if <= 0, namely if amount >= 32
+ mov r1, r1, lsl r2
+ orr r1, r0, lsr r3
+ mov r0, r0, lsl r2
+ bx lr
+1:
+ sub r2, r2, #32
+ mov r1, r0, lsl r2
+ mov r0, #0
+ bx lr
+ .type __i64_shl, %function
+ .size __i64_shl, . - __i64_shl
+
+@@@ Shift right unsigned
+
+ .global __i64_shr
+__i64_shr:
+ and r2, r2, #63 @ normalize amount to 0...63
+ rsbs r3, r2, #32 @ r3 = 32 - amount
+ ble 1f @ branch if <= 0, namely if amount >= 32
+ mov r0, r0, lsr r2
+ orr r0, r1, lsl r3
+ mov r1, r1, lsr r2
+ bx lr
+1:
+ sub r2, r2, #32
+ mov r0, r1, lsr r2
+ mov r1, #0
+ bx lr
+ .type __i64_shr, %function
+ .size __i64_shr, . - __i64_shr
+
+@@@ Shift right signed
+
+ .global __i64_sar
+__i64_sar:
+ and r2, r2, #63 @ normalize amount to 0...63
+ rsbs r3, r2, #32 @ r3 = 32 - amount
+ ble 1f @ branch if <= 0, namely if amount >= 32
+ mov r0, r0, lsr r2
+ orr r0, r1, lsl r3
+ mov r1, r1, asr r2
+ bx lr
+1:
+ sub r2, r2, #32
+ mov r0, r1, asr r2
+ mov r1, r1, asr #31
+ bx lr
+ .type __i64_sar, %function
+ .size __i64_sar, . - __i64_sar
+
+@@@ Multiplication
+
+ .global __i64_mul
+__i64_mul:
+ push {r4, r5}
+ mov r4, r0 @ save first arg in r4,r5
+ mov r5, r1
+ umull r0, r1, r2, r4 @ 64-bit product of low halves
+ mla r1, r2, r5, r1 @ add 32-bit products low half * high half
+ mla r1, r3, r4, r1 @ to high half of result
+ pop {r4, r5}
+ bx lr
+ .type __i64_mul, %function
+ .size __i64_mul, . - __i64_mul
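
The same decomposition in C: only the low 32 x 32 product needs its full 64 bits, the two cross products contribute only to the high word, and the high x high product vanishes modulo 2^64. A sketch (helper name hypothetical):

unsigned long long i64_mul_model(unsigned long long x, unsigned long long y)
{
    unsigned int xl = (unsigned int) x, xh = (unsigned int) (x >> 32);
    unsigned int yl = (unsigned int) y, yh = (unsigned int) (y >> 32);
    unsigned long long lo = (unsigned long long) xl * yl;            /* full product of low halves */
    unsigned int hi = (unsigned int) (lo >> 32) + xl * yh + xh * yl; /* cross terms, kept mod 2^32 */
    return ((unsigned long long) hi << 32) | (unsigned int) lo;
}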
+
+@@@ Auxiliary function for division and modulus. Not exported.
+
+@ On entry: N = (r0, r1) numerator D = (r2, r3) divisor
+@ On exit: Q = (r4, r5) quotient R = (r0, r1) remainder
+@ Locals: M = (r6, r7) mask TMP = r8 temporary
+
+__i64_udivmod:
+ orrs r8, r2, r3 @ is D == 0?
+ bxeq lr @ if so, return with unspecified results
+ mov r4, #0 @ Q = 0
+ mov r5, #0
+ mov r6, #1 @ M = 1
+ mov r7, #0
+1: cmp r3, #0 @ while ((signed) D >= 0) ...
+ blt 2f
+ subs r8, r0, r2 @ ... and N >= D ...
+ sbcs r8, r1, r3
+ blo 2f
+ adds r2, r2, r2 @ D = D << 1
+ adc r3, r3, r3
+ adds r6, r6, r6 @ M = M << 1
+ adc r7, r7, r7
+ b 1b
+2: subs r0, r0, r2 @ N = N - D
+ sbcs r1, r1, r3
+ orr r4, r4, r6 @ Q = Q | M
+ orr r5, r5, r7
+ bhs 3f @ if N was >= D, continue
+ adds r0, r0, r2 @ otherwise, undo what we just did
+ adc r1, r1, r3 @ N = N + D
+ bic r4, r4, r6 @ Q = Q & ~M
+ bic r5, r5, r7
+3: movs r7, r7, lsr #1 @ M = M >> 1
+ mov r6, r6, rrx
+ movs r3, r3, lsr #1 @ D = D >> 1
+ mov r2, r2, rrx
+ orrs r8, r6, r7 @ repeat while (M != 0) ...
+ bne 2b
+ bx lr
+
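The routine above is a classic restoring shift-and-subtract division: the divisor and a one-bit mask are scaled up while the divisor's top bit is clear and it does not exceed the dividend, then each iteration subtracts and records a quotient bit, undoing the step when it underflows. A C sketch of the same loop structure (names hypothetical; a zero divisor is left unspecified here just as in the assembly):

static void udivmod_shift_model(unsigned long long n, unsigned long long d,
                                unsigned long long *q, unsigned long long *r)
{
    unsigned long long quot = 0, mask = 1;
    if (d == 0) { *q = 0; *r = 0; return; }   /* unspecified result; pick 0 */
    while ((long long) d >= 0 && n >= d) {    /* scale D and M up while D's top bit is clear */
        d <<= 1;
        mask <<= 1;
    }
    do {
        if (n >= d) {                         /* subtract and record a quotient bit */
            n -= d;                           /* (the assembly subtracts, then undoes on borrow) */
            quot |= mask;
        }
        d >>= 1;                              /* unscale */
        mask >>= 1;
    } while (mask != 0);
    *q = quot;
    *r = n;
}
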
+@@@ Unsigned division
+
+ .global __i64_udiv
+__i64_udiv:
+ push {r4, r5, r6, r7, r8, lr}
+ bl __i64_udivmod
+ mov r0, r4
+ mov r1, r5
+ pop {r4, r5, r6, r7, r8, lr}
+ bx lr
+ .type __i64_udiv, %function
+ .size __i64_udiv, . - __i64_udiv
+
+@@@ Unsigned modulus
+
+ .global __i64_umod
+__i64_umod:
+ push {r4, r5, r6, r7, r8, lr}
+ bl __i64_udivmod @ remainder is already in r0,r1
+ pop {r4, r5, r6, r7, r8, lr}
+ bx lr
+ .type __i64_umod, %function
+ .size __i64_umod, . - __i64_umod
+
+ .global __i64_sdiv
+__i64_sdiv:
+ push {r4, r5, r6, r7, r8, r10, lr}
+ eor r10, r1, r3 @ r10 = sign of result
+ mov r4, r1, asr #31 @ take absolute value of N
+ eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31)
+ eor r1, r1, r4
+ subs r0, r0, r4
+ sbc r1, r1, r4
+ mov r4, r3, asr #31 @ take absolute value of D
+ eor r2, r2, r4
+ eor r3, r3, r4
+ subs r2, r2, r4
+ sbc r3, r3, r4
+ bl __i64_udivmod @ do unsigned division
+ mov r0, r4
+ mov r1, r5
+ eor r0, r0, r10, asr#31 @ apply expected sign
+ eor r1, r1, r10, asr#31
+ subs r0, r0, r10, asr#31
+ sbc r1, r1, r10, asr#31
+ pop {r4, r5, r6, r7, r8, r10, lr}
+ bx lr
+ .type __i64_sdiv, %function
+ .size __i64_sdiv, . - __i64_sdiv
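
The sign handling uses the branchless identity |x| = (x ^ s) - s, where s = x >> 63 (arithmetic shift, so s is 0 or -1); the same transform, applied with the XOR of the two sign words, restores the sign of the quotient afterwards. A C sketch of the whole routine (hypothetical name; assumes arithmetic right shift on signed values, and leaves the INT64_MIN / -1 case undefined as the assembly does):

long long i64_sdiv_model(long long n, long long d)
{
    long long sn = n >> 63, sd = d >> 63, sq = sn ^ sd;           /* each is 0 or -1 */
    unsigned long long un = ((unsigned long long) n ^ sn) - sn;   /* |n| */
    unsigned long long ud = ((unsigned long long) d ^ sd) - sd;   /* |d| */
    unsigned long long uq = un / ud;                              /* unsigned division */
    return (long long) ((uq ^ sq) - sq);                          /* negate iff signs differed */
}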
+
+@@@ Signed modulus
+
+ .global __i64_smod
+__i64_smod:
+ push {r4, r5, r6, r7, r8, r10, lr}
+ mov r10, r1 @ r10 = sign of result
+ mov r4, r1, asr#31 @ take absolute value of N
+ eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31)
+ eor r1, r1, r4
+ subs r0, r0, r4
+ sbc r1, r1, r4
+ mov r4, r3, asr #31 @ take absolute value of D
+ eor r2, r2, r4
+ eor r3, r3, r4
+ subs r2, r2, r4
+ sbc r3, r3, r4
+ bl __i64_udivmod @ do unsigned division
+ eor r0, r0, r10, asr#31 @ apply expected sign
+ eor r1, r1, r10, asr#31
+ subs r0, r0, r10, asr#31
+ sbc r1, r1, r10, asr#31
+ pop {r4, r5, r6, r7, r8, r10, lr}
+ bx lr
+ .type __i64_smod, %function
+ .size __i64_smod, . - __i64_smod
+
+@@@ Conversion from unsigned 64-bit integer to double float
+
+ .global __i64_utod
+__i64_utod:
+ fmsr s0, r0
+ fuitod d0, s0 @ convert low half to double (unsigned)
+ fmsr s2, r1
+ fuitod d1, s2 @ convert high half to double (unsigned)
+ fldd d2, .LC1 @ d2 = 2^32
+ fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
+ fmrrd r0, r1, d0 @ return result in r0, r1
+ bx lr
+ .type __i64_utod, %function
+ .size __i64_utod, . - __i64_utod
+
+ .balign 8
+.LC1: .quad 0x41f0000000000000 @ 2^32 in double precision
+
+@@@ Conversion from signed 64-bit integer to double float
+
+ .global __i64_stod
+__i64_stod:
+ fmsr s0, r0
+ fuitod d0, s0 @ convert low half to double (unsigned)
+ fmsr s2, r1
+ fsitod d1, s2 @ convert high half to double (signed)
+ fldd d2, .LC1 @ d2 = 2^32
+ fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64
+ fmrrd r0, r1, d0 @ return result in r0, r1
+ bx lr
+ .type __i64_stod, %function
+ .size __i64_stod, . - __i64_stod
+
+@@@ Conversion from double float to unsigned 64-bit integer
+
+ .global __i64_dtou
+__i64_dtou:
+ cmp r1, #0 @ is double < 0 ?
+ blt 1f @ then it converts to 0
+ @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
+ @ note: 1023 + 52 = 1075 = 1024 + 51
+ @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
+ mov r2, r1, lsl #1
+ mov r2, r2, lsr #21
+ sub r2, r2, #51
+ sub r2, r2, #1024
+ @ check range of exponent
+ cmn r2, #52 @ if EXP < -52, double is < 1.0
+ blt 1f
+ cmp r2, #12 @ if EXP >= 64 - 52, double is >= 2^64
+ bge 2f
+ @ extract true mantissa
+ bic r1, r1, #0xFF000000
+ bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+ orr r1, r1, #0x00100000 @ HI |= 0x00100000
+ @ shift it appropriately
+ cmp r2, #0
+ bge __i64_shl @ if EXP >= 0, shift left by EXP
+ rsb r2, r2, #0
+ b __i64_shr @ otherwise, shift right by -EXP
+ @ special cases
+1: mov r0, #0 @ result is 0
+ mov r1, #0
+ bx lr
+2: mvn r0, #0 @ result is 0xFF....FF (MAX_UINT)
+ mvn r1, #0
+ bx lr
+ .type __i64_dtou, %function
+ .size __i64_dtou, . - __i64_dtou
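
The conversion works directly on the bit pattern of the double: after rejecting negative values and out-of-range exponents, the 52-bit mantissa plus its implicit leading 1 is shifted by the unbiased exponent. A C model of the same steps (hypothetical name; out-of-range inputs are clamped the same way the assembly clamps them):

unsigned long long dtou_bits_model(double x)
{
    union { double d; unsigned long long u; } v;
    v.d = x;
    if ((long long) v.u < 0)                               /* sign bit set: converts to 0 */
        return 0;
    int exp = (int) ((v.u >> 52) & 0x7FF) - (1023 + 52);   /* exponent of the integer mantissa */
    if (exp < -52)                                         /* |x| < 1.0 */
        return 0;
    if (exp >= 12)                                         /* x >= 2^64 */
        return ~0ULL;                                      /* MAX_UINT */
    unsigned long long m = (v.u & 0x000FFFFFFFFFFFFFULL)   /* true mantissa */
                         | 0x0010000000000000ULL;          /* with the implicit leading 1 */
    return exp >= 0 ? m << exp : m >> -exp;
}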
+
+@@@ Conversion from double float to signed 64-bit integer
+
+ .global __i64_dtos
+__i64_dtos:
+ push {r4, lr}
+ mov r4, r1, asr #31 @ save sign in r4
+ @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2
+ @ note: 1023 + 52 = 1075 = 1024 + 51
+ @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21
+ mov r2, r1, lsl #1
+ mov r2, r2, lsr #21
+ sub r2, r2, #51
+ sub r2, r2, #1024
+ @ check range of exponent
+ cmn r2, #52 @ if EXP < -52, |double| is < 1.0
+ blt 1f
+ cmp r2, #11 @ if EXP >= 63 - 52, |double| is >= 2^63
+ bge 2f
+ @ extract true mantissa
+ bic r1, r1, #0xFF000000
+ bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000
+ orr r1, r1, #0x00100000 @ HI |= 0x00100000
+ @ shift it appropriately
+ cmp r2, #0
+ blt 3f
+ bl __i64_shl @ if EXP >= 0, shift left by EXP
+ b 4f
+3: rsb r2, r2, #0
+ bl __i64_shr @ otherwise, shift right by -EXP
+ @ apply sign to result
+4: eor r0, r0, r4
+ eor r1, r1, r4
+ subs r0, r0, r4
+ sbc r1, r1, r4
+ pop {r4, lr}
+ bx lr
+ @ special cases
+1: mov r0, #0 @ result is 0
+ mov r1, #0
+ pop {r4, lr}
+ bx lr
+2: cmp r4, #0
+ blt 6f
+ mvn r0, #0 @ result is 0x7F....FF (MAX_SINT)
+ mov r1, r0, lsr #1
+ pop {r4, lr}
+ bx lr
+6: mov r0, #0 @ result is 0x80....00 (MIN_SINT)
+ mov r1, #0x80000000
+ pop {r4, lr}
+ bx lr
+ .type __i64_dtos, %function
+ .size __i64_dtos, . - __i64_dtos
+
diff --git a/runtime/ia32/int64.s b/runtime/ia32/int64.s
new file mode 100644
index 0000000..8fd8151
--- /dev/null
+++ b/runtime/ia32/int64.s
@@ -0,0 +1,471 @@
+# *****************************************************************
+#
+# The Compcert verified compiler
+#
+# Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+# en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of the <organization> nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic. IA32 version.
+
+ .text
+
+# Opposite
+
+ .globl __i64_neg
+ .balign 16
+__i64_neg:
+ movl 4(%esp), %eax
+ movl 8(%esp), %edx
+ negl %eax
+ adcl $0, %edx
+ negl %edx
+ ret
+ .type __i64_neg, @function
+ .size __i64_neg, . - __i64_neg
+
+# Addition
+
+ .globl __i64_add
+ .balign 16
+__i64_add:
+ movl 4(%esp), %eax
+ movl 8(%esp), %edx
+ addl 12(%esp), %eax
+ adcl 16(%esp), %edx
+ ret
+ .type __i64_add, @function
+ .size __i64_add, . - __i64_add
+
+# Subtraction
+
+ .globl __i64_sub
+ .balign 16
+__i64_sub:
+ movl 4(%esp), %eax
+ movl 8(%esp), %edx
+ subl 12(%esp), %eax
+ sbbl 16(%esp), %edx
+ ret
+ .type __i64_sub, @function
+ .size __i64_sub, . - __i64_sub
+
+# Multiplication
+
+ .globl __i64_mul
+ .balign 16
+__i64_mul:
+ movl 4(%esp), %eax
+ mull 12(%esp) # edx:eax = xlo * ylo
+ movl 4(%esp), %ecx
+ imull 16(%esp), %ecx # ecx = xlo * yhi
+ addl %ecx, %edx
+ movl 12(%esp), %ecx # ecx = xhi * ylo
+ imull 8(%esp), %ecx
+ addl %ecx, %edx
+ ret
+ .type __i64_mul, @function
+ .size __i64_mul, . - __i64_mul
+
+# Division and remainder
+
+# Auxiliary function, not exported.
+# Input: 20(esp), 24(esp) is dividend N
+# 28(esp), 32(esp) is divisor D
+# Output: esi:edi is quotient Q
+# eax:edx is remainder R
+# ebp is preserved
+
+ .balign 16
+__i64_udivmod:
+ cmpl $0, 32(%esp) # single-word divisor? (DH = 0)
+ jne 1f
+ # Special case 64 bits divided by 32 bits
+ movl 28(%esp), %ecx # divide NH by DL
+ movl 24(%esp), %eax # (will trap if D = 0)
+ xorl %edx, %edx
+ divl %ecx # eax = quotient, edx = remainder
+ movl %eax, %edi # high word of quotient in edi
+ movl 20(%esp), %eax # divide rem : NL by DL
+ divl %ecx # eax = quotient, edx = remainder
+ movl %eax, %esi # low word of quotient in esi
+ movl %edx, %eax # low word of remainder in eax
+ xorl %edx, %edx # high word of remainder is 0, in edx
+ ret
+ # The general case
+1: movl 28(%esp), %ecx # esi:ecx = D
+ movl 32(%esp), %esi
+ movl 20(%esp), %eax # edx:eax = N
+ movl 24(%esp), %edx
+ # Scale D and N down, giving D' and N', until D' fits in 32 bits
+2: shrl $1, %esi # shift D' right by one
+ rcrl $1, %ecx
+ shrl $1, %edx # shift N' right by one
+ rcrl $1, %eax
+ testl %esi, %esi # repeat until D'H = 0
+ jnz 2b
+ # Divide N' by D' to get an approximate quotient
+ divl %ecx # eax = quotient, edx = remainder
+ movl %eax, %esi # save tentative quotient Q in esi
+ # Check for off by one quotient
+ # Compute Q * D
+3: movl 32(%esp), %ecx
+ imull %esi, %ecx # ecx = Q * DH
+ movl 28(%esp), %eax
+ mull %esi # edx:eax = Q * DL
+ add %ecx, %edx # edx:eax = Q * D
+ jc 5f # overflow in addition means Q is too high
+ # Compare Q * D with N, computing the remainder in the process
+ movl %eax, %ecx
+ movl 20(%esp), %eax
+ subl %ecx, %eax
+ movl %edx, %ecx
+ movl 24(%esp), %edx
+ sbbl %ecx, %edx # edx:eax = N - Q * D
+ jnc 4f # no carry: N >= Q * D, we are fine
+ decl %esi # carry: N < Q * D, adjust Q down by 1
+ addl 28(%esp), %eax # and remainder up by D
+ adcl 32(%esp), %edx
+ # Finished
+4: xorl %edi, %edi # high half of quotient is 0
+ ret
+ # Special case when Q * D overflows
+5: decl %esi # adjust Q down by 1
+ jmp 3b # and redo check & computation of remainder
+
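The strategy here differs from the ARM/PowerPC bit-by-bit loop: when the divisor has a non-zero high word, both operands are shifted right until the divisor fits in 32 bits, a single hardware divide then yields a quotient estimate that is never too small and at most one too large (which is what the "off by one" check above relies on), and the estimate is corrected by comparing Q * D against N. A C model for exposition only (hypothetical name; it uses the compiler's unsigned __int128 extension so the Q * D comparison needs no explicit carry handling):

#include <stdint.h>

static void udivmod_estimate_model(uint64_t n, uint64_t d, uint64_t *q, uint64_t *r)
{
    if ((d >> 32) == 0) {                        /* single-word divisor */
        uint32_t dl = (uint32_t) d;              /* (divides by zero if d == 0, like the asm) */
        uint64_t qh   = (n >> 32) / dl;
        uint64_t rest = (((n >> 32) % dl) << 32) | (uint32_t) n;
        *q = (qh << 32) | (rest / dl);
        *r = rest % dl;
        return;
    }
    uint64_t np = n, dp = d;
    while (dp >> 32) {                           /* scale both down until D' fits in 32 bits */
        dp >>= 1;
        np >>= 1;
    }
    uint64_t quot = np / dp;                     /* estimate: true Q or true Q + 1 */
    for (;;) {
        unsigned __int128 prod = (unsigned __int128) quot * d;
        if (prod <= n) {                         /* Q * D <= N: quotient is correct */
            *q = quot;
            *r = n - (uint64_t) prod;
            return;
        }
        quot--;                                  /* too high: adjust down and re-check */
    }
}
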
+# Unsigned division
+
+ .globl __i64_udiv
+ .balign 16
+__i64_udiv:
+ pushl %ebp
+ pushl %esi
+ pushl %edi
+ call __i64_udivmod
+ movl %esi, %eax
+ movl %edi, %edx
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+ .type __i64_udiv, @function
+ .size __i64_udiv, . - __i64_udiv
+
+# Unsigned remainder
+
+ .globl __i64_umod
+ .balign 16
+__i64_umod:
+ pushl %ebp
+ pushl %esi
+ pushl %edi
+ call __i64_udivmod
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+ .type __i64_umod, @function
+ .size __i64_umod, . - __i64_umod
+
+# Signed division
+
+ .globl __i64_sdiv
+ .balign 16
+__i64_sdiv:
+ pushl %ebp
+ pushl %esi
+ pushl %edi
+ movl 20(%esp), %esi # esi = NH
+ movl %esi, %ebp # save sign of N in ebp
+ testl %esi, %esi
+ jge 1f # if N < 0,
+ negl 16(%esp) # N = -N
+ adcl $0, %esi
+ negl %esi
+ movl %esi, 20(%esp)
+1: movl 28(%esp), %esi # esi = DH
+ xorl %esi, %ebp # sign of result in ebp
+ testl %esi, %esi
+ jge 2f # if D < 0,
+ negl 24(%esp) # D = -D
+ adcl $0, %esi
+ negl %esi
+ movl %esi, 28(%esp)
+2: call __i64_udivmod
+ testl %ebp, %ebp # apply sign to result
+ jge 3f
+ negl %esi
+ adcl $0, %edi
+ negl %edi
+3: movl %esi, %eax
+ movl %edi, %edx
+ popl %edi
+ popl %esi
+ popl %ebp
+ ret
+ .type __i64_sdiv, @function
+ .size __i64_sdiv, . - __i64_sdiv
+
+# Signed remainder
+
+ .globl __i64_smod
+ .balign 16
+__i64_smod:
+ pushl %ebp
+ pushl %esi
+ pushl %edi
+ movl 20(%esp), %esi # esi = NH
+ movl %esi, %ebp # save sign of result in ebp
+ testl %esi, %esi
+ jge 1f # if N < 0,
+ negl 16(%esp) # N = -N
+ adcl $0, %esi
+ negl %esi
+ movl %esi, 20(%esp)
+1: movl 28(%esp), %esi # esi = DH
+ testl %esi, %esi
+ jge 2f # if D < 0,
+ negl 24(%esp) # D = -D
+ adcl $0, %esi
+ negl %esi
+ movl %esi, 28(%esp)
+2: call __i64_udivmod
+ testl %ebp, %ebp # apply sign to result
+ jge 3f
+ negl %eax
+ adcl $0, %edx
+ negl %edx
+3: popl %edi
+ popl %esi
+ popl %ebp
+ ret
+ .type __i64_smod, @function
+ .size __i64_smod, . - __i64_smod
+
+# Note on shifts:
+# IA32 shift instructions treat their amount (in %cl) modulo 32
+
+# Shift left
+
+ .globl __i64_shl
+ .balign 16
+__i64_shl:
+ movl 12(%esp), %ecx # ecx = shift amount, treated mod 64
+ testb $32, %cl
+ jne 1f
+ # shift amount < 32
+ movl 4(%esp), %eax
+ movl 8(%esp), %edx
+ shldl %cl, %eax, %edx # edx = high(XH:XL << amount)
+ shll %cl, %eax # eax = XL << amount
+ ret
+ # shift amount >= 32
+1: movl 4(%esp), %edx
+ shll %cl, %edx # edx = XL << (amount - 32)
+ xorl %eax, %eax # eax = 0
+ ret
+ .type __i64_shl, @function
+ .size __i64_shl, . - __i64_shl
+
+# Shift right unsigned
+
+ .globl __i64_shr
+ .balign 16
+__i64_shr:
+ movl 12(%esp), %ecx # ecx = shift amount, treated mod 64
+ testb $32, %cl
+ jne 1f
+ # shift amount < 32
+ movl 4(%esp), %eax
+ movl 8(%esp), %edx
+ shrdl %cl, %edx, %eax # eax = low(XH:XL >> amount)
+ shrl %cl, %edx # edx = XH >> amount
+ ret
+ # shift amount >= 32
+1: movl 8(%esp), %eax
+ shrl %cl, %eax # eax = XH >> (amount - 32)
+ xorl %edx, %edx # edx = 0
+ ret
+ .type __i64_shr, @function
+ .size __i64_shr, . - __i64_shr
+
+# Shift right signed
+
+ .globl __i64_sar
+ .balign 16
+__i64_sar:
+ movl 12(%esp), %ecx # ecx = shift amount, treated mod 64
+ testb $32, %cl
+ jne 1f
+ # shift amount < 32
+ movl 4(%esp), %eax
+ movl 8(%esp), %edx
+ shrdl %cl, %edx, %eax # eax = low(XH:XL >> amount)
+ sarl %cl, %edx # edx = XH >> amount (signed)
+ ret
+ # shift amount >= 32
+1: movl 8(%esp), %eax
+ movl %eax, %edx
+ sarl %cl, %eax # eax = XH >> (amount - 32)
+ sarl $31, %edx # edx = sign of X
+ ret
+ .type __i64_sar, @function
+ .size __i64_sar, . - __i64_sar
+
+# Unsigned comparison
+
+ .globl __i64_ucmp
+ .balign 16
+__i64_ucmp:
+ movl 8(%esp), %eax # compare high words
+ cmpl 16(%esp), %eax
+ jne 1f # if high words equal,
+ movl 4(%esp), %eax # compare low words
+ cmpl 12(%esp), %eax
+1: seta %al # AL = 1 if >, 0 if <=
+ setb %dl # DL = 1 if <, 0 if >=
+ subb %dl, %al # AL = 0 if same, 1 if >, -1 if <
+ movsbl %al, %eax
+ ret
+ .type __i64_ucmp, @function
+ .size __i64_ucmp, . - __i64_ucmp
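
The seta/setb/subb sequence is the usual branch-light way of turning a comparison into a three-way result; in C it is simply (x > y) - (x < y), applied to the high words and, when those are equal, to the low words. A sketch (hypothetical name):

int i64_ucmp_model(unsigned long long x, unsigned long long y)
{
    unsigned int xh = (unsigned int) (x >> 32), yh = (unsigned int) (y >> 32);
    unsigned int xl = (unsigned int) x,         yl = (unsigned int) y;
    if (xh != yh)
        return (xh > yh) - (xh < yh);   /* 1 or -1 */
    return (xl > yl) - (xl < yl);       /* 1, 0 or -1 */
}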
+
+# Signed comparison
+
+ .globl __i64_scmp
+ .balign 16
+__i64_scmp:
+ movl 8(%esp), %eax # compare high words (signed)
+ cmpl 16(%esp), %eax
+ je 1f # if different,
+ setg %al # extract result
+ setl %dl
+ subb %dl, %al
+ movsbl %al, %eax
+ ret
+1: movl 4(%esp), %eax # if high words equal,
+ cmpl 12(%esp), %eax # compare low words (unsigned)
+ seta %al # and extract result
+ setb %dl
+ subb %dl, %al
+ movsbl %al, %eax
+ ret
+ .type __i64_scmp, @function
+ .size __i64_scmp, . - __i64_scmp
+
+# Conversion signed long -> float
+
+ .globl __i64_stod
+ .balign 16
+__i64_stod:
+ fildll 4(%esp)
+ ret
+ .type __i64_stod, @function
+ .size __i64_stod, . - __i64_stod
+
+# Conversion unsigned long -> float
+
+ .globl __i64_utod
+ .balign 16
+__i64_utod:
+ fildll 4(%esp) # convert as if signed
+ cmpl $0, 8(%esp) # is argument >= 2^63?
+ jns 1f
+ fadds LC1 # adjust by 2^64
+1: ret
+ .type __i64_utod, @function
+ .size __i64_utod, . - __i64_utod
+
+ .balign 4
+LC1: .long 0x5f800000 # 2^64 in single precision
+
+# Conversion float -> signed long
+
+ .globl __i64_dtos
+ .balign 16
+__i64_dtos:
+ subl $4, %esp
+ # Change rounding mode to "round towards zero"
+ fnstcw 0(%esp)
+ movw 0(%esp), %ax
+ movb $12, %ah
+ movw %ax, 2(%esp)
+ fldcw 2(%esp)
+ # Convert
+ fldl 8(%esp)
+ fistpll 8(%esp)
+ # Restore rounding mode
+ fldcw 0(%esp)
+ # Load result in edx:eax
+ movl 8(%esp), %eax
+ movl 12(%esp), %edx
+ addl $4, %esp
+ ret
+ .type __i64_dtos, @function
+ .size __i64_dtos, . - __i64_dtos
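
The control-word dance exists only to force the x87 into truncation, because fistpll honours the current rounding mode while the C conversion must round toward zero. In C99 terms it corresponds roughly to the following sketch (llrint, which rounds according to the current mode, stands in for fistpll here):

#include <fenv.h>
#include <math.h>

long long dtos_fenv_model(double x)
{
    int saved = fegetround();       /* fnstcw: save the current mode           */
    fesetround(FE_TOWARDZERO);      /* fldcw with RC = 11: round toward zero   */
    long long r = llrint(x);        /* fistpll: convert using the current mode */
    fesetround(saved);              /* fldcw: restore the caller's mode        */
    return r;
}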
+
+# Conversion float -> unsigned long
+
+ .globl __i64_dtou
+ .balign 16
+__i64_dtou:
+ subl $4, %esp
+ # Change rounding mode to "round towards zero"
+ fnstcw 0(%esp)
+ movw 0(%esp), %ax
+ movb $12, %ah
+ movw %ax, 2(%esp)
+ fldcw 2(%esp)
+ # Compare argument with 2^63
+ fldl (4+4)(%esp)
+ flds LC2
+ fucomp
+ fnstsw %ax
+ sahf
+ jbe 1f # branch if not (ARG < 2^63)
+ # Argument < 2^63: convert as is
+ fistpll 8(%esp)
+ movl 8(%esp), %eax
+ movl 12(%esp), %edx
+ jmp 2f
+ # Argument >= 2^63: offset ARG by -2^63, then convert, then offset RES by 2^63
+1: fsubs LC2
+ fistpll 8(%esp)
+ movl 8(%esp), %eax
+ movl 12(%esp), %edx
+ addl $0x80000000, %edx
+ # Restore rounding mode
+2: fldcw 0(%esp)
+ addl $4, %esp
+ ret
+ .type __i64_dtou, @function
+ .size __i64_dtou, . - __i64_dtou
+
+ .balign 4
+LC2: .long 0x5f000000 # 2^63 in single precision
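
The 2^63 pivot compensates for the x87 having only a signed 64-bit store: values below 2^63 convert directly, larger ones are biased down into signed range before the convert and the bias is added back on the integer side. Roughly, in C (hypothetical name; out-of-range and NaN inputs are as undefined here as in the assembly):

unsigned long long dtou_offset_model(double x)
{
    const double two63 = 9223372036854775808.0;           /* 2^63 */
    if (x < two63)
        return (unsigned long long) (long long) x;         /* fits in signed range */
    return (unsigned long long) (long long) (x - two63)    /* bias down, convert, ... */
           + 0x8000000000000000ULL;                        /* ... then re-apply 2^63 */
}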
diff --git a/runtime/powerpc/int64.s b/runtime/powerpc/int64.s
new file mode 100644
index 0000000..34b65b5
--- /dev/null
+++ b/runtime/powerpc/int64.s
@@ -0,0 +1,492 @@
+# *****************************************************************
+#
+# The Compcert verified compiler
+#
+# Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+# en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of the <organization> nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic. PowerPC version.
+
+# Calling conventions for R = F(X) or R = F(X,Y):
+# one or two long arguments: XH in r3, XL in r4, YH in r5, YL in r6
+# one long argument, one int: XH in r3, XL in r4, Y in r5
+# one float argument: X in f1
+# one long result: RH in r3, RL in r4
+# one float result: R in f1
+# This is a big-endian convention: the high word is in the low-numbered register
+# Can use r3...r12 and f0...f13 as temporary registers (caller-save)
+
+ .text
+
+### Opposite
+
+ .balign 16
+ .globl __i64_neg
+__i64_neg:
+ subfic r4, r4, 0 # RL = -XL and set borrow iff XL != 0
+ subfze r3, r3 # RH = -XH - borrow
+ blr
+ .type __i64_neg, @function
+ .size __i64_neg, .-__i64_neg
+
+### Addition
+
+ .balign 16
+ .globl __i64_add
+__i64_add:
+ addc r4, r4, r6 # RL = XL + YL and set carry if overflow
+ adde r3, r3, r5 # RH = XH + YH + carry
+ blr
+ .type __i64_add, @function
+ .size __i64_add, .-__i64_add
+
+### Subtraction
+
+ .balign 16
+ .globl __i64_sub
+__i64_sub:
+ subfc r4, r6, r4 # RL = XL - YL and set borrow if underflow
+ subfe r3, r5, r3 # RH = XH - YH - borrow
+ blr
+ .type __i64_sub, @function
+ .size __i64_sub, .-__i64_sub
+
+### Multiplication
+
+ .balign 16
+ .globl __i64_mul
+__i64_mul:
+ # Form intermediate products
+ mulhwu r7, r4, r6 # r7 = high half of XL * YL
+ mullw r8, r3, r6 # r8 = low half of XH * YL
+ mullw r9, r4, r5 # r9 = low half of XL * YH
+ mullw r4, r4, r6 # r4 = low half of XL * YL = low half of result
+ # Reconstruct high half of result
+ add r3, r7, r8
+ add r3, r3, r9
+ blr
+ .type __i64_mul, @function
+ .size __i64_mul, .-__i64_mul
+
+### Helper function for division and modulus. Not exported.
+# Input: numerator N in (r3,r4), divisor D in (r5,r6)
+# Output: quotient Q in (r7,r8), remainder R in (r3,r4)
+ .balign 16
+__i64_udivmod:
+ # Set up quotient and mask
+ li r8, 0 # Q = 0
+ li r7, 0
+ li r10, 1 # M = 1
+ li r9, 0
+ # Check for zero divisor
+ or. r0, r6, r5
+ beqlr # return with unspecified quotient & remainder
+ # Scale divisor and mask
+1: cmpwi r5, 0 # while top bit of D is zero...
+ blt 2f
+ subfc r0, r6, r4 # compute borrow out of N - D
+ subfe r0, r5, r3
+ subfe. r0, r0, r0 # EQ iff no borrow iff N >= D
+ bne 2f # ... and while N >= D ...
+ addc r6, r6, r6 # scale divisor: D = D << 1
+ adde r5, r5, r5
+ addc r10, r10, r10 # scale mask: M = M << 1
+ adde r9, r9, r9
+ b 1b # end while
+ # Long division
+2: subfc r4, r6, r4 # Q = Q | M, N = N - D, and compute borrow
+ or r8, r8, r10
+ subfe r3, r5, r3
+ or r7, r7, r9
+ subfe. r0, r0, r0 # test borrow
+ beq 3f # no borrow: N >= D, continue
+ addc r4, r4, r6 # borrow: undo what we just did to N and Q
+ andc r8, r8, r10
+ adde r3, r3, r5
+ andc r7, r7, r9
+3: slwi r0, r9, 31 # unscale mask: M = M >> 1
+ srwi r10, r10, 1
+ or r10, r10, r0
+ srwi r9, r9, 1
+ slwi r0, r5, 31 # unscale divisor: D = D >> 1
+ srwi r6, r6, 1
+ or r6, r6, r0
+ srwi r5, r5, 1
+ or. r0, r10, r9 # iterate while M != 0
+ bne 2b
+ blr
+
+### Unsigned division
+
+ .balign 16
+ .globl __i64_udiv
+__i64_udiv:
+ mflr r11 # save return address in r11
+ bl __i64_udivmod # unsigned divide
+ mtlr r11 # restore return address
+ mr r3, r7 # R = quotient
+ mr r4, r8
+ blr
+ .type __i64_udiv, @function
+ .size __i64_udiv, .-__i64_udiv
+
+### Unsigned remainder
+
+ .balign 16
+ .globl __i64_umod
+__i64_umod:
+ mflr r11
+ bl __i64_udivmod
+ mtlr r11
+ blr # remainder is already in R
+ .type __i64_umod, @function
+ .size __i64_umod, .-__i64_umod
+
+### Signed division
+
+ .balign 16
+ .globl __i64_sdiv
+__i64_sdiv:
+ mflr r11 # save return address
+ xor r12, r3, r5 # save sign of result in r12 (top bit)
+ srawi r0, r3, 31 # take absolute value of N
+ xor r4, r4, r0 # (i.e. N = N ^ r0 - r0,
+ xor r3, r3, r0 # where r0 = 0 if N >= 0 and r0 = -1 if N < 0)
+ subfc r4, r0, r4
+ subfe r3, r0, r3
+ srawi r0, r5, 31 # take absolute value of D
+ xor r6, r6, r0 # (same trick)
+ xor r5, r5, r0
+ subfc r6, r0, r6
+ subfe r5, r0, r5
+ bl __i64_udivmod # do unsigned division
+ mtlr r11 # restore return address
+ srawi r0, r12, 31 # apply expected sign to quotient
+ xor r8, r8, r0 # RES = Q if r12 >= 0, -Q if r12 < 0
+ xor r7, r7, r0
+ subfc r4, r0, r8
+ subfe r3, r0, r7
+ blr
+ .type __i64_sdiv, @function
+ .size __i64_sdiv, .-__i64_sdiv
+
+## Signed remainder
+
+ .balign 16
+ .globl __i64_smod
+__i64_smod:
+ mflr r11 # save return address
+ srawi r12, r3, 31 # save sign of result in r12 (sign of N)
+ xor r4, r4, r12 # and take absolute value of N
+ xor r3, r3, r12
+ subfc r4, r12, r4
+ subfe r3, r12, r3
+ srawi r0, r5, 31 # take absolute value of D
+ xor r6, r6, r0
+ xor r5, r5, r0
+ subfc r6, r0, r6
+ subfe r5, r0, r5
+ bl __i64_udivmod # do unsigned division
+ mtlr r11 # restore return address
+ xor r4, r4, r12 # apply expected sign to remainder
+ xor r3, r3, r12 # RES = R if r12 == 0, -R if r12 == -1
+ subfc r4, r12, r4
+ subfe r3, r12, r3
+ blr
+ .type __i64_smod, @function
+ .size __i64_smod, .-__i64_smod
+
+### Unsigned comparison
+
+ .balign 16
+ .globl __i64_ucmp
+__i64_ucmp:
+ cmplw cr0, r3, r5 # compare high words (unsigned)
+ cmplw cr1, r4, r6 # compare low words (unsigned)
+ mfcr r0
+# At this point, the bits of r0 are as follow:
+# bit 31: XH < YH
+# bit 30: XH > YH
+# bit 27: XL < YL
+# bit 26: XL > YL
+ rlwinm r3, r0, 0, 0, 1 # r3 = r0 & 0xC000_0000
+ srawi r3, r3, 24 # r3 = r3 >>s 24
+# r3 = -0x80 if XH < YH
+# = 0x40 if XH > YH
+# = 0 if XH = YH
+ rlwinm r4, r0, 4, 0, 1 # r4 = (r0 << 4) & 0xC000_0000
+ srawi r4, r4, 28 # r4 = r4 >>s 28
+# r4 = -8 if XL < YL
+# = 4 if XL > YL
+# = 0 if XL = YL
+ add r3, r3, r4
+# r3 = -0x80 or -0x80 - 8 or -0x80 + 4 or -8 if X < Y
+# (in all cases, r3 < 0 if X < Y)
+# = 0x40 or 0x40 - 8 or 0x40 + 4 or 4 if X > Y
+# (in all cases, r3 > 0 if X > Y)
+# = 0 if X = Y
+ blr
+ .type __i64_ucmp, @function
+ .size __i64_ucmp, .-__i64_ucmp
+
+### Signed comparison
+
+ .balign 16
+ .globl __i64_scmp
+__i64_scmp:
+ cmpw cr0, r3, r5 # compare high words (signed)
+ cmplw cr1, r4, r6 # compare low words (unsigned)
+ mfcr r0
+# Same trick as in __i64_ucmp
+ rlwinm r3, r0, 0, 0, 1
+ srawi r3, r3, 24
+ rlwinm r4, r0, 4, 0, 1
+ srawi r4, r4, 28
+ add r3, r3, r4
+ blr
+ .type __i64_scmp, @function
+ .size __i64_scmp, .-__i64_scmp
+
+### Shifts
+
+# On PowerPC, shift instructions with amount mod 64 >= 32 return 0
+
+ .balign 16
+ .globl __i64_shl
+__i64_shl:
+# hi = (hi << amount) | (lo >> (32 - amount)) | (lo << (amount - 32))
+# lo = lo << amount
+# if 0 <= amount < 32:
+# (amount - 32) mod 64 >= 32, hence lo << (amount - 32) == 0
+# if 32 <= amount < 64:
+# lo << amount == 0
+# (32 - amount) mod 64 >= 32, hence lo >> (32 - amount) == 0
+ andi. r5, r5, 63 # take amount modulo 64
+ subfic r6, r5, 32 # r6 = 32 - amount
+ addi r7, r5, -32 # r7 = amount - 32
+ slw r3, r3, r5
+ srw r0, r4, r6
+ or r3, r3, r0
+ slw r0, r4, r7
+ or r3, r3, r0
+ slw r4, r4, r5
+ blr
+ .type __i64_shl, @function
+ .size __i64_shl, .-__i64_shl
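
The branchless formula above relies on PowerPC shift semantics (slw/srw yield 0 whenever the 6-bit amount is 32 or more), which plain C shifts do not guarantee, so a C rendering has to spell that behaviour out. A sketch (hypothetical helpers):

static unsigned int slw_model(unsigned int x, int n)   /* PowerPC slw: 0 when (n & 63) >= 32 */
{
    return (n & 63) < 32 ? x << (n & 31) : 0;
}

static unsigned int srw_model(unsigned int x, int n)   /* PowerPC srw: 0 when (n & 63) >= 32 */
{
    return (n & 63) < 32 ? x >> (n & 31) : 0;
}

unsigned long long i64_shl_model(unsigned long long x, int amount)
{
    unsigned int lo = (unsigned int) x, hi = (unsigned int) (x >> 32);
    int n = amount & 63;
    unsigned int rh = slw_model(hi, n) | srw_model(lo, 32 - n) | slw_model(lo, n - 32);
    unsigned int rl = slw_model(lo, n);
    return ((unsigned long long) rh << 32) | rl;
}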
+
+ .balign 16
+ .globl __i64_shr
+__i64_shr:
+# lo = (lo >> amount) | (hi << (32 - amount)) | (hi >> (amount - 32))
+# hi = hi >> amount
+# if 0 <= amount < 32:
+# (amount - 32) mod 64 >= 32, hence hi >> (amount - 32) == 0
+# if 32 <= amount < 64:
+# hi >> amount == 0
+# (32 - amount) mod 64 >= 32, hence hi << (32 - amount) == 0
+ andi. r5, r5, 63 # take amount modulo 64
+ subfic r6, r5, 32 # r6 = 32 - amount
+ addi r7, r5, -32 # r7 = amount - 32
+ srw r4, r4, r5
+ slw r0, r3, r6
+ or r4, r4, r0
+ srw r0, r3, r7
+ or r4, r4, r0
+ srw r3, r3, r5
+ blr
+ .type __i64_shr, @function
+ .size __i64_shr, .-__i64_shr
+
+ .balign 16
+ .globl __i64_sar
+__i64_sar:
+ andi. r5, r5, 63 # take amount modulo 64
+ cmpwi r5, 32
+ bge 1f # amount < 32?
+ subfic r6, r5, 32 # r6 = 32 - amount
+ srw r4, r4, r5 # RL = XL >>u amount | XH << (32 - amount)
+ slw r0, r3, r6
+ or r4, r4, r0
+ sraw r3, r3, r5 # RH = XH >>s amount
+ blr
+1: addi r6, r5, -32 # amount >= 32
+ sraw r4, r3, r6 # RL = XH >>s (amount - 32)
+ srawi r3, r3, 31 # RH = sign extension of XH
+ blr
+ .type __i64_sar, @function
+ .size __i64_sar, .-__i64_sar
+
+### Conversion from unsigned long to double float
+
+ .balign 16
+ .globl __i64_utod
+__i64_utod:
+ addi r1, r1, -16
+ lis r5, 0x4330
+ li r6, 0
+ stw r5, 0(r1)
+ stw r4, 4(r1)
+ stw r5, 8(r1)
+ stw r6, 12(r1)
+ lfd f1, 0(r1)
+ lfd f2, 8(r1)
+ fsub f1, f1, f2 # f1 is XL as a double
+ lis r5, 0x4530
+ stw r5, 0(r1)
+ stw r3, 4(r1)
+ stw r5, 8(r1)
+ lfd f2, 0(r1)
+ lfd f3, 8(r1)
+ fsub f2, f2, f3 # f2 is XH * 2^32 as a double
+ fadd f1, f1, f2 # add both to get result
+ addi r1, r1, 16
+ blr
+ .type __i64_utod, @function
+ .size __i64_utod, .-__i64_utod
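
The magic constants are the exponent fields of 2^52 and 2^84: placing 0x43300000 above a 32-bit word W forms the double 2^52 + W exactly, and 0x45300000 above W forms 2^84 + W * 2^32, so subtracting the unbiased constants recovers each half exactly and the final addition rounds only once. The same trick in C (hypothetical name; it builds the 64-bit patterns directly instead of storing word pairs, so it is endian-neutral):

double i64_utod_model(unsigned long long x)
{
    union { unsigned long long u; double d; } lo, hi;
    lo.u = 0x4330000000000000ULL | (unsigned int) x;   /* bits of 2^52 + XL, exact */
    hi.u = 0x4530000000000000ULL | (x >> 32);          /* bits of 2^84 + XH * 2^32, exact */
    return (lo.d - 0x1p52)                             /* -> XL as a double */
         + (hi.d - 0x1p84);                            /* -> XH * 2^32 as a double */
}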
+
+### Conversion from signed long to double float
+
+ .balign 16
+ .globl __i64_stod
+__i64_stod:
+ addi r1, r1, -16
+ lis r5, 0x4330
+ li r6, 0
+ stw r5, 0(r1)
+ stw r4, 4(r1)
+ stw r5, 8(r1)
+ stw r6, 12(r1)
+ lfd f1, 0(r1)
+ lfd f2, 8(r1)
+ fsub f1, f1, f2 # f1 is XL (unsigned) as a double
+ lis r5, 0x4530
+ lis r6, 0x8000
+ stw r5, 0(r1)
+ add r3, r3, r6
+ stw r3, 4(r1)
+ stw r5, 8(r1)
+ stw r6, 12(r1)
+ lfd f2, 0(r1)
+ lfd f3, 8(r1)
+ fsub f2, f2, f3 # f2 is XH (signed) * 2^32 as a double
+ fadd f1, f1, f2 # add both to get result
+ addi r1, r1, 16
+ blr
+ .type __i64_stod, @function
+ .size __i64_stod, .-__i64_stod
+
+### Conversion from double float to unsigned long
+
+ .balign 16
+ .globl __i64_dtou
+__i64_dtou:
+ stfdu f1, -16(r1) # extract LO (r4) and HI (r3) halves of double
+ lwz r3, 0(r1)
+ lwz r4, 4(r1)
+ addi r1, r1, 16
+ cmpwi r3, 0 # is double < 0?
+ blt 1f # then it converts to 0
+ # extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52)
+ rlwinm r5, r3, 12, 21, 31
+ addi r5, r5, -1075
+ # check range of exponent
+ cmpwi r5, -52 # if EXP < -52, double is < 1.0
+ blt 1f
+ cmpwi r5, 12 # if EXP >= 64 - 52, double is >= 2^64
+ bge 2f
+ # extract true mantissa
+ rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000
+ oris r3, r3, 0x10 # HI |= 0x00100000
+ # shift it appropriately
+ cmpwi r5, 0
+ blt 3f
+ b __i64_shl # if EXP >= 0, shift left by EXP
+3: subfic r5, r5, 0
+ b __i64_shr # if EXP < 0, shift right by -EXP
+ # Special cases
+1: li r3, 0 # result is 0
+ li r4, 0
+ blr
+2: li r3, -1 # result is MAX_UINT
+ li r4, -1
+ blr
+ .type __i64_dtou, @function
+ .size __i64_dtou, .-__i64_dtou
+
+### Conversion from double float to signed long
+
+ .balign 16
+ .globl __i64_dtos
+__i64_dtos:
+ stfdu f1, -16(r1) # extract LO (r4) and HI (r3) halves of double
+ lwz r3, 0(r1)
+ lwz r4, 4(r1)
+ addi r1, r1, 16
+ srawi r10, r3, 31 # save sign of double in r10
+ # extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52)
+ rlwinm r5, r3, 12, 21, 31
+ addi r5, r5, -1075
+ # check range of exponent
+ cmpwi r5, -52 # if EXP < -52, abs(double) is < 1.0
+ blt 1f
+ cmpwi r5, 11 # if EXP >= 63 - 52, abs(double) is >= 2^63
+ bge 2f
+ # extract true mantissa
+ rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000
+ oris r3, r3, 0x10 # HI |= 0x00100000
+ # shift it appropriately
+ mflr r9 # save retaddr in r9
+ cmpwi r5, 0
+ blt 3f
+ bl __i64_shl # if EXP >= 0, shift left by EXP
+ b 4f
+3: subfic r5, r5, 0
+ bl __i64_shr # if EXP < 0, shift right by -EXP
+ # apply sign to result
+4: mtlr r9
+ xor r4, r4, r10
+ xor r3, r3, r10
+ subfc r4, r10, r4
+ subfe r3, r10, r3
+ blr
+ # Special cases
+1: li r3, 0 # result is 0
+ li r4, 0
+ blr
+2: cmpwi r10, 0 # result is MAX_SINT or MIN_SINT
+ blt 5f # depending on sign (negative -> MIN_SINT)
+ li r4, -1 # result is MAX_SINT = 0x7FFF_FFFF_FFFF_FFFF
+ srwi r3, r4, 1
+ blr
+5: lis r3, 0x8000 # result is MIN_SINT = 0x8000_0000_0000_0000
+ li r4, 0
+ blr
+ .type __i64_dtos, @function
+ .size __i64_dtos, .-__i64_dtos
diff --git a/runtime/test/test_int64.c b/runtime/test/test_int64.c
new file mode 100644
index 0000000..11adce3
--- /dev/null
+++ b/runtime/test/test_int64.c
@@ -0,0 +1,238 @@
+/* ************************************************************************** */
+/* */
+/* The Compcert verified compiler */
+/* */
+/* Xavier Leroy, INRIA Paris-Rocquencourt */
+/* */
+/* Copyright (c) 2013 Institut National de Recherche en Informatique et */
+/* en Automatique. */
+/* */
+/* Redistribution and use in source and binary forms, with or without */
+/* modification, are permitted provided that the following conditions are met:*/
+/* * Redistributions of source code must retain the above copyright */
+/* notice, this list of conditions and the following disclaimer. */
+/* * Redistributions in binary form must reproduce the above copyright */
+/* notice, this list of conditions and the following disclaimer in the */
+/* documentation and/or other materials provided with the distribution. */
+/* * Neither the name of the <organization> nor the */
+/* names of its contributors may be used to endorse or promote products */
+/* derived from this software without specific prior written permission.*/
+/* */
+/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
+/* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
+/* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR */
+/* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT */
+/* HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
+/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */
+/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */
+/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
+/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING */
+/* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS */
+/* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+/* */
+/* ************************************************************************** */
+
+/* Differential testing of 64-bit integer operations */
+/* This file is to be compiled by a C compiler other than CompCert C */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef unsigned long long u64;
+typedef signed long long s64;
+
+extern s64 __i64_neg(s64 x);
+extern s64 __i64_add(s64 x, s64 y);
+extern s64 __i64_sub(s64 x, s64 y);
+extern s64 __i64_mul(s64 x, s64 y);
+extern u64 __i64_udiv(u64 x, u64 y);
+extern u64 __i64_umod(u64 x, u64 y);
+extern s64 __i64_sdiv(s64 x, s64 y);
+extern s64 __i64_smod(s64 x, s64 y);
+
+extern u64 __i64_shl(u64 x, unsigned amount);
+extern u64 __i64_shr(u64 x, unsigned amount);
+extern s64 __i64_sar(s64 x, unsigned amount);
+
+extern int __i64_ucmp(u64 x, u64 y);
+extern int __i64_scmp(s64 x, s64 y);
+
+extern double __i64_utod(u64 x);
+extern double __i64_stod(s64 x);
+extern u64 __i64_dtou(double d);
+extern s64 __i64_dtos(double d);
+
+static u64 rnd64(void)
+{
+ static u64 seed = 0;
+ seed = seed * 6364136223846793005ULL + 1442695040888963407ULL;
+ return seed;
+}
+
+static int error = 0;
+
+static void test1(u64 x, u64 y)
+{
+ u64 z, uy;
+ s64 t, sy;
+ int i;
+ double f, g;
+
+ z = __i64_neg(x);
+ if (z != -x)
+ error++, printf("- %016llx = %016llx, expected %016llx\n", x, z, -x);
+ z = __i64_add(x, y);
+ if (z != x + y)
+ error++, printf("%016llx + %016llx = %016llx, expected %016llx\n", x, y, z, x + y);
+
+ z = __i64_sub(x, y);
+ if (z != x - y)
+ error++, printf("%016llx - %016llx = %016llx, expected %016llx\n", x, y, z, x - y);
+
+ z = __i64_mul(x, y);
+ if (z != x * y)
+ error++, printf("%016llx * %016llx = %016llx, expected %016llx\n", x, y, z, x * y);
+
+ if (y != 0) {
+
+ z = __i64_udiv(x, y);
+ if (z != x / y)
+ error++, printf("%llu /u %llu = %llu, expected %llu\n", x, y, z, x / y);
+
+ z = __i64_umod(x, y);
+ if (z != x % y)
+ error++, printf("%llu %%u %llu = %llu, expected %llu\n", x, y, z, x % y);
+
+ }
+
+ if (y != 0 && !(x == 0x8000000000000000LLU && y == -1)) {
+
+ t = __i64_sdiv(x, y);
+ if (t != (s64) x / (s64) y)
+ error++, printf("%lld /s %lld = %lld, expected %lld\n", x, y, t, (s64) x / (s64) y);
+
+ t = __i64_smod(x, y);
+ if (t != (s64) x % (s64) y)
+ error++, printf("%lld %%s %lld = %lld, expected %lld\n", x, y, t, (s64) x % (s64) y);
+
+ }
+
+ /* Test division with small (32-bit) divisors */
+ uy = y >> 32;
+ sy = (s64)y >> 32;
+
+ if (uy != 0) {
+
+ z = __i64_udiv(x, uy);
+ if (z != x / uy)
+ error++, printf("%llu /u %llu = %llu, expected %llu\n", x, uy, z, x / uy);
+
+ z = __i64_umod(x, uy);
+ if (z != x % uy)
+ error++, printf("%llu %%u %llu = %llu, expected %llu\n", x, uy, z, x % uy);
+
+ }
+
+ if (sy != 0 && !(x == 0x8000000000000000LLU && sy == -1)) {
+
+ t = __i64_sdiv(x, sy);
+ if (t != (s64) x / sy)
+ error++, printf("%lld /s %lld = %lld, expected %lld\n", x, sy, t, (s64) x / sy);
+
+ t = __i64_smod(x, sy);
+ if (t != (s64) x % sy)
+ error++, printf("%lld %%s %lld = %lld, expected %lld\n", x, sy, t, (s64) x % sy);
+
+ }
+
+ i = y & 63;
+
+ z = __i64_shl(x, i);
+ if (z != x << i)
+ error++, printf("%016llx << %d = %016llx, expected %016llx\n", x, i, z, x << i);
+
+ z = __i64_shr(x, i);
+ if (z != x >> i)
+ error++, printf("%016llx >>u %d = %016llx, expected %016llx\n", x, i, z, x >> i);
+
+ t = __i64_sar(x, i);
+ if (t != (s64) x >> i)
+ error++, printf("%016llx >>s %d = %016llx, expected %016llx\n", x, i, t, (s64) x >> i);
+
+ i = __i64_ucmp(x, y);
+ if (x == y) {
+ if (! (i == 0))
+ error++, printf("ucmp(%016llx, %016llx) = %d, expected 0\n", x, y, i);
+ }
+ else if (x < y) {
+ if (! (i < 0))
+ error++, printf("ucmp(%016llx, %016llx) = %d, expected < 0\n", x, y, i);
+ } else {
+ if (! (i > 0))
+ error++, printf("ucmp(%016llx, %016llx) = %d, expected > 0\n", x, y, i);
+ }
+
+ i = __i64_scmp(x, y);
+ if (x == y) {
+ if (! (i == 0))
+ error++, printf("scmp(%016llx, %016llx) = %d, expected 0\n", x, y, i);
+ }
+ else if ((s64)x < (s64)y) {
+ if (! (i < 0))
+ error++, printf("scmp(%016llx, %016llx) = %d, expected < 0\n", x, y, i);
+ } else {
+ if (! (i > 0))
+ error++, printf("scmp(%016llx, %016llx) = %d, expected > 0\n", x, y, i);
+ }
+
+ f = __i64_utod(x);
+ g = (double) x;
+ if (f != g)
+ error++, printf("(double) %llu (u) = %a, expected %a\n", x, f, g);
+
+ f = __i64_stod(x);
+ g = (double) (s64) x;
+ if (f != g)
+ error++, printf("(double) %lld (s) = %a, expected %a\n", x, f, g);
+
+ f = ((double) x) * 0.0001;
+ z = __i64_dtou(f);
+ if (z != (u64) f)
+ error++, printf("(u64) %a = %llu, expected %llu\n", f, z, (u64) f);
+
+ f = ((double) (s64) x) * 0.0001;
+ t = __i64_dtos(f);
+ if (t != (s64) f)
+ error++, printf("(s64) %a = %lld, expected %lld\n", f, t, (s64) f);
+}
+
+#define NSPECIFIC 8
+
+unsigned long long specific[NSPECIFIC] = {
+ 0, 1, -1, 0x7FFFFFFFULL, 0x80000000ULL, 0xFFFFFFFFULL,
+ 0x7FFFFFFFFFFFFFFFULL, 0x8000000000000000ULL
+};
+
+int main()
+{
+ int i, j;
+
+ /* Some specific values */
+ for (i = 0; i < NSPECIFIC; i++)
+ for (j = 0; j < NSPECIFIC; j++)
+ test1(specific[i], specific[j]);
+
+ /* Random testing */
+ for (i = 0; i < 50; i++) {
+ for (j = 0; j < 1000000; j++)
+ test1(rnd64(), rnd64());
+ printf("."); fflush(stdout);
+ }
+ printf("\n");
+ if (error == 0)
+ printf ("Test passed\n");
+ else
+ printf ("TEST FAILED, %d error(s) detected\n", error);
+ return 0;
+}
+