From 255cee09b71255051c2b40eae0c88bffce1f6f32 Mon Sep 17 00:00:00 2001 From: xleroy Date: Sat, 20 Apr 2013 07:54:52 +0000 Subject: Big merge of the newregalloc-int64 branch. Lots of changes in two directions: 1- new register allocator (+ live range splitting, spilling&reloading, etc) based on a posteriori validation using the Rideau-Leroy algorithm 2- support for 64-bit integer arithmetic (type "long long"). git-svn-id: https://yquem.inria.fr/compcert/svn/compcert/trunk@2200 fca1b0fc-160b-0410-b1d3-a4f43f01ea2e --- runtime/Makefile | 28 ++- runtime/arm/int64.s | 424 +++++++++++++++++++++++++++++++++++++++ runtime/ia32/int64.s | 471 ++++++++++++++++++++++++++++++++++++++++++++ runtime/powerpc/int64.s | 492 ++++++++++++++++++++++++++++++++++++++++++++++ runtime/test/test_int64.c | 238 ++++++++++++++++++++++ 5 files changed, 1643 insertions(+), 10 deletions(-) create mode 100644 runtime/arm/int64.s create mode 100644 runtime/ia32/int64.s create mode 100644 runtime/powerpc/int64.s create mode 100644 runtime/test/test_int64.c (limited to 'runtime') diff --git a/runtime/Makefile b/runtime/Makefile index c128ba2..5550e6b 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -2,26 +2,34 @@ include ../Makefile.config CFLAGS=-O1 -g -Wall INCLUDES= -OBJS= +OBJS=int64.o LIB=libcompcert.a -ifeq ($(strip $(HAS_RUNTIME_LIB)),true) all: $(LIB) $(INCLUDES) -else -all: -endif $(LIB): $(OBJS) rm -f $(LIB) ar rcs $(LIB) $(OBJS) -clean: +%.o: $(ARCH)/%.s + $(CASM) $^ + +clean:: rm -f *.o $(LIB) -ifeq ($(strip $(HAS_RUNTIME_LIB)),true) install: install -d $(LIBDIR) install -c $(LIB) $(INCLUDES) $(LIBDIR) -else -install: -endif + +test/test_int64: test/test_int64.c $(LIB) + $(CC) -o $@ test/test_int64.c $(LIB) + +clean:: + rm -f test/test_int64 + +.PHONY: test + +test: FORCE test/test_int64 + test/test_int64 + +FORCE: diff --git a/runtime/arm/int64.s b/runtime/arm/int64.s new file mode 100644 index 0000000..6b03351 --- /dev/null +++ b/runtime/arm/int64.s @@ -0,0 +1,424 @@ +@ ***************************************************************** +@ +@ The Compcert verified compiler +@ +@ Xavier Leroy, INRIA Paris-Rocquencourt +@ +@ Copyright (c) 2013 Institut National de Recherche en Informatique et +@ en Automatique. +@ +@ Redistribution and use in source and binary forms, with or without +@ modification, are permitted provided that the following conditions are met: +@ * Redistributions of source code must retain the above copyright +@ notice, this list of conditions and the following disclaimer. +@ * Redistributions in binary form must reproduce the above copyright +@ notice, this list of conditions and the following disclaimer in the +@ documentation and/or other materials provided with the distribution. +@ * Neither the name of the nor the +@ names of its contributors may be used to endorse or promote products +@ derived from this software without specific prior written permission. +@ +@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +@ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +@ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +@ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +@ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +@ +@ ********************************************************************* + +@ Helper functions for 64-bit integer arithmetic. ARM version. + +@ Calling conventions for R = F(X) or R = F(X,Y): +@ one or two long arguments: XL in r0, XH in r1, YL in r2, YH in r3 +@ one long argument, one int: XL in r0, XH in r1, Y in r2 +@ one float argument: X in r0, r1 +@ one long result: RL in r0, RH in r1 +@ one float result: R in r0, r1 +@ This is a little-endian convention: the low word is in the +@ low-numbered register. +@ Can use r0...r3 and f0...f7 as temporary registers (caller-save) + + .text + +@@@ Unsigned comparison + + .global __i64_ucmp +__i64_ucmp: + cmp r1, r3 @ compare high words + cmpeq r0, r2 @ if equal, compare low words instead + moveq r0, #0 @ res = 0 if eq + movhi r0, #1 @ res = 1 if unsigned higher + mvnlo r0, #0 @ res = -1 if unsigned lower + bx lr + .type __i64_ucmp, %function + .size __i64_ucmp, . - __i64_ucmp + +@@@ Signed comparison + + .global __i64_scmp +__i64_scmp: + cmp r0, r2 @ compare low words (unsigned) + moveq r0, #0 @ res = 0 if eq + movhi r0, #1 @ res = 1 if unsigned higher + mvnlo r0, #0 @ res = -1 if unsigned lower + cmp r1, r3 @ compare high words (signed) + addgt r0, r0, #2 @ res += 2 if signed greater + sublt r0, r0, #2 @ res -= 2 if signed less + @ here, r0 = 0 if X == Y + @ r0 = -3, -2, -1 if X < Y + @ r0 = 1, 2, 3 if X > Y + bx lr + .type __i64_scmp, %function + .size __i64_scmp, . - __i64_scmp + +@@@ Opposite + + .global __i64_neg +__i64_neg: + rsbs r0, r0, #0 + rsc r1, r1, #0 + bx lr + .type __i64_neg, %function + .size __i64_neg, . - __i64_neg + +@@@ Addition + + .global __i64_add +__i64_add: + adds r0, r0, r2 + adc r1, r1, r3 + bx lr + .type __i64_add, %function + .size __i64_add, . - __i64_add + +@@@ Subtraction + + .global __i64_sub +__i64_sub: + subs r0, r0, r2 + sbc r1, r1, r3 + bx lr + .type __i64_sub, %function + .size __i64_sub, . - __i64_sub + +@ Note on ARM shifts: the shift amount is taken modulo 256. +@ Therefore, unsigned shifts by 32 bits or more produce 0. + +@@@ Shift left + + .global __i64_shl +__i64_shl: + and r2, r2, #63 @ normalize amount to 0...63 + rsbs r3, r2, #32 @ r3 = 32 - amount + ble 1f @ branch if <= 0, namely if amount >= 32 + mov r1, r1, lsl r2 + orr r1, r0, lsr r3 + mov r0, r0, lsl r2 + bx lr +1: + sub r2, r2, #32 + mov r1, r0, lsl r2 + mov r0, #0 + bx lr + .type __i64_shl, %function + .size __i64_shl, . - __i64_shl + +@@@ Shift right unsigned + + .global __i64_shr +__i64_shr: + and r2, r2, #63 @ normalize amount to 0...63 + rsbs r3, r2, #32 @ r3 = 32 - amount + ble 1f @ branch if <= 0, namely if amount >= 32 + mov r0, r0, lsr r2 + orr r0, r1, lsl r3 + mov r1, r1, lsr r2 + bx lr +1: + sub r2, r2, #32 + mov r0, r1, lsr r2 + mov r1, #0 + bx lr + .type __i64_shr, %function + .size __i64_shr, . 
- __i64_shr + +@@@ Shift right signed + + .global __i64_sar +__i64_sar: + and r2, r2, #63 @ normalize amount to 0...63 + rsbs r3, r2, #32 @ r3 = 32 - amount + ble 1f @ branch if <= 0, namely if amount >= 32 + mov r0, r0, lsr r2 + orr r0, r1, lsl r3 + mov r1, r1, asr r2 + bx lr +1: + sub r2, r2, #32 + mov r0, r1, asr r2 + mov r1, r1, asr #31 + bx lr + .type __i64_sar, %function + .size __i64_sar, . - __i64_sar + +@@@ Multiplication + + .global __i64_mul +__i64_mul: + push {r4, r5} + mov r4, r0 @ save first arg in r4,r5 + mov r5, r1 + umull r0, r1, r2, r4 @ 64-bit product of low halves + mla r1, r2, r5, r1 @ add 32-bit products low half * high half + mla r1, r3, r4, r1 @ to high half of result + pop {r4, r5} + bx lr + .type __i64_mul, %function + .size __i64_mul, . - __i64_mul + +@@@ Auxiliary function for division and modulus. Not exported. + +@ On entry: N = (r0, r1) numerator D = (r2, r3) divisor +@ On exit: Q = (r4, r5) quotient R = (r0, r1) remainder +@ Locals: M = (r6, r7) mask TMP = r8 temporary + +__i64_udivmod: + orrs r8, r2, r3 @ is D == 0? + bxeq lr @ if so, return with unspecified results + mov r4, #0 @ Q = 0 + mov r5, #0 + mov r6, #1 @ M = 1 + mov r7, #0 +1: cmp r3, #0 @ while ((signed) D >= 0) ... + blt 2f + subs r8, r0, r2 @ ... and N >= D ... + sbcs r8, r1, r3 + blo 2f + adds r2, r2, r2 @ D = D << 1 + adc r3, r3, r3 + adds r6, r6, r6 @ M = M << 1 + adc r7, r7, r7 + b 1b +2: subs r0, r0, r2 @ N = N - D + sbcs r1, r1, r3 + orr r4, r4, r6 @ Q = Q | M + orr r5, r5, r7 + bhs 3f @ if N was >= D, continue + adds r0, r0, r2 @ otherwise, undo what we just did + adc r1, r1, r3 @ N = N + D + bic r4, r4, r6 @ Q = Q & ~M + bic r5, r5, r7 +3: movs r7, r7, lsr #1 @ M = M >> 1 + mov r6, r6, rrx + movs r3, r3, lsr #1 @ D = D >> 1 + mov r2, r2, rrx + orrs r8, r6, r7 @ repeat while (M != 0) ... + bne 2b + bx lr + +@@@ Unsigned division + + .global __i64_udiv +__i64_udiv: + push {r4, r5, r6, r7, r8, lr} + bl __i64_udivmod + mov r0, r4 + mov r1, r5 + pop {r4, r5, r6, r7, r8, lr} + bx lr + .type __i64_udiv, %function + .size __i64_udiv, . - __i64_udiv + +@@@ Unsigned modulus + + .global __i64_umod +__i64_umod: + push {r4, r5, r6, r7, r8, lr} + bl __i64_udivmod @ remainder is already in r0,r1 + pop {r4, r5, r6, r7, r8, lr} + bx lr + .type __i64_umod, %function + .size __i64_umod, . - __i64_umod + + .global __i64_sdiv +__i64_sdiv: + push {r4, r5, r6, r7, r8, r10, lr} + eor r10, r1, r3 @ r10 = sign of result + mov r4, r1, asr #31 @ take absolute value of N + eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31) + eor r1, r1, r4 + subs r0, r0, r4 + sbc r1, r1, r4 + mov r4, r3, asr #31 @ take absolute value of D + eor r2, r2, r4 + eor r3, r3, r4 + subs r2, r2, r4 + sbc r3, r3, r4 + bl __i64_udivmod @ do unsigned division + mov r0, r4 + mov r1, r5 + eor r0, r0, r10, asr#31 @ apply expected sign + eor r1, r1, r10, asr#31 + subs r0, r0, r10, asr#31 + sbc r1, r1, r10, asr#31 + pop {r4, r5, r6, r7, r8, r10, lr} + bx lr + .type __i64_sdiv, %function + .size __i64_sdiv, . 
- __i64_sdiv + +@@@ Signed modulus + + .global __i64_smod +__i64_smod: + push {r4, r5, r6, r7, r8, r10, lr} + mov r10, r1 @ r10 = sign of result + mov r4, r1, asr#31 @ take absolute value of N + eor r0, r0, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31) + eor r1, r1, r4 + subs r0, r0, r4 + sbc r1, r1, r4 + mov r4, r3, asr #31 @ take absolute value of D + eor r2, r2, r4 + eor r3, r3, r4 + subs r2, r2, r4 + sbc r3, r3, r4 + bl __i64_udivmod @ do unsigned division + eor r0, r0, r10, asr#31 @ apply expected sign + eor r1, r1, r10, asr#31 + subs r0, r0, r10, asr#31 + sbc r1, r1, r10, asr#31 + pop {r4, r5, r6, r7, r8, r10, lr} + bx lr + .type __i64_smod, %function + .size __i64_smod, . - __i64_smod + +@@@ Conversion from unsigned 64-bit integer to double float + + .global __i64_utod +__i64_utod: + fmsr s0, r0 + fuitod d0, s0 @ convert low half to double (unsigned) + fmsr s2, r1 + fuitod d1, s2 @ convert high half to double (unsigned) + fldd d2, .LC1 @ d2 = 2^32 + fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 + fmrrd r0, r1, d0 @ return result in r0, r1 + bx lr + .type __i64_utod, %function + .size __i64_utod, . - __i64_utod + + .balign 8 +.LC1: .quad 0x41f0000000000000 @ 2^32 in double precision + +@@@ Conversion from signed 64-bit integer to double float + + .global __i64_stod +__i64_stod: + fmsr s0, r0 + fuitod d0, s0 @ convert low half to double (unsigned) + fmsr s2, r1 + fsitod d1, s2 @ convert high half to double (signed) + fldd d2, .LC1 @ d2 = 2^32 + fmacd d0, d1, d2 @ d0 = d0 + d1 * d2 = double value of int64 + fmrrd r0, r1, d0 @ return result in r0, r1 + bx lr + .type __i64_stod, %function + .size __i64_stod, . - __i64_stod + +@@@ Conversion from double float to unsigned 64-bit integer + + .global __i64_dtou +__i64_dtou: + cmp r1, #0 @ is double < 0 ? + blt 1f @ then it converts to 0 + @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2 + @ note: 1023 + 52 = 1075 = 1024 + 51 + @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21 + mov r2, r1, lsl #1 + mov r2, r2, lsr #21 + sub r2, r2, #51 + sub r2, r2, #1024 + @ check range of exponent + cmn r2, #52 @ if EXP < -52, double is < 1.0 + blt 1f + cmp r2, #12 @ if EXP >= 64 - 52, double is >= 2^64 + bge 2f + @ extract true mantissa + bic r1, r1, #0xFF000000 + bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000 + orr r1, r1, #0x00100000 @ HI |= 0x00100000 + @ shift it appropriately + cmp r2, #0 + bge __i64_shl @ if EXP >= 0, shift left by EXP + rsb r2, r2, #0 + b __i64_shr @ otherwise, shift right by -EXP + @ special cases +1: mov r0, #0 @ result is 0 + mov r1, #0 + bx lr +2: mvn r0, #0 @ result is 0xFF....FF (MAX_UINT) + mvn r1, #0 + bx lr + .type __i64_dtou, %function + .size __i64_dtou, . 
- __i64_dtou + +@@@ Conversion from double float to signed 64-bit integer + + .global __i64_dtos +__i64_dtos: + push {r4, lr} + mov r4, r1, asr #31 @ save sign in r4 + @ extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) in r2 + @ note: 1023 + 52 = 1075 = 1024 + 51 + @ note: (HI & 0x7FF00000) >> 20 = (HI << 1) >> 21 + mov r2, r1, lsl #1 + mov r2, r2, lsr #21 + sub r2, r2, #51 + sub r2, r2, #1024 + @ check range of exponent + cmn r2, #52 @ if EXP < -52, |double| is < 1.0 + blt 1f + cmp r2, #11 @ if EXP >= 63 - 52, |double| is >= 2^63 + bge 2f + @ extract true mantissa + bic r1, r1, #0xFF000000 + bic r1, r1, #0x00F00000 @ HI &= ~0xFFF00000 + orr r1, r1, #0x00100000 @ HI |= 0x00100000 + @ shift it appropriately + cmp r2, #0 + blt 3f + bl __i64_shl @ if EXP >= 0, shift left by EXP + b 4f +3: rsb r2, r2, #0 + bl __i64_shr @ otherwise, shift right by -EXP + @ apply sign to result +4: eor r0, r0, r4 + eor r1, r1, r4 + subs r0, r0, r4 + sbc r1, r1, r4 + pop {r4, lr} + bx lr + @ special cases +1: mov r0, #0 @ result is 0 + mov r1, #0 + pop {r4, lr} + bx lr +2: cmp r4, #0 + blt 6f + mvn r0, #0 @ result is 0x7F....FF (MAX_SINT) + mov r1, r0, lsr #1 + pop {r4, lr} + bx lr +6: mov r0, #0 @ result is 0x80....00 (MIN_SINT) + mov r1, #0x80000000 + pop {r4, lr} + bx lr + .type __i64_dtos, %function + .size __i64_dtos, . - __i64_dtos + diff --git a/runtime/ia32/int64.s b/runtime/ia32/int64.s new file mode 100644 index 0000000..8fd8151 --- /dev/null +++ b/runtime/ia32/int64.s @@ -0,0 +1,471 @@ +# ***************************************************************** +# +# The Compcert verified compiler +# +# Xavier Leroy, INRIA Paris-Rocquencourt +# +# Copyright (c) 2013 Institut National de Recherche en Informatique et +# en Automatique. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# ********************************************************************* + +# Helper functions for 64-bit integer arithmetic. IA32 version. + + .text + +# Opposite + + .globl __i64_neg + .balign 16 +__i64_neg: + movl 4(%esp), %eax + movl 8(%esp), %edx + negl %eax + adcl $0, %edx + negl %edx + ret + .type __i64_neg, @function + .size __i64_neg, . 
- __i64_neg + +# Addition + + .globl __i64_add + .balign 16 +__i64_add: + movl 4(%esp), %eax + movl 8(%esp), %edx + addl 12(%esp), %eax + adcl 16(%esp), %edx + ret + .type __i64_add, @function + .size __i64_add, . - __i64_add + +# Subtraction + + .globl __i64_sub + .balign 16 +__i64_sub: + movl 4(%esp), %eax + movl 8(%esp), %edx + subl 12(%esp), %eax + sbbl 16(%esp), %edx + ret + .type __i64_sub, @function + .size __i64_sub, . - __i64_sub + +# Multiplication + + .globl __i64_mul + .balign 16 +__i64_mul: + movl 4(%esp), %eax + mull 12(%esp) # edx:eax = xlo * ylo + movl 4(%esp), %ecx + imull 16(%esp), %ecx # ecx = xlo * yhi + addl %ecx, %edx + movl 12(%esp), %ecx # ecx = xhi * ylo + imull 8(%esp), %ecx + addl %ecx, %edx + ret + .type __i64_mul, @function + .size __i64_mul, . - __i64_mul + +# Division and remainder + +# Auxiliary function, not exported. +# Input: 20(esp), 24(esp) is dividend N +# 28(esp), 32(esp) is divisor D +# Output: esi:edi is quotient Q +# eax:edx is remainder R +# ebp is preserved + + .balign 16 +__i64_udivmod: + cmpl $0, 32(%esp) # single-word divisor? (DH = 0) + jne 1f + # Special case 64 bits divided by 32 bits + movl 28(%esp), %ecx # divide NH by DL + movl 24(%esp), %eax # (will trap if D = 0) + xorl %edx, %edx + divl %ecx # eax = quotient, edx = remainder + movl %eax, %edi # high word of quotient in edi + movl 20(%esp), %eax # divide rem : NL by DL + divl %ecx # eax = quotient, edx = remainder + movl %eax, %esi # low word of quotient in esi */ + movl %edx, %eax # low word of remainder in eax + xorl %edx, %edx # high word of remainder is 0, in edx + ret + # The general case +1: movl 28(%esp), %ecx # esi:ecx = D + movl 32(%esp), %esi + movl 20(%esp), %eax # edx:eax = N + movl 24(%esp), %edx + # Scale D and N down, giving D' and N', until D' fits in 32 bits +2: shrl $1, %esi # shift D' right by one + rcrl $1, %ecx + shrl $1, %edx # shift N' right by one + rcrl $1, %eax + testl %esi, %esi # repeat until D'H = 0 + jnz 2b + # Divide N' by D' to get an approximate quotient + divl %ecx # eax = quotient, edx = remainder + movl %eax, %esi # save tentative quotient Q in esi + # Check for off by one quotient + # Compute Q * D +3: movl 32(%esp), %ecx + imull %esi, %ecx # ecx = Q * DH + movl 28(%esp), %eax + mull %esi # edx:eax = Q * DL + add %ecx, %edx # edx:eax = Q * D + jc 5f # overflow in addition means Q is too high + # Compare Q * D with N, computing the remainder in the process + movl %eax, %ecx + movl 20(%esp), %eax + subl %ecx, %eax + movl %edx, %ecx + movl 24(%esp), %edx + sbbl %ecx, %edx # edx:eax = N - Q * D + jnc 4f # no carry: N >= Q * D, we are fine + decl %esi # carry: N < Q * D, adjust Q down by 1 + addl 28(%esp), %eax # and remainder up by D + adcl 32(%esp), %edx + # Finished +4: xorl %edi, %edi # high half of quotient is 0 + ret + # Special case when Q * D overflows +5: decl %esi # adjust Q down by 1 + jmp 3b # and redo check & computation of remainder + +# Unsigned division + + .globl __i64_udiv + .balign 16 +__i64_udiv: + pushl %ebp + pushl %esi + pushl %edi + call __i64_udivmod + movl %esi, %eax + movl %edi, %edx + popl %edi + popl %esi + popl %ebp + ret + .type __i64_udiv, @function + .size __i64_udiv, . - __i64_udiv + +# Unsigned remainder + + .globl __i64_umod + .balign 16 +__i64_umod: + pushl %ebp + pushl %esi + pushl %edi + call __i64_udivmod + popl %edi + popl %esi + popl %ebp + ret + .type __i64_umod, @function + .size __i64_umod, . 
- __i64_umod + +# Signed division + + .globl __i64_sdiv + .balign 16 +__i64_sdiv: + pushl %ebp + pushl %esi + pushl %edi + movl 20(%esp), %esi # esi = NH + movl %esi, %ebp # save sign of N in ebp + testl %esi, %esi + jge 1f # if N < 0, + negl 16(%esp) # N = -N + adcl $0, %esi + negl %esi + movl %esi, 20(%esp) +1: movl 28(%esp), %esi # esi = DH + xorl %esi, %ebp # sign of result in ebp + testl %esi, %esi + jge 2f # if D < 0, + negl 24(%esp) # D = -D + adcl $0, %esi + negl %esi + movl %esi, 28(%esp) +2: call __i64_udivmod + testl %ebp, %ebp # apply sign to result + jge 3f + negl %esi + adcl $0, %edi + negl %edi +3: movl %esi, %eax + movl %edi, %edx + popl %edi + popl %esi + popl %ebp + ret + .type __i64_sdiv, @function + .size __i64_sdiv, . - __i64_sdiv + +# Signed remainder + + .globl __i64_smod + .balign 16 +__i64_smod: + pushl %ebp + pushl %esi + pushl %edi + movl 20(%esp), %esi # esi = NH + movl %esi, %ebp # save sign of result in ebp + testl %esi, %esi + jge 1f # if N < 0, + negl 16(%esp) # N = -N + adcl $0, %esi + negl %esi + movl %esi, 20(%esp) +1: movl 28(%esp), %esi # esi = DH + testl %esi, %esi + jge 2f # if D < 0, + negl 24(%esp) # D = -D + adcl $0, %esi + negl %esi + movl %esi, 28(%esp) +2: call __i64_udivmod + testl %ebp, %ebp # apply sign to result + jge 3f + negl %eax + adcl $0, %edx + negl %edx +3: popl %edi + popl %esi + popl %ebp + ret + .type __i64_sdiv, @function + .size __i64_sdiv, . - __i64_sdiv + +# Note on shifts: +# IA32 shift instructions treat their amount (in %cl) modulo 32 + +# Shift left + + .globl __i64_shl + .balign 16 +__i64_shl: + movl 12(%esp), %ecx # ecx = shift amount, treated mod 64 + testb $32, %cl + jne 1f + # shift amount < 32 + movl 4(%esp), %eax + movl 8(%esp), %edx + shldl %cl, %eax, %edx # edx = high(XH:XL << amount) + shll %cl, %eax # eax = XL << amount + ret + # shift amount >= 32 +1: movl 4(%esp), %edx + shll %cl, %edx # edx = XL << (amount - 32) + xorl %eax, %eax # eax = 0 + ret + .type __i64_shl, @function + .size __i64_shl, . - __i64_shl + +# Shift right unsigned + + .globl __i64_shr + .balign 16 +__i64_shr: + movl 12(%esp), %ecx # ecx = shift amount, treated mod 64 + testb $32, %cl + jne 1f + # shift amount < 32 + movl 4(%esp), %eax + movl 8(%esp), %edx + shrdl %cl, %edx, %eax # eax = low(XH:XL >> amount) + shrl %cl, %edx # edx = XH >> amount + ret + # shift amount >= 32 +1: movl 8(%esp), %eax + shrl %cl, %eax # eax = XH >> (amount - 32) + xorl %edx, %edx # edx = 0 + ret + .type __i64_shr, @function + .size __i64_shr, . - __i64_shr + +# Shift right signed + + .globl __i64_sar + .balign 16 +__i64_sar: + movl 12(%esp), %ecx # ecx = shift amount, treated mod 64 + testb $32, %cl + jne 1f + # shift amount < 32 + movl 4(%esp), %eax + movl 8(%esp), %edx + shrdl %cl, %edx, %eax # eax = low(XH:XL >> amount) + sarl %cl, %edx # edx = XH >> amount (signed) + ret + # shift amount >= 32 +1: movl 8(%esp), %eax + movl %eax, %edx + sarl %cl, %eax # eax = XH >> (amount - 32) + sarl $31, %edx # edx = sign of X + ret + .type __i64_sar, @function + .size __i64_sar, . - __i64_sar + +# Unsigned comparison + + .globl __i64_ucmp + .balign 16 +__i64_ucmp: + movl 8(%esp), %eax # compare high words + cmpl 16(%esp), %eax + jne 1f # if high words equal, + movl 4(%esp), %eax # compare low words + cmpl 12(%esp), %eax +1: seta %al # AL = 1 if >, 0 if <= + setb %dl # DL = 1 if <, 0 if >= + subb %dl, %al # AL = 0 if same, 1 if >, -1 if < + movsbl %al, %eax + ret + .type __i64_ucmp, @function + .size __i64_ucmp, . 
- __i64_ucmp + +# Signed comparison + + .globl __i64_scmp + .balign 16 +__i64_scmp: + movl 8(%esp), %eax # compare high words (signed) + cmpl 16(%esp), %eax + je 1f # if different, + setg %al # extract result + setl %dl + subb %dl, %al + movsbl %al, %eax + ret +1: movl 4(%esp), %eax # if high words equal, + cmpl 12(%esp), %eax # compare low words (unsigned) + seta %al # and extract result + setb %dl + subb %dl, %al + movsbl %al, %eax + ret + .type __i64_scmp, @function + .size __i64_scmp, . - __i64_scmp + +# Conversion signed long -> float + + .globl __i64_stod + .balign 16 +__i64_stod: + fildll 4(%esp) + ret + .type __i64_stod, @function + .size __i64_stod, . - __i64_stod + +# Conversion unsigned long -> float + + .globl __i64_utod + .balign 16 +__i64_utod: + fildll 4(%esp) # convert as if signed + cmpl $0, 8(%esp) # is argument >= 2^63? + jns 1f + fadds LC1 # adjust by 2^64 +1: ret + .type __i64_stod, @function + .size __i64_stod, . - __i64_stod + + .balign 4 +LC1: .long 0x5f800000 # 2^64 in single precision + +# Conversion float -> signed long + + .globl __i64_dtos + .balign 16 +__i64_dtos: + subl $4, %esp + # Change rounding mode to "round towards zero" + fnstcw 0(%esp) + movw 0(%esp), %ax + movb $12, %ah + movw %ax, 2(%esp) + fldcw 2(%esp) + # Convert + fldl 8(%esp) + fistpll 8(%esp) + # Restore rounding mode + fldcw 0(%esp) + # Load result in edx:eax + movl 8(%esp), %eax + movl 12(%esp), %edx + addl $4, %esp + ret + .type __i64_dtos, @function + .size __i64_dtos, . - __i64_dtos + +# Conversion float -> unsigned long + + .globl __i64_dtou + .balign 16 +__i64_dtou: + subl $4, %esp + # Change rounding mode to "round towards zero" + fnstcw 0(%esp) + movw 0(%esp), %ax + movb $12, %ah + movw %ax, 2(%esp) + fldcw 2(%esp) + # Compare argument with 2^63 + fldl (4+4)(%esp) + flds LC2 + fucomp + fnstsw %ax + sahf + jbe 1f # branch if not (ARG < 2^63) + # Argument < 2^63: convert as is + fistpll 8(%esp) + movl 8(%esp), %eax + movl 12(%esp), %edx + jmp 2f + # Argument > 2^63: offset ARG by -2^63, then convert, then offset RES by 2^63 +1: fsubs LC2 + fistpll 8(%esp) + movl 8(%esp), %eax + movl 12(%esp), %edx + addl $0x80000000, %edx + # Restore rounding mode +2: fldcw 0(%esp) + addl $4, %esp + ret + .type __i64_dtou, @function + .size __i64_dtou, . - __i64_dtou + + .balign 4 +LC2: .long 0x5f000000 # 2^63 in single precision diff --git a/runtime/powerpc/int64.s b/runtime/powerpc/int64.s new file mode 100644 index 0000000..34b65b5 --- /dev/null +++ b/runtime/powerpc/int64.s @@ -0,0 +1,492 @@ +# ***************************************************************** +# +# The Compcert verified compiler +# +# Xavier Leroy, INRIA Paris-Rocquencourt +# +# Copyright (c) 2013 Institut National de Recherche en Informatique et +# en Automatique. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the nor the +# names of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# ********************************************************************* + +# Helper functions for 64-bit integer arithmetic. PowerPC version. + +# Calling conventions for R = F(X) or R = F(X,Y): +# one or two long arguments: XH in r3, XL in r4, YH in r5, YL in r6 +# one long argument, one int: XH in r3, XL in r4, Y in r5 +# one float argument: X in f1 +# one long result: RH in r3, RL in r4 +# one float result: R in f1 +# This is a big-endian convention: the high word is in the low-numbered register +# Can use r3...r12 and f0...r13 as temporary registers (caller-save) + + .text + +### Opposite + + .balign 16 + .globl __i64_neg +__i64_neg: + subfic r4, r4, 0 # RL = -XL and set borrow iff XL != 0 + subfze r3, r3 # RH = -XH - borrow + blr + .type __i64_neg, @function + .size __i64_neg, .-__i64_neg + +### Addition + + .balign 16 + .globl __i64_add +__i64_add: + addc r4, r4, r6 # RL = XL + YL and set carry if overflow + adde r3, r3, r5 # RH = XH + YH + carry + blr + .type __i64_add, @function + .size __i64_add, .-__i64_add + +### Subtraction + + .balign 16 + .globl __i64_sub +__i64_sub: + subfc r4, r6, r4 # RL = XL - YL and set borrow if underflow + subfe r3, r5, r3 # RH = XH - YH - borrow + blr + .type __i64_sub, @function + .size __i64_sub, .-__i64_sub + +### Multiplication + + .balign 16 + .globl __i64_mul +__i64_mul: + # Form intermediate products + mulhwu r7, r4, r6 # r7 = high half of XL * YL + mullw r8, r3, r6 # r8 = low half of XH * YL + mullw r9, r4, r5 # r9 = low half of XL * YH + mullw r4, r4, r6 # r4 = low half of XL * YL = low half of result + # Reconstruct high half of result + add r3, r7, r8 + add r3, r3, r9 + blr + .type __i64_mul, @function + .size __i64_mul, .-__i64_mul + +### Helper function for division and modulus. Not exported. +# Input: numerator N in (r3,r4), divisor D in (r5,r6) +# Output: quotient Q in (r7,r8), remainder R in (r3,r4) + .balign 16 +__i64_udivmod: + # Set up quotient and mask + li r8, 0 # Q = 0 + li r7, 0 + li r10, 1 # M = 1 + li r9, 0 + # Check for zero divisor + or. r0, r6, r5 + beqlr # return with unspecified quotient & remainder + # Scale divisor and mask +1: cmpwi r5, 0 # while top bit of D is zero... + blt 2f + subfc r0, r6, r4 # compute borrow out of N - D + subfe r0, r5, r3 + subfe. r0, r0, r0 # EQ iff no borrow iff N >= D + bne 2f # ... and while N >= D ... + addc r6, r6, r6 # scale divisor: D = D << 1 + adde r5, r5, r5 + addc r10, r10, r10 # scale mask: M = M << 1 + adde r9, r9, r9 + b 1b # end while + # Long division +2: subfc r4, r6, r4 # Q = Q | M, N = N - D, and compute borrow + or r8, r8, r10 + subfe r3, r5, r3 + or r7, r7, r9 + subfe. 
r0, r0, r0 # test borrow + beq 3f # no borrow: N >= D, continue + addc r4, r4, r6 # borrow: undo what we just did to N and Q + andc r8, r8, r10 + adde r3, r3, r5 + andc r7, r7, r9 +3: slwi r0, r9, 31 # unscale mask: M = M >> 1 + srwi r10, r10, 1 + or r10, r10, r0 + srwi r9, r9, 1 + slwi r0, r5, 31 # unscale divisor: D = D >> 1 + srwi r6, r6, 1 + or r6, r6, r0 + srwi r5, r5, 1 + or. r0, r10, r9 # iterate while M != 0 + bne 2b + blr + +### Unsigned division + + .balign 16 + .globl __i64_udiv +__i64_udiv: + mflr r11 # save return address in r11 + bl __i64_udivmod # unsigned divide + mtlr r11 # restore return address + mr r3, r7 # R = quotient + mr r4, r8 + blr + .type __i64_udiv, @function + .size __i64_udiv, .-__i64_udiv + +### Unsigned remainder + + .balign 16 + .globl __i64_umod +__i64_umod: + mflr r11 + bl __i64_udivmod + mtlr r11 + blr # remainder is already in R + .type __i64_umod, @function + .size __i64_umod, .-__i64_umod + +### Signed division + + .balign 16 + .globl __i64_sdiv +__i64_sdiv: + mflr r11 # save return address + xor r12, r3, r5 # save sign of result in r12 (top bit) + srawi r0, r3, 31 # take absolute value of N + xor r4, r4, r0 # (i.e. N = N ^ r0 - r0, + xor r3, r3, r0 # where r0 = 0 if N >= 0 and r0 = -1 if N < 0) + subfc r4, r0, r4 + subfe r3, r0, r3 + srawi r0, r5, 31 # take absolute value of D + xor r6, r6, r0 # (same trick) + xor r5, r5, r0 + subfc r6, r0, r6 + subfe r5, r0, r5 + bl __i64_udivmod # do unsigned division + mtlr r11 # restore return address + srawi r0, r12, 31 # apply expected sign to quotient + xor r8, r8, r0 # RES = Q if r12 >= 0, -Q if r12 < 0 + xor r7, r7, r0 + subfc r4, r0, r8 + subfe r3, r0, r7 + blr + .type __i64_sdiv, @function + .size __i64_sdiv, .-__i64_sdiv + +## Signed remainder + + .balign 16 + .globl __i64_smod +__i64_smod: + mflr r11 # save return address + srawi r12, r3, 31 # save sign of result in r12 (sign of N) + xor r4, r4, r12 # and take absolute value of N + xor r3, r3, r12 + subfc r4, r12, r4 + subfe r3, r12, r3 + srawi r0, r5, 31 # take absolute value of D + xor r6, r6, r0 + xor r5, r5, r0 + subfc r6, r0, r6 + subfe r5, r0, r5 + bl __i64_udivmod # do unsigned division + mtlr r11 # restore return address + xor r4, r4, r12 # apply expected sign to remainder + xor r3, r3, r12 # RES = R if r12 == 0, -R if r12 == -1 + subfc r4, r12, r4 + subfe r3, r12, r3 + blr + .type __i64_smod, @function + .size __i64_smod, .-__i64_smod + +### Unsigned comparison + + .balign 16 + .globl __i64_ucmp +__i64_ucmp: + cmplw cr0, r3, r5 # compare high words (unsigned) + cmplw cr1, r4, r6 # compare low words (unsigned) + mfcr r0 +# At this point, the bits of r0 are as follow: +# bit 31: XH < YH +# bit 30: XH > YH +# bit 27: XL > YL +# bit 26: XL < YL + rlwinm r3, r0, 0, 0, 1 # r3 = r0 & 0xC000_0000 + srawi r3, r3, 24 # r4 = r4 >>s 28 +# r3 = -0x80 if XH < YH +# = 0x40 if XH > YH +# = 0 if XH = YH + rlwinm r4, r0, 4, 0, 1 # r4 = (r0 << 4) & 0xC000_0000 + srawi r4, r4, 28 # r4 = r4 >>s 28 +# r4 = -8 if XL < YL +# = 4 if XL > YL +# = 0 if XL = YL + add r3, r3, r4 +# r3 = -0x80 or -0x80 - 8 or -0x80 + 4 or -8 if X < Y +# (in all cases, r3 < 0 if X < Y) +# = 0x40 or 0x40 - 8 or 0x40 + 4 or 4 if X > Y +# (in all cases, r3 > 0 if X > Y) +# = 0 if X = Y + blr + .type __i64_ucmp, @function + .size __i64_ucmp, .-__i64_ucmp + +### Signed comparison + + .balign 16 + .globl __i64_scmp +__i64_scmp: + cmpw cr0, r3, r5 # compare high words (signed) + cmplw cr1, r4, r6 # compare low words (unsigned) + mfcr r0 +# Same trick as in __i64_ucmp + rlwinm r3, r0, 0, 0, 1 + srawi 
r3, r3, 24 + rlwinm r4, r0, 4, 0, 1 + srawi r4, r4, 28 + add r3, r3, r4 + blr + .type __i64_scmp, @function + .size __i64_scmp, .-__i64_scmp + +### Shifts + +# On PowerPC, shift instructions with amount mod 64 >= 32 return 0 + + .balign 16 + .globl __i64_shl +__i64_shl: +# hi = (hi << amount) | (lo >> (32 - amount)) | (lo << (amount - 32)) +# lo = lo << amount +# if 0 <= amount < 32: +# (amount - 32) mod 64 >= 32, hence lo << (amount - 32) == 0 +# if 32 <= amount < 64: +# lo << amount == 0 +# (32 - amount) mod 64 >= 32, hence lo >> (32 - amount) == 0 + andi. r5, r5, 63 # take amount modulo 64 + subfic r6, r5, 32 # r6 = 32 - amount + addi r7, r5, -32 # r7 = amount - 32 + slw r3, r3, r5 + srw r0, r4, r6 + or r3, r3, r0 + slw r0, r4, r7 + or r3, r3, r0 + slw r4, r4, r5 + blr + .type __i64_shl, @function + .size __i64_shl, .-__i64_shl + + .balign 16 + .globl __i64_shr +__i64_shr: +# lo = (lo >> amount) | (hi << (32 - amount)) | (hi >> (amount - 32)) +# hi = hi >> amount +# if 0 <= amount < 32: +# (amount - 32) mod 64 >= 32, hence hi >> (amount - 32) == 0 +# if 32 <= amount < 64: +# hi >> amount == 0 +# (32 - amount) mod 64 >= 32, hence hi << (32 - amount) == 0 + andi. r5, r5, 63 # take amount modulo 64 + subfic r6, r5, 32 # r6 = 32 - amount + addi r7, r5, -32 # r7 = amount - 32 + srw r4, r4, r5 + slw r0, r3, r6 + or r4, r4, r0 + srw r0, r3, r7 + or r4, r4, r0 + srw r3, r3, r5 + blr + .type __i64_shr, @function + .size __i64_shr, .-__i64_shr + + .balign 16 + .globl __i64_sar +__i64_sar: + andi. r5, r5, 63 # take amount modulo 64 + cmpwi r5, 32 + bge 1f # amount < 32? + subfic r6, r5, 32 # r6 = 32 - amount + srw r4, r4, r5 # RH = XH >>s amount + slw r0, r3, r6 # RL = XL >>u amount | XH << (32 - amount) + or r4, r4, r0 + sraw r3, r3, r5 + blr +1: addi r6, r5, -32 # amount >= 32 + sraw r4, r3, r6 # RL = XH >>s (amount - 32) + srawi r3, r3, 31 # RL = sign extension of XH + blr + .type __i64_sar, @function + .size __i64_sar, .-__i64_sar + +### Conversion from unsigned long to double float + + .balign 16 + .globl __i64_utod +__i64_utod: + addi r1, r1, -16 + lis r5, 0x4330 + li r6, 0 + stw r5, 0(r1) + stw r4, 4(r1) + stw r5, 8(r1) + stw r6, 12(r1) + lfd f1, 0(r1) + lfd f2, 8(r1) + fsub f1, f1, f2 # f1 is XL as a double + lis r5, 0x4530 + stw r5, 0(r1) + stw r3, 4(r1) + stw r5, 8(r1) + lfd f2, 0(r1) + lfd f3, 8(r1) + fsub f2, f2, f3 # f2 is XH * 2^32 as a double + fadd f1, f1, f2 # add both to get result + addi r1, r1, 16 + blr + .type __i64_utod, @function + .size __i64_utod, .-__i64_utod + +### Conversion from signed long to double float + + .balign 16 + .globl __i64_stod +__i64_stod: + addi r1, r1, -16 + lis r5, 0x4330 + li r6, 0 + stw r5, 0(r1) + stw r4, 4(r1) + stw r5, 8(r1) + stw r6, 12(r1) + lfd f1, 0(r1) + lfd f2, 8(r1) + fsub f1, f1, f2 # f1 is XL (unsigned) as a double + lis r5, 0x4530 + lis r6, 0x8000 + stw r5, 0(r1) + add r3, r3, r6 + stw r3, 4(r1) + stw r5, 8(r1) + stw r6, 12(r1) + lfd f2, 0(r1) + lfd f3, 8(r1) + fsub f2, f2, f3 # f2 is XH (signed) * 2^32 as a double + fadd f1, f1, f2 # add both to get result + addi r1, r1, 16 + blr + .type __i64_stod, @function + .size __i64_stod, .-__i64_stod + +### Conversion from double float to unsigned long + + .balign 16 + .globl __i64_dtou +__i64_dtou: + stfdu f1, -16(r1) # extract LO (r4) and HI (r3) halves of double + lwz r3, 0(r1) + lwz r4, 4(r1) + addi r1, r1, 16 + cmpwi r3, 0 # is double < 0? 
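+	# Strategy note (added comment): a finite double is M * 2^EXP, where
+	# M is the 53-bit mantissa (implicit leading 1 included) and EXP is
+	# the unbiased exponent minus 52.  The code below materializes M in
+	# (r3,r4) and shifts it by EXP, saturating to 0 (negative or < 1.0)
+	# or to MAX_UINT (>= 2^64).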
+ blt 1f # then it converts to 0 + # extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) + rlwinm r5, r3, 12, 21, 31 + addi r5, r5, -1075 + # check range of exponent + cmpwi r5, -52 # if EXP < -52, double is < 1.0 + blt 1f + cmpwi r5, 12 # if EXP >= 64 - 52, double is >= 2^64 + bge 2f + # extract true mantissa + rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000 + oris r3, r3, 0x10 # HI |= 0x00100000 + # shift it appropriately + cmpwi r5, 0 + blt 3f + b __i64_shl # if EXP >= 0, shift left by EXP +3: subfic r5, r5, 0 + b __i64_shr # if EXP < 0, shift right by -EXP + # Special cases +1: li r3, 0 # result is 0 + li r4, 0 + blr +2: li r3, -1 # result is MAX_UINT + li r4, -1 + blr + .type __i64_dtou, @function + .size __i64_dtou, .-__i64_dtou + +### Conversion from double float to signed long + + .balign 16 + .globl __i64_dtos +__i64_dtos: + stfdu f1, -16(r1) # extract LO (r4) and HI (r3) halves of double + lwz r3, 0(r1) + lwz r4, 4(r1) + addi r1, r1, 16 + srawi r10, r3, 31 # save sign of double in r10 + # extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52) + rlwinm r5, r3, 12, 21, 31 + addi r5, r5, -1075 + # check range of exponent + cmpwi r5, -52 # if EXP < -52, abs(double) is < 1.0 + blt 1f + cmpwi r5, 11 # if EXP >= 63 - 52, abs(double) is >= 2^63 + bge 2f + # extract true mantissa + rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000 + oris r3, r3, 0x10 # HI |= 0x00100000 + # shift it appropriately + mflr r9 # save retaddr in r9 + cmpwi r5, 0 + blt 3f + bl __i64_shl # if EXP >= 0, shift left by EXP + b 4f +3: subfic r5, r5, 0 + bl __i64_shr # if EXP < 0, shift right by -EXP + # apply sign to result +4: mtlr r9 + xor r4, r4, r10 + xor r3, r3, r10 + subfc r4, r10, r4 + subfe r3, r10, r3 + blr + # Special cases +1: li r3, 0 # result is 0 + li r4, 0 + blr +2: cmpwi r10, 0 # result is MAX_SINT or MIN_SINT + bge 5f # depending on sign + li r4, -1 # result is MAX_SINT = 0x7FFF_FFFF + srwi r3, r4, 1 + blr +5: lis r3, 0x8000 # result is MIN_SINT = 0x8000_0000 + li r4, 0 + blr + .type __i64_dtos, @function + .size __i64_dtos, .-__i64_dtos diff --git a/runtime/test/test_int64.c b/runtime/test/test_int64.c new file mode 100644 index 0000000..11adce3 --- /dev/null +++ b/runtime/test/test_int64.c @@ -0,0 +1,238 @@ +/* ************************************************************************** */ +/* */ +/* The Compcert verified compiler */ +/* */ +/* Xavier Leroy, INRIA Paris-Rocquencourt */ +/* */ +/* Copyright (c) 2013 Institut National de Recherche en Informatique et */ +/* en Automatique. */ +/* */ +/* Redistribution and use in source and binary forms, with or without */ +/* modification, are permitted provided that the following conditions are met:*/ +/* * Redistributions of source code must retain the above copyright */ +/* notice, this list of conditions and the following disclaimer. */ +/* * Redistributions in binary form must reproduce the above copyright */ +/* notice, this list of conditions and the following disclaimer in the */ +/* documentation and/or other materials provided with the distribution. */ +/* * Neither the name of the nor the */ +/* names of its contributors may be used to endorse or promote products */ +/* derived from this software without specific prior written permission.*/ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ +/* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */ +/* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR */ +/* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ +/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, */ +/* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR */ +/* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING */ +/* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS */ +/* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* ************************************************************************** */ + +/* Differential testing of 64-bit integer operations */ +/* This file is to be compiled by a C compiler other than CompCert C */ + +#include +#include + +typedef unsigned long long u64; +typedef signed long long s64; + +extern s64 __i64_neg(s64 x); +extern s64 __i64_add(s64 x, s64 y); +extern s64 __i64_sub(s64 x, s64 y); +extern s64 __i64_mul(s64 x, s64 y); +extern u64 __i64_udiv(u64 x, u64 y); +extern u64 __i64_umod(u64 x, u64 y); +extern s64 __i64_sdiv(s64 x, s64 y); +extern s64 __i64_smod(s64 x, s64 y); + +extern u64 __i64_shl(u64 x, unsigned amount); +extern u64 __i64_shr(u64 x, unsigned amount); +extern s64 __i64_sar(s64 x, unsigned amount); + +extern int __i64_ucmp(u64 x, u64 y); +extern int __i64_scmp(s64 x, s64 y); + +extern double __i64_utod(u64 x); +extern double __i64_stod(s64 x); +extern u64 __i64_dtou(double d); +extern s64 __i64_dtos(double d); + +static u64 rnd64(void) +{ + static u64 seed = 0; + seed = seed * 6364136223846793005ULL + 1442695040888963407ULL; + return seed; +} + +static int error = 0; + +static void test1(u64 x, u64 y) +{ + u64 z, uy; + s64 t, sy; + int i; + double f, g; + + z = __i64_neg(x); + if (z != -x) + error++, printf("- %016llx = %016llx, expected %016llx\n", x, z, -x); + z = __i64_add(x, y); + if (z != x + y) + error++, printf("%016llx + %016llx = %016llx, expected %016llx\n", x, y, z, x + y); + + z = __i64_sub(x, y); + if (z != x - y) + error++, printf("%016llx - %016llx = %016llx, expected %016llx\n", x, y, z, x - y); + + z = __i64_mul(x, y); + if (z != x * y) + error++, printf("%016llx * %016llx = %016llx, expected %016llx\n", x, y, z, x * y); + + if (y != 0) { + + z = __i64_udiv(x, y); + if (z != x / y) + error++, printf("%llu /u %llu = %llu, expected %llu\n", x, y, z, x / y); + + z = __i64_umod(x, y); + if (z != x % y) + error++, printf("%llu %%u %llu = %llu, expected %llu\n", x, y, z, x % y); + + } + + if (y != 0 && !(x == 0x800000000000LLU && y == -1)) { + + t = __i64_sdiv(x, y); + if (t != (s64) x / (s64) y) + error++, printf("%lld /s %lld = %lld, expected %lld\n", x, y, t, (s64) x / (s64) y); + + t = __i64_smod(x, y); + if (t != (s64) x % (s64) y) + error++, printf("%lld %%s %lld = %lld, expected %lld\n", x, y, t, (s64) x % (s64) y); + + } + + /* Test division with small (32-bit) divisors */ + uy = y >> 32; + sy = (s64)y >> 32; + + if (uy != 0) { + + z = __i64_udiv(x, uy); + if (z != x / uy) + error++, printf("%llu /u %llu = %llu, expected %llu\n", x, uy, z, x / uy); + + z = __i64_umod(x, uy); + if (z != x % uy) + error++, printf("%llu %%u %llu = %llu, expected %llu\n", x, uy, z, x % uy); + + } + + if (sy != 0 && !(x == 0x800000000000LLU && sy == -1)) { + + t = __i64_sdiv(x, sy); + if (t != (s64) x / sy) + error++, printf("%lld /s %lld = %lld, expected %lld\n", x, sy, t, (s64) x / sy); + + t = __i64_smod(x, sy); + if (t != (s64) x % sy) + error++, printf("%lld %%s %lld = %lld, expected %lld\n", x, sy, t, (s64) x % sy); + + } + + i = y & 
63; + + z = __i64_shl(x, i); + if (z != x << i) + error++, printf("%016llx << %d = %016llx, expected %016llx\n", x, i, z, x << i); + + z = __i64_shr(x, i); + if (z != x >> i) + error++, printf("%016llx >>u %d = %016llx, expected %016llx\n", x, i, z, x >> i); + + t = __i64_sar(x, i); + if (t != (s64) x >> i) + error++, printf("%016llx >>s %d = %016llx, expected %016llx\n", x, i, t, (s64) x >> i); + + i = __i64_ucmp(x, y); + if (x == y) { + if (! (i == 0)) + error++, printf("ucmp(%016llx, %016llx) = %d, expected 0\n", x, y, i); + } + else if (x < y) { + if (! (i < 0)) + error++, printf("ucmp(%016llx, %016llx) = %d, expected < 0\n", x, y, i); + } else { + if (! (i > 0)) + error++, printf("ucmp(%016llx, %016llx) = %d, expected > 0\n", x, y, i); + } + + i = __i64_scmp(x, y); + if (x == y) { + if (! (i == 0)) + error++, printf("scmp(%016llx, %016llx) = %d, expected 0\n", x, y, i); + } + else if ((s64)x < (s64)y) { + if (! (i < 0)) + error++, printf("scmp(%016llx, %016llx) = %d, expected < 0\n", x, y, i); + } else { + if (! (i > 0)) + error++, printf("scmp(%016llx, %016llx) = %d, expected > 0\n", x, y, i); + } + + f = __i64_utod(x); + g = (double) x; + if (f != g) + error++, printf("(double) %llu (u) = %a, expected %a\n", x, f, g); + + f = __i64_stod(x); + g = (double) (s64) x; + if (f != g) + error++, printf("(double) %lld (s) = %a, expected %a\n", x, f, g); + + f = ((double) x) * 0.0001; + z = __i64_dtou(f); + if (z != (u64) f) + error++, printf("(u64) %a = %llu, expected %llu\n", f, z, (u64) f); + + f = ((double) (s64) x) * 0.0001; + t = __i64_dtos(f); + if (t != (s64) f) + error++, printf("(s64) %a = %lld, expected %lld\n", f, z, (s64) f); +} + +#define NSPECIFIC 8 + +unsigned long long specific[NSPECIFIC] = { + 0, 1, -1, 0x7FFFFFFFULL, 0x80000000ULL, 0xFFFFFFFFULL, + 0x7FFFFFFFFFFFULL, 0x8000000000000000ULL +}; + +int main() +{ + int i, j; + + /* Some specific values */ + for (i = 0; i < NSPECIFIC; i++) + for (j = 0; j < NSPECIFIC; j++) + test1(specific[i], specific[j]); + + /* Random testing */ + for (i = 0; i < 50; i++) { + for (j = 0; j < 1000000; j++) + test1(rnd64(), rnd64()); + printf("."); fflush(stdout); + } + printf("\n"); + if (error == 0) + printf ("Test passed\n"); + else + printf ("TEST FAILED, %d error(s) detected\n", error); + return 0; +} + -- cgit v1.2.3
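Note on the division helpers: the ARM and PowerPC __i64_udivmod routines above implement the same restoring (shift-and-subtract) division. The following standalone C sketch of that algorithm is offered only as a reading aid; it is not part of the patch, and the function name udivmod64 is illustrative. Like the assembly versions, it treats a zero divisor as yielding an unspecified result (here it simply returns quotient 0 and remainder n).

    #include <stdint.h>
    #include <stdio.h>

    /* Restoring division: returns n / d and stores n % d in *rem.
       Mirrors the scale-up / subtract / scale-down loop of the ARM and
       PowerPC __i64_udivmod helpers. */
    static uint64_t udivmod64(uint64_t n, uint64_t d, uint64_t *rem)
    {
      uint64_t q = 0, m = 1;
      if (d == 0) { *rem = n; return 0; }   /* assembly: unspecified result */
      /* Scale D and the mask M up while D's top bit is clear and D <= N. */
      while ((int64_t) d >= 0 && d <= n) { d <<= 1; m <<= 1; }
      /* Try to subtract D at each scale, from the largest down to 1. */
      while (m != 0) {
        if (n >= d) { n -= d; q |= m; }
        d >>= 1; m >>= 1;
      }
      *rem = n;
      return q;
    }

    int main(void)
    {
      /* Cross-check against the compiler's own 64-bit division, in the
         same spirit as runtime/test/test_int64.c. */
      uint64_t n = 0x123456789ABCDEF0ULL, d = 0x12345ULL, r;
      uint64_t q = udivmod64(n, d, &r);
      printf("q = %llx, r = %llx, %s\n",
             (unsigned long long) q, (unsigned long long) r,
             (q == n / d && r == n % d) ? "ok" : "MISMATCH");
      return 0;
    }

The IA32 version takes a different route: it scales the operands until the divisor fits in 32 bits, uses the hardware divl instruction to obtain a quotient estimate, and then corrects that estimate against N, so it needs no bit-by-bit loop.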