Diffstat (limited to 'runtime/powerpc/int64.s')
-rw-r--r--  runtime/powerpc/int64.s | 492
1 file changed, 492 insertions, 0 deletions
diff --git a/runtime/powerpc/int64.s b/runtime/powerpc/int64.s
new file mode 100644
index 0000000..34b65b5
--- /dev/null
+++ b/runtime/powerpc/int64.s
@@ -0,0 +1,492 @@
+# *****************************************************************
+#
+# The Compcert verified compiler
+#
+# Xavier Leroy, INRIA Paris-Rocquencourt
+#
+# Copyright (c) 2013 Institut National de Recherche en Informatique et
+# en Automatique.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of the <organization> nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
+# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *********************************************************************
+
+# Helper functions for 64-bit integer arithmetic. PowerPC version.
+
+# Calling conventions for R = F(X) or R = F(X,Y):
+# one or two long arguments: XH in r3, XL in r4, YH in r5, YL in r6
+# one long argument, one int: XH in r3, XL in r4, Y in r5
+# one float argument: X in f1
+# one long result: RH in r3, RL in r4
+# one float result: R in f1
+# This is a big-endian convention: the high word is in the low-numbered register
+# Can use r3...r12 and f0...f13 as temporary registers (caller-save)
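+#
+# In C terms (an illustrative model only, assuming <stdint.h>; the helper
+# name below is hypothetical, not part of this runtime):
+#
+#   static inline uint64_t glue(uint32_t hi, uint32_t lo)
+#   { return ((uint64_t) hi << 32) | lo; }
+#
+# A long argument X is passed as the pair (XH, XL) with X == glue(XH, XL),
+# and a long result is returned the same way.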
+
+ .text
+
+### Opposite
+
+ .balign 16
+ .globl __i64_neg
+__i64_neg:
+ subfic r4, r4, 0 # RL = -XL and set borrow iff XL != 0
+ subfze r3, r3 # RH = -XH - borrow
+ blr
+ .type __i64_neg, @function
+ .size __i64_neg, .-__i64_neg
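+
+# A C sketch of the borrow trick above (illustration only, not assembled;
+# assuming <stdint.h>):
+#
+#   uint32_t rl = 0u - xl;               /* subfic: borrow out iff xl != 0 */
+#   uint32_t rh = 0u - xh - (xl != 0);   /* subfze: fold the borrow in */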
+
+### Addition
+
+ .balign 16
+ .globl __i64_add
+__i64_add:
+ addc r4, r4, r6 # RL = XL + YL and set carry if overflow
+ adde r3, r3, r5 # RH = XH + YH + carry
+ blr
+ .type __i64_add, @function
+ .size __i64_add, .-__i64_add
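+
+# In C (illustration only): a carry out of the low word means xl + yl
+# wrapped around, which 32-bit unsigned arithmetic detects directly:
+#
+#   uint32_t rl = xl + yl;               /* addc */
+#   uint32_t rh = xh + yh + (rl < xl);   /* adde: carry iff rl wrapped */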
+
+### Subtraction
+
+ .balign 16
+ .globl __i64_sub
+__i64_sub:
+ subfc r4, r6, r4 # RL = XL - YL and set borrow if underflow
+ subfe r3, r5, r3 # RH = XH - YH - borrow
+ blr
+ .type __i64_sub, @function
+ .size __i64_sub, .-__i64_sub
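+
+# In C (illustration only): the borrow out of the low word is simply the
+# comparison xl < yl:
+#
+#   uint32_t rl = xl - yl;               /* subfc */
+#   uint32_t rh = xh - yh - (xl < yl);   /* subfe */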
+
+### Multiplication
+
+ .balign 16
+ .globl __i64_mul
+__i64_mul:
+ # Form intermediate products
+ mulhwu r7, r4, r6 # r7 = high half of XL * YL
+ mullw r8, r3, r6 # r8 = low half of XH * YL
+ mullw r9, r4, r5 # r9 = low half of XL * YH
+ mullw r4, r4, r6 # r4 = low half of XL * YL = low half of result
+ # Reconstruct high half of result
+ add r3, r7, r8
+ add r3, r3, r9
+ blr
+ .type __i64_mul, @function
+ .size __i64_mul, .-__i64_mul
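+
+# The code computes the identity
+#   (XH*2^32 + XL) * (YH*2^32 + YL) mod 2^64
+#     = XL*YL + 2^32 * (low(XH*YL) + low(XL*YH) + high(XL*YL)).
+# A C sketch (illustration only, assuming <stdint.h>):
+#
+#   uint64_t p  = (uint64_t) xl * yl;                /* mulhwu + mullw */
+#   uint32_t rl = (uint32_t) p;
+#   uint32_t rh = (uint32_t)(p >> 32) + xh * yl + xl * yh;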
+
+### Helper function for division and modulus. Not exported.
+# Input: numerator N in (r3,r4), divisor D in (r5,r6)
+# Output: quotient Q in (r7,r8), remainder R in (r3,r4)
+ .balign 16
+__i64_udivmod:
+ # Set up quotient and mask
+ li r8, 0 # Q = 0
+ li r7, 0
+ li r10, 1 # M = 1
+ li r9, 0
+ # Check for zero divisor
+ or. r0, r6, r5
+ beqlr # return with unspecified quotient & remainder
+ # Scale divisor and mask
+1: cmpwi r5, 0 # while top bit of D is zero...
+ blt 2f
+ subfc r0, r6, r4 # compute borrow out of N - D
+ subfe r0, r5, r3
+ subfe. r0, r0, r0 # EQ iff no borrow iff N >= D
+ bne 2f # ... and while N >= D ...
+ addc r6, r6, r6 # scale divisor: D = D << 1
+ adde r5, r5, r5
+ addc r10, r10, r10 # scale mask: M = M << 1
+ adde r9, r9, r9
+ b 1b # end while
+ # Long division
+2: subfc r4, r6, r4 # Q = Q | M, N = N - D, and compute borrow
+ or r8, r8, r10
+ subfe r3, r5, r3
+ or r7, r7, r9
+ subfe. r0, r0, r0 # test borrow
+ beq 3f # no borrow: N >= D, continue
+ addc r4, r4, r6 # borrow: undo what we just did to N and Q
+ andc r8, r8, r10
+ adde r3, r3, r5
+ andc r7, r7, r9
+3: slwi r0, r9, 31 # unscale mask: M = M >> 1
+ srwi r10, r10, 1
+ or r10, r10, r0
+ srwi r9, r9, 1
+ slwi r0, r5, 31 # unscale divisor: D = D >> 1
+ srwi r6, r6, 1
+ or r6, r6, r0
+ srwi r5, r5, 1
+ or. r0, r10, r9 # iterate while M != 0
+ bne 2b
+ blr
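+
+# A C sketch of the shift-and-subtract algorithm above (illustration only,
+# assuming <stdint.h>; body of a hypothetical udivmod(n, d)):
+#
+#   uint64_t q = 0, m = 1;
+#   if (d == 0) return;                     /* result unspecified */
+#   while ((d >> 63) == 0 && n >= d) {      /* scale divisor and mask */
+#       d <<= 1; m <<= 1;
+#   }
+#   do {                                    /* long division */
+#       if (n >= d) { n -= d; q |= m; }
+#       d >>= 1; m >>= 1;
+#   } while (m != 0);
+#   /* q is the quotient, n the remainder */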
+
+### Unsigned division
+
+ .balign 16
+ .globl __i64_udiv
+__i64_udiv:
+ mflr r11 # save return address in r11
+ bl __i64_udivmod # unsigned divide
+ mtlr r11 # restore return address
+ mr r3, r7 # R = quotient
+ mr r4, r8
+ blr
+ .type __i64_udiv, @function
+ .size __i64_udiv, .-__i64_udiv
+
+### Unsigned remainder
+
+ .balign 16
+ .globl __i64_umod
+__i64_umod:
+ mflr r11
+ bl __i64_udivmod
+ mtlr r11
+ blr # remainder is already in R
+ .type __i64_umod, @function
+ .size __i64_umod, .-__i64_umod
+
+### Signed division
+
+ .balign 16
+ .globl __i64_sdiv
+__i64_sdiv:
+ mflr r11 # save return address
+ xor r12, r3, r5 # save sign of result in r12 (top bit)
+ srawi r0, r3, 31 # take absolute value of N
+ xor r4, r4, r0 # (i.e. N = N ^ r0 - r0,
+ xor r3, r3, r0 # where r0 = 0 if N >= 0 and r0 = -1 if N < 0)
+ subfc r4, r0, r4
+ subfe r3, r0, r3
+ srawi r0, r5, 31 # take absolute value of D
+ xor r6, r6, r0 # (same trick)
+ xor r5, r5, r0
+ subfc r6, r0, r6
+ subfe r5, r0, r5
+ bl __i64_udivmod # do unsigned division
+ mtlr r11 # restore return address
+ srawi r0, r12, 31 # apply expected sign to quotient
+ xor r8, r8, r0 # RES = Q if r12 >= 0, -Q if r12 < 0
+ xor r7, r7, r0
+ subfc r4, r0, r8
+ subfe r3, r0, r7
+ blr
+ .type __i64_sdiv, @function
+ .size __i64_sdiv, .-__i64_sdiv
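+
+# A C sketch of the sign handling (illustration only; udivmod is the helper
+# above under a hypothetical C name): r0 = x >> 31 (arithmetic) is 0 or -1,
+# so (x ^ r0) - r0 is a branch-free |x|; the quotient is negated iff the
+# operand signs differ:
+#
+#   uint64_t s = (uint64_t)((n ^ d) >> 63);         /* 0 or all-ones */
+#   uint64_t q = udivmod(n < 0 ? -(uint64_t) n : (uint64_t) n,
+#                        d < 0 ? -(uint64_t) d : (uint64_t) d);
+#   return (int64_t)((q ^ s) - s);                  /* negate iff s == -1 */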
+
+## Signed remainder
+
+ .balign 16
+ .globl __i64_smod
+__i64_smod:
+ mflr r11 # save return address
+ srawi r12, r3, 31 # save sign of result in r12 (sign of N)
+ xor r4, r4, r12 # and take absolute value of N
+ xor r3, r3, r12
+ subfc r4, r12, r4
+ subfe r3, r12, r3
+ srawi r0, r5, 31 # take absolute value of D
+ xor r6, r6, r0
+ xor r5, r5, r0
+ subfc r6, r0, r6
+ subfe r5, r0, r5
+ bl __i64_udivmod # do unsigned division
+ mtlr r11 # restore return address
+ xor r4, r4, r12 # apply expected sign to remainder
+ xor r3, r3, r12 # RES = R if r12 == 0, -R if r12 == -1
+ subfc r4, r12, r4
+ subfe r3, r12, r3
+ blr
+ .type __i64_smod, @function
+ .size __i64_smod, .-__i64_smod
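+
+# In C (illustration only; umod is a hypothetical name for the unsigned
+# remainder above): the remainder takes the sign of N, as with C's %:
+#
+#   uint64_t sn = (uint64_t)(n >> 63);              /* sign mask of N */
+#   uint64_t r  = umod(((uint64_t) n ^ sn) - sn,    /* |N| */
+#                      d < 0 ? -(uint64_t) d : (uint64_t) d);
+#   return (int64_t)((r ^ sn) - sn);                /* give it N's sign */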
+
+### Unsigned comparison
+
+ .balign 16
+ .globl __i64_ucmp
+__i64_ucmp:
+ cmplw cr0, r3, r5 # compare high words (unsigned)
+ cmplw cr1, r4, r6 # compare low words (unsigned)
+ mfcr r0
+# At this point, the bits of r0 are as follows:
+# bit 31: XH < YH
+# bit 30: XH > YH
+# bit 27: XL < YL
+# bit 26: XL > YL
+ rlwinm r3, r0, 0, 0, 1 # r3 = r0 & 0xC000_0000
+ srawi r3, r3, 24 # r3 = r3 >>s 24
+# r3 = -0x80 if XH < YH
+# = 0x40 if XH > YH
+# = 0 if XH = YH
+ rlwinm r4, r0, 4, 0, 1 # r4 = (r0 << 4) & 0xC000_0000
+ srawi r4, r4, 28 # r4 = r4 >>s 28
+# r4 = -8 if XL < YL
+# = 4 if XL > YL
+# = 0 if XL = YL
+ add r3, r3, r4
+# r3 = -0x80 or -0x80 - 8 or -0x80 + 4 or -8 if X < Y
+# (in all cases, r3 < 0 if X < Y)
+# = 0x40 or 0x40 - 8 or 0x40 + 4 or 4 if X > Y
+# (in all cases, r3 > 0 if X > Y)
+# = 0 if X = Y
+ blr
+ .type __i64_ucmp, @function
+ .size __i64_ucmp, .-__i64_ucmp
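+
+# A C sketch of the branch-free encoding (illustration only): the weights
+# are chosen so the high-word verdict always dominates the low-word one
+# (0x80 and 0x40 exceed 8 + 4):
+#
+#   int hi = (xh < yh) ? -0x80 : (xh > yh) ? 0x40 : 0;
+#   int lo = (xl < yl) ? -8    : (xl > yl) ? 4    : 0;
+#   return hi + lo;      /* sign of the result encodes the comparison */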
+
+### Signed comparison
+
+ .balign 16
+ .globl __i64_scmp
+__i64_scmp:
+ cmpw cr0, r3, r5 # compare high words (signed)
+ cmplw cr1, r4, r6 # compare low words (unsigned)
+ mfcr r0
+# Same trick as in __i64_ucmp
+ rlwinm r3, r0, 0, 0, 1
+ srawi r3, r3, 24
+ rlwinm r4, r0, 4, 0, 1
+ srawi r4, r4, 28
+ add r3, r3, r4
+ blr
+ .type __i64_scmp, @function
+ .size __i64_scmp, .-__i64_scmp
+
+### Shifts
+
+# On PowerPC, shift instructions with amount mod 64 >= 32 return 0
+
+ .balign 16
+ .globl __i64_shl
+__i64_shl:
+# hi = (hi << amount) | (lo >> (32 - amount)) | (lo << (amount - 32))
+# lo = lo << amount
+# if 0 <= amount < 32:
+# (amount - 32) mod 64 >= 32, hence lo << (amount - 32) == 0
+# if 32 <= amount < 64:
+# lo << amount == 0
+# (32 - amount) mod 64 >= 32, hence lo >> (32 - amount) == 0
+# (amount == 32 is also correct: both cross terms equal lo, and OR absorbs the duplicate)
+ andi. r5, r5, 63 # take amount modulo 64
+ subfic r6, r5, 32 # r6 = 32 - amount
+ addi r7, r5, -32 # r7 = amount - 32
+ slw r3, r3, r5
+ srw r0, r4, r6
+ or r3, r3, r0
+ slw r0, r4, r7
+ or r3, r3, r0
+ slw r4, r4, r5
+ blr
+ .type __i64_shl, @function
+ .size __i64_shl, .-__i64_shl
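+
+# A C sketch of the three-term decomposition (illustration only): SLW/SRW
+# are hypothetical macros with the PowerPC semantics, yielding 0 when
+# (s & 63) >= 32 and x << (s & 31) / x >> (s & 31) otherwise, so for any
+# amount exactly the right terms survive:
+#
+#   uint32_t rh = SLW(xh, n) | SRW(xl, 32 - n) | SLW(xl, n - 32);
+#   uint32_t rl = SLW(xl, n);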
+
+ .balign 16
+ .globl __i64_shr
+__i64_shr:
+# lo = (lo >> amount) | (hi << (32 - amount)) | (hi >> (amount - 32))
+# hi = hi >> amount
+# if 0 <= amount < 32:
+# (amount - 32) mod 64 >= 32, hence hi >> (amount - 32) == 0
+# if 32 <= amount < 64:
+# hi >> amount == 0
+# (32 - amount) mod 64 >= 32, hence hi << (32 - amount) == 0
+# (amount == 32 is also correct: both cross terms equal hi, and OR absorbs the duplicate)
+ andi. r5, r5, 63 # take amount modulo 64
+ subfic r6, r5, 32 # r6 = 32 - amount
+ addi r7, r5, -32 # r7 = amount - 32
+ srw r4, r4, r5
+ slw r0, r3, r6
+ or r4, r4, r0
+ srw r0, r3, r7
+ or r4, r4, r0
+ srw r3, r3, r5
+ blr
+ .type __i64_shr, @function
+ .size __i64_shr, .-__i64_shr
+
+ .balign 16
+ .globl __i64_sar
+__i64_sar:
+ andi. r5, r5, 63 # take amount modulo 64
+ cmpwi r5, 32
+ bge 1f # branch if amount >= 32
+ subfic r6, r5, 32 # r6 = 32 - amount
+ srw r4, r4, r5 # RL = XL >>u amount | XH << (32 - amount)
+ slw r0, r3, r6
+ or r4, r4, r0
+ sraw r3, r3, r5 # RH = XH >>s amount
+ blr
+1: addi r6, r5, -32 # amount >= 32
+ sraw r4, r3, r6 # RL = XH >>s (amount - 32)
+ srawi r3, r3, 31 # RH = sign extension of XH
+ blr
+ .type __i64_sar, @function
+ .size __i64_sar, .-__i64_sar
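+
+# In C (illustration only, with the SLW/SRW macros from the __i64_shl note):
+# arithmetic right shift cannot reuse the OR trick, because the vacated high
+# bits must be copies of the sign bit rather than zeros, hence the branch:
+#
+#   if (n < 32) {
+#       rl = SRW(xl, n) | SLW(xh, 32 - n);  /* n == 0 zeroes the cross term */
+#       rh = (int32_t) xh >> n;
+#   } else {
+#       rl = (int32_t) xh >> (n - 32);
+#       rh = (int32_t) xh >> 31;            /* sign extension of XH */
+#   }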
+
+### Conversion from unsigned long to double float
+
+ .balign 16
+ .globl __i64_utod
+__i64_utod:
+ addi r1, r1, -16
+ lis r5, 0x4330
+ li r6, 0
+ stw r5, 0(r1)
+ stw r4, 4(r1)
+ stw r5, 8(r1)
+ stw r6, 12(r1)
+ lfd f1, 0(r1)
+ lfd f2, 8(r1)
+ fsub f1, f1, f2 # f1 is XL as a double
+ lis r5, 0x4530
+ stw r5, 0(r1)
+ stw r3, 4(r1)
+ stw r5, 8(r1)
+ lfd f2, 0(r1)
+ lfd f3, 8(r1)
+ fsub f2, f2, f3 # f2 is XH * 2^32 as a double
+ fadd f1, f1, f2 # add both to get result
+ addi r1, r1, 16
+ blr
+ .type __i64_utod, @function
+ .size __i64_utod, .-__i64_utod
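+
+# The trick above in C (illustration only; as_double is a hypothetical
+# bit-cast from uint64_t): the bit pattern 0x43300000_XXXXXXXX is the double
+# 2^52 + XL, and 0x45300000_XXXXXXXX is 2^84 + XH*2^32, so both subtractions
+# are exact and only the final addition rounds:
+#
+#   double lo = as_double(0x4330000000000000ULL | xl) - 0x1p52;  /* (double) XL */
+#   double hi = as_double(0x4530000000000000ULL | xh) - 0x1p84;  /* XH * 2^32   */
+#   return lo + hi;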
+
+### Conversion from signed long to double float
+
+ .balign 16
+ .globl __i64_stod
+__i64_stod:
+ addi r1, r1, -16
+ lis r5, 0x4330
+ li r6, 0
+ stw r5, 0(r1)
+ stw r4, 4(r1)
+ stw r5, 8(r1)
+ stw r6, 12(r1)
+ lfd f1, 0(r1)
+ lfd f2, 8(r1)
+ fsub f1, f1, f2 # f1 is XL (unsigned) as a double
+ lis r5, 0x4530
+ lis r6, 0x8000
+ stw r5, 0(r1)
+ add r3, r3, r6
+ stw r3, 4(r1)
+ stw r5, 8(r1)
+ stw r6, 12(r1)
+ lfd f2, 0(r1)
+ lfd f3, 8(r1)
+ fsub f2, f2, f3 # f2 is XH (signed) * 2^32 as a double
+ fadd f1, f1, f2 # add both to get result
+ addi r1, r1, 16
+ blr
+ .type __i64_stod, @function
+ .size __i64_stod, .-__i64_stod
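+
+# Same trick in C (illustration only), except XH is biased by 2^31 so its
+# signed value becomes an unsigned mantissa; subtracting the constant with
+# bits 0x45300000_80000000, i.e. 2^84 + 2^63, removes both the exponent
+# magic and the bias:
+#
+#   double hi = as_double(0x4530000000000000ULL | (xh + 0x80000000u))
+#             - (0x1p84 + 0x1p63);            /* (double) XH * 2^32, exact */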
+
+### Conversion from double float to unsigned long
+
+ .balign 16
+ .globl __i64_dtou
+__i64_dtou:
+ stfdu f1, -16(r1) # extract LO (r4) and HI (r3) halves of double
+ lwz r3, 0(r1)
+ lwz r4, 4(r1)
+ addi r1, r1, 16
+ cmpwi r3, 0 # is double < 0?
+ blt 1f # then it converts to 0
+ # extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52)
+ rlwinm r5, r3, 12, 21, 31
+ addi r5, r5, -1075
+ # check range of exponent
+ cmpwi r5, -52 # if EXP < -52, double is < 1.0
+ blt 1f
+ cmpwi r5, 12 # if EXP >= 64 - 52, double is >= 2^64
+ bge 2f
+ # extract true mantissa
+ rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000
+ oris r3, r3, 0x10 # HI |= 0x00100000
+ # shift it appropriately
+ cmpwi r5, 0
+ blt 3f
+ b __i64_shl # if EXP >= 0, shift left by EXP
+3: subfic r5, r5, 0
+ b __i64_shr # if EXP < 0, shift right by -EXP
+ # Special cases
+1: li r3, 0 # result is 0
+ li r4, 0
+ blr
+2: li r3, -1 # result is MAX_UINT
+ li r4, -1
+ blr
+ .type __i64_dtou, @function
+ .size __i64_dtou, .-__i64_dtou
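+
+# A C sketch of the classification above (illustration only, assuming
+# <stdint.h>): the double is m * 2^e with m the 53-bit mantissa (implicit
+# bit included) and e = biased_exponent - 1075:
+#
+#   int e = (int)((hi >> 20) & 0x7FF) - 1075;
+#   uint64_t m = (((uint64_t)(hi & 0xFFFFF) << 32) | lo) | (1ULL << 52);
+#   if ((int32_t) hi < 0 || e < -52) return 0;   /* negative or < 1.0 */
+#   if (e >= 12) return ~0ULL;                   /* >= 2^64: MAX_UINT */
+#   return e >= 0 ? m << e : m >> -e;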
+
+### Conversion from double float to signed long
+
+ .balign 16
+ .globl __i64_dtos
+__i64_dtos:
+ stfdu f1, -16(r1) # extract LO (r4) and HI (r3) halves of double
+ lwz r3, 0(r1)
+ lwz r4, 4(r1)
+ addi r1, r1, 16
+ srawi r10, r3, 31 # save sign of double in r10
+ # extract unbiased exponent ((HI & 0x7FF00000) >> 20) - (1023 + 52)
+ rlwinm r5, r3, 12, 21, 31
+ addi r5, r5, -1075
+ # check range of exponent
+ cmpwi r5, -52 # if EXP < -52, abs(double) is < 1.0
+ blt 1f
+ cmpwi r5, 11 # if EXP >= 63 - 52, abs(double) is >= 2^63
+ bge 2f
+ # extract true mantissa
+ rlwinm r3, r3, 0, 12, 31 # HI &= ~0xFFF00000
+ oris r3, r3, 0x10 # HI |= 0x00100000
+ # shift it appropriately
+ mflr r9 # save retaddr in r9
+ cmpwi r5, 0
+ blt 3f
+ bl __i64_shl # if EXP >= 0, shift left by EXP
+ b 4f
+3: subfic r5, r5, 0
+ bl __i64_shr # if EXP < 0, shift right by -EXP
+ # apply sign to result
+4: mtlr r9
+ xor r4, r4, r10
+ xor r3, r3, r10
+ subfc r4, r10, r4
+ subfe r3, r10, r3
+ blr
+ # Special cases
+1: li r3, 0 # result is 0
+ li r4, 0
+ blr
+2: cmpwi r10, 0 # result is MAX_SINT or MIN_SINT
+ blt 5f # negative: result is MIN_SINT
+ li r4, -1 # positive: result is MAX_SINT = 0x7FFF_FFFF_FFFF_FFFF
+ srwi r3, r4, 1
+ blr
+5: lis r3, 0x8000 # result is MIN_SINT = 0x8000_0000_0000_0000
+ li r4, 0
+ blr
+ .type __i64_dtos, @function
+ .size __i64_dtos, .-__i64_dtos