summaryrefslogtreecommitdiff
path: root/runtime/powerpc/i64_udivmod.s
blob: 826d9896a1ef305182a77895317e3dab6559282a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# *****************************************************************
#
#               The Compcert verified compiler
#
#           Xavier Leroy, INRIA Paris-Rocquencourt
#
# Copyright (c) 2013 Institut National de Recherche en Informatique et
#  en Automatique.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the <organization> nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT
# HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# *********************************************************************

# Helper functions for 64-bit integer arithmetic.  PowerPC version.

	.text
	
# Unsigned division and modulus

# This function computes both the quotient and the remainder of two
# unsigned 64-bit integers.

# Input:  numerator N in (r3,r4), divisor D in (r5,r6)
# Output: quotient Q in (r5,r6),  remainder R in (r3,r4)
# Destroys: all integer caller-save registers
	
	.globl  __i64_udivmod
	.balign 16
__i64_udivmod:
        cmplwi  r5, 0           # DH == 0 ?
	stwu    r1, -32(r1)
        mflr    r0
        stw     r0, 8(r1)
        stw     r31, 12(r1)
        beq     1f
# The general case	
        stw     r30, 16(r1)
        stw     r29, 20(r1)
        stw     r28, 24(r1)
        mr      r28, r3         # Save N in (r28, r29)
        mr      r29, r4
        mr      r30, r5         # Save D in (r30, r31)
        mr      r31, r6 
  # Scale N and D down, giving N' and D', such that 2^31 <= D' < 2^32
        cntlzw  r7, r5          # r7 = leading zeros in DH = 32 - shift amount
        subfic  r8, r7, 32      # r8 = shift amount
	slw     r0, r3, r7      # N' = N >> shift amount
        srw     r3, r3, r8
        srw     r4, r4, r8
        or      r4, r4, r0
	slw     r0, r5, r7      # D' = D >> shift amount
        srw     r6, r6, r8
        or      r5, r6, r0
  # Divide N' by D' to get an approximate quotient Q
        bl      __i64_udiv6432  # r3 = quotient, r4 = remainder
        mr      r6, r3          # low half of quotient Q
        li      r5, 0           # high half of quotient is 0
  # Tentative quotient is either correct or one too high
  # Compute Q * D in (r7, r8)
4:      mullw   r7, r6, r30     # r7 = Q * DH
        mullw   r8, r6, r31     # r8 = low 32 bits of Q * DL
        mulhwu  r0, r6, r31     # r0 = high 32 bits of Q * DL
        addc    r7, r7, r0
        subfe.  r0, r0, r0      # test carry: EQ iff carry
        beq     2f              # handle overflow case
   # Compute R = N - Q * D, with borrow
        subfc   r4, r8, r29
        subfe   r3, r7, r28
        subfe.  r0, r0, r0      # test borrow: EQ iff no borrow
        beq     3f              # no borrow: N >= Q * D, we are good
        addi    r6, r6, -1      # borrow: adjust Q down by 1
        addc    r4, r4, r31     # and R up by D
        adde    r3, r3, r30
   # Finished	
3:      lwz     r0, 8(r1)
        mtlr    r0
        lwz     r31, 12(r1)
        lwz     r30, 16(r1)
        lwz     r29, 20(r1)
        lwz     r28, 24(r1)
        addi    r1, r1, 32
        blr
   # Special case when Q * D overflows
2:      addi    r6, r6, -1      # adjust Q down by 1
        b       4b              # and redo computation and check of remainder
	.balign 16
# Special case 64 bits divided by 32 bits
1:      cmplwi  r3, 0           # NH == 0?
        beq     4f
        divwu   r31, r3, r6     # Divide NH by DL, quotient QH in r31
        mullw   r0, r31, r6
        subf    r3, r0, r3      # NH is remainder of this division
        mr      r5, r6
        bl      __i64_udiv6432  # divide NH : NL by DL
        mr      r5, r31         # high word of quotient
        mr      r6, r3          # low word of quotient
                                # r4 contains low word of remainder
        li      r3, 0           # high word of remainder = 0
	lwz     r0, 8(r1)
        mtlr    r0
        lwz     r31, 12(r1)
        addi    r1, r1, 32
        blr
	.balign 16
# Special case 32 bits divided by 32 bits
4:      mr      r0, r6
	divwu   r6, r4, r6      # low word of quotient
	li      r5, 0           # high word of quotient is 0
        mullw   r0, r6, r0
        subf    r4, r0, r4      # low word of remainder
        li      r3, 0           # high word of remainder is 0
        addi    r1, r1, 32
        blr
        
        .type __i64_udivmod, @function
        .size __i64_udivmod, .-__i64_udivmod

# Auxiliary division function: 64 bit integer divided by 32 bit integer	
# Not exported
# Input:  numerator N in (r3,r4), divisor D in r5
# Output: quotient Q in r3, remainder R in r4
# Destroys: all integer caller-save registers
# Assumes: high word of N is less than D	
        
	.balign 16
__i64_udiv6432: 
# Algorithm 9.3 from Hacker's Delight, section 9.4	
# Initially: u1 in r3, u0 in r4, v in r5
#   s = __builtin_clz(v);
	cntlzw  r6, r5          # s in r6
#   v = v << s;
        slw     r5, r5, r6
#   vn1 = v >> 16;              # vn1 in r7
        srwi    r7, r5, 16
#   vn0 = v & 0xFFFF;           # vn0 in r8
        rlwinm  r8, r5, 0, 16, 31
#   un32 = (u1 << s) | (u0 >> 32 - s);
	subfic  r0, r6, 32
        srw     r0, r4, r0
        slw     r3, r3, r6     # u1 dies, un32 in r3
        or      r3, r3, r0
#   un10 = u0 << s;
        slw     r4, r4, r6     # u0 dies, un10 in r4
#   un1 = un10 >> 16;
        srwi    r9, r4, 16     # un1 in r9
#   un0 = un10 & 0xFFFF;
        rlwinm  r4, r4, 0, 16, 31 # un10 dies, un0 in r4
#   q1 = un32/vn1;
        divwu   r10, r3, r7    # q in r10
#   rhat = un32 - q1*vn1;
        mullw   r0, r10, r7
        subf    r11, r0, r3    # rhat in r11
#  again1:
1:      
#   if (q1 >= b || q1*vn0 > b*rhat + un1) {
        cmplwi  r10, 0xFFFF
        bgt     2f
        mullw   r0, r10, r8
        slwi    r12, r11, 16
        add     r12, r12, r9
        cmplw   r0, r12
        ble     3f
2:      
#     q1 = q1 - 1;
        addi    r10, r10, -1
#     rhat = rhat + vn1;
        add     r11, r11, r7
#     if (rhat < b) goto again1;}
        cmplwi  r11, 0xFFFF
        ble     1b
3:      
#   un21 = un32*b + un1 - q1*v;
        slwi    r0, r3, 16     # un32 dies
        add     r9, r0, r9     # un1 dies
        mullw   r0, r10, r5
        subf    r9, r0, r9     # un21 in r9
#   q0 = un21/vn1;
	divwu   r3, r9, r7     # q0 in r3
#   rhat = un21 - q0*vn1;
        mullw   r0, r3, r7
        subf    r11, r0, r9    # rhat in r11
# again2:
4:      
#   if (q0 >= b || q0*vn0 > b*rhat + un0) {
        cmplwi  r3, 0xFFFF
        bgt     5f
        mullw   r0, r3, r8
        slwi    r12, r11, 16
        add     r12, r12, r4
        cmplw   r0, r12
        ble     6f
5:      
#     q0 = q0 - 1;
	addi    r3, r3, -1
#     rhat = rhat + vn1;
        add     r11, r11, r7
#     if (rhat < b) goto again2;}
	cmplwi  r11, 0xFFFF
        ble     4b
6:      
#   remainder = (un21*b + un0 - q0*v) >> s;
        slwi    r0, r9, 16
        add     r4, r0, r4  # un0 dies, remainder in r4
        mullw   r0, r3, r5
        subf    r4, r0, r4
        srw     r4, r4, r6
#   quotient = q1*b + q0;
        slwi    r0, r10, 16
        add     r3, r0, r3
        blr

	.type	__i64_udiv6432, @function
	.size	__i64_udiv6432,.-__i64_udiv6432