blob: 6a143e50bb7e8905bab7a5c235ba85208e5fea50 [file] [log] [blame]
Jingwei Zhang5d4f0e62014-10-31 18:29:18 +08001/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/******************************************************************************/
32// ALGORITHM DESCRIPTION
33// ---------------------
34//
35// X87 version:
36// Use 80-bit FPU precision fmul, fsqrt to compute square and sqrt.
37//
38// SSE version:
39// Swap x, y if |x|<|y|
40// For x=2^k*x, get y=y*2^(-k)
41// Get S ~ sqrt(x^2+y^2) (leading 1 + leading 25 mantissa bits)
42//
43// Get D = ( RN(x^2+y^2) - S^2 ) + ( x^2 - RN(x^2) ) +
44// + ( y^2 - ((RN(x^2+y^2)-RN(x^2)) )
45//
46// Result is 2^k*(S + Se), where Se = S*e
47// S*e is approximated as (D/2S)*( 1 - (D/2S)^2*1.0/S )
48//
49// Return 2^k*(S+Se)
50//
51// For |y/x|<2^(-64), return x
52//
53// For cases where maximum biased exponent is either greater than 7fdh or
54// below 32, take a special path to check for special cases (0, NaN, Inf),
55// possible overflow, and more accurate computation for denormal results
56//
57// Special cases:
58// hypot(x,y), hypot(y,x), and hypot(x,-y) are equivalent
59// hypot(x,+-0) is equivalent to fabs(x)
60// hypot(x,y) = y if (x==NaN or x==INF) and y==INF
61// hypot(x,y) = x if (x==NaN or x==INF) and y!=INF (even if y==NaN!)
62// hypot(x,y) = y if (x!=NaN and x!=INF) and (y==NaN or y==INF)
63//
64/******************************************************************************/
65
66#include <private/bionic_asm.h>
67# -- Begin static_func
68 .text
69 .align __bionic_asm_align
70 .type static_func, @function
71static_func:
72..B1.1:
73 call ..L2
74..L2:
75 popl %eax
76 lea _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax
77 lea static_const_table@GOTOFF(%eax), %eax
78 ret
79 .size static_func,.-static_func
80# -- End static_func
81
82# -- Begin hypot
83ENTRY(hypot)
84# parameter 1: 8 + %ebp
85# parameter 2: 16 + %ebp
86..B2.1:
87..B2.2:
88 pushl %ebp
89 movl %esp, %ebp
90 subl $152, %esp
91 movl %ebx, 96(%esp)
92 call static_func
93 movl %eax, %ebx
94 movapd (%ebx), %xmm3
95 movsd 160(%esp), %xmm0
96 movsd 168(%esp), %xmm1
97 andpd %xmm3, %xmm0
98 andpd %xmm3, %xmm1
99 pextrw $3, %xmm0, %eax
100 pextrw $3, %xmm1, %edx
101 cmpl $24528, %eax
102 ja .L_2TAG_PACKET_0.0.2
103 cmpl $24528, %edx
104 ja .L_2TAG_PACKET_0.0.2
105.L_2TAG_PACKET_1.0.2:
106 fldl 160(%esp)
107 fldl 168(%esp)
108 fxch %st(1)
109 fmul %st(0), %st
110 fxch %st(1)
111 nop
112 fmul %st(0), %st
113 faddp %st, %st(1)
114 fsqrt
115 jmp .L_2TAG_PACKET_2.0.2
116.L_2TAG_PACKET_0.0.2:
117 cmpl $32752, %eax
118 movl %eax, %ecx
119 jae .L_2TAG_PACKET_3.0.2
120 subl %edx, %ecx
121 cmpl $32752, %edx
122 jae .L_2TAG_PACKET_3.0.2
123 addl $928, %ecx
124 addl %edx, %eax
125 cmpl $1856, %ecx
126 ja .L_2TAG_PACKET_4.0.2
127 cmpl $49056, %eax
128 jb .L_2TAG_PACKET_1.0.2
129 fldl 160(%esp)
130 fldl 168(%esp)
131 fxch %st(1)
132 fmul %st(0), %st
133 fxch %st(1)
134 nop
135 fmul %st(0), %st
136 faddp %st, %st(1)
137 fsqrt
138.L_2TAG_PACKET_5.0.2:
139 fstl (%esp)
140 fstpt 16(%esp)
141 xorl %eax, %eax
142 movw 24(%esp), %ax
143 cmpl $17407, %eax
144 jae .L_2TAG_PACKET_6.0.2
145 fldl (%esp)
146 jmp .L_2TAG_PACKET_7.0.2
147.L_2TAG_PACKET_4.0.2:
148 movsd %xmm0, 32(%esp)
149 movsd %xmm1, 40(%esp)
150 fldl 32(%esp)
151 faddl 40(%esp)
152 jmp .L_2TAG_PACKET_5.0.2
153.L_2TAG_PACKET_6.0.2:
154 movl $46, %edx
155.L_2TAG_PACKET_8.0.2:
156 movsd 160(%esp), %xmm0
157 movsd 168(%esp), %xmm1
158 fldl (%esp)
159 jmp .L_2TAG_PACKET_7.0.2
160.L_2TAG_PACKET_3.0.2:
161 shufpd $0, %xmm1, %xmm0
162 movdqa %xmm0, %xmm2
163 movdqa 16(%ebx), %xmm3
164 movsd %xmm0, 32(%esp)
165 movsd %xmm1, 40(%esp)
166 cmppd $3, %xmm0, %xmm2
167 cmppd $0, %xmm0, %xmm3
168 movmskpd %xmm2, %edx
169 movmskpd %xmm3, %eax
170 testl %edx, %edx
171 je .L_2TAG_PACKET_9.0.2
172 fldl 32(%esp)
173 fmull 40(%esp)
174 testl $1, %eax
175 jne .L_2TAG_PACKET_10.0.2
176 testl $2, %eax
177 jne .L_2TAG_PACKET_11.0.2
178 jmp .L_2TAG_PACKET_2.0.2
179.L_2TAG_PACKET_9.0.2:
180 fldl 32(%esp)
181 faddl 40(%esp)
182 jmp .L_2TAG_PACKET_2.0.2
183.L_2TAG_PACKET_10.0.2:
184 fstpl 40(%esp)
185 fldl 32(%esp)
186 jmp .L_2TAG_PACKET_7.0.2
187.L_2TAG_PACKET_11.0.2:
188 fstpl 32(%esp)
189 fldl 40(%esp)
190 jmp .L_2TAG_PACKET_7.0.2
191.L_2TAG_PACKET_2.0.2:
192.L_2TAG_PACKET_7.0.2:
193 movl 96(%esp), %ebx
194 movl %ebp, %esp
195 popl %ebp
196 ret
197..B2.3:
198END(hypot)
199# -- End hypot
200
201# Start file scope ASM
Christopher Ferris995b8132015-03-13 17:43:52 -0700202ALIAS_SYMBOL(hypotl, hypot);
Jingwei Zhang5d4f0e62014-10-31 18:29:18 +0800203# End file scope ASM
204 .section .rodata, "a"
205 .align 16
206 .align 16
207static_const_table:
208 .long 4294967295
209 .long 2147483647
210 .long 4294967295
211 .long 2147483647
212 .long 0
213 .long 2146435072
214 .long 0
215 .long 2146435072
216 .type static_const_table,@object
217 .size static_const_table,32
218 .data
219 .section .note.GNU-stack, ""
220# End