| /* |
| Copyright (c) 2014, Intel Corporation |
| All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are met: |
| |
| * Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| |
| * Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| |
| * Neither the name of Intel Corporation nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
| ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| /******************************************************************************/ |
| // ALGORITHM DESCRIPTION |
| // --------------------- |
| // |
| // X87 version: |
| // Use 80-bit FPU precision fmul, fsqrt to compute square and sqrt. |
| // |
| // SSE version: |
| // Swap x, y if |x|<|y| |
| // For x=2^k*x, get y=y*2^(-k) |
| // Get S ~ sqrt(x^2+y^2) (leading 1 + leading 25 mantissa bits) |
| // |
| // Get D = ( RN(x^2+y^2) - S^2 ) + ( x^2 - RN(x^2) ) + |
| // + ( y^2 - (RN(x^2+y^2)-RN(x^2)) ) |
| // |
| // Result is 2^k*(S + Se), where Se = S*e |
| // S*e is approximated as (D/2S)*( 1 - (D/2S)^2*1.0/S ) |
| // |
| // Return 2^k*(S+Se) |
| // |
| // For |y/x|<2^(-64), return x |
| // |
| // For cases where maximum biased exponent is either greater than 7fdh or |
| // below 32, take a special path to check for special cases (0, NaN, Inf), |
| // possible overflow, and more accurate computation for denormal results |
| // |
| // Special cases: |
| // hypot(x,y), hypot(y,x), and hypot(x,-y) are equivalent |
| // hypot(x,+-0) is equivalent to fabs(x) |
| // hypot(x,y) = y if (x==NaN or x==INF) and y==INF |
| // hypot(x,y) = x if (x==NaN or x==INF) and y!=INF (even if y==NaN!) |
| // hypot(x,y) = y if (x!=NaN and x!=INF) and (y==NaN or y==INF) |
| // |
| /******************************************************************************/ |
| |
| #include <private/bionic_asm.h> |
| # -- Begin static_func |
| # static_func: position-independent helper that returns, in %eax, the |
| # run-time address of static_const_table. Takes no arguments. Only %eax is |
| # modified (call/popl/lea leave the flags untouched). |
| .text |
| .align __bionic_asm_align |
| .type static_func, @function |
| static_func: |
| ..B1.1: |
| # The call pushes the address of ..L2, which popl then moves into %eax. |
| call ..L2 |
| ..L2: |
| popl %eax |
| # Add the link-time distance from ..L2 to the GOT: %eax = GOT base. |
| lea _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax |
| # Add the GOT-relative offset of the table: %eax = &static_const_table. |
| lea static_const_table@GOTOFF(%eax), %eax |
| ret |
| .size static_func,.-static_func |
| # -- End static_func |
| |
| # -- Begin hypot |
| # double hypot(double x, double y), x87 implementation. |
| # Both arguments are read from the stack and the result is returned in |
| # st(0). %ebx is saved in the local frame and restored before returning; |
| # %eax, %ecx, %edx, %xmm0-%xmm3 and the x87 stack are used as scratch. |
| # Frame layout: after the prologue %esp = entry %esp - 156, so x lives at |
| # 160(%esp) and y at 168(%esp). Local slots used below: (%esp), 16(%esp), |
| # 32(%esp), 40(%esp) and 96(%esp). |
| ENTRY(hypot) |
| # parameter 1: 8 + %ebp |
| # parameter 2: 16 + %ebp |
| ..B2.1: |
| ..B2.2: |
| pushl %ebp |
| movl %esp, %ebp |
| subl $152, %esp |
| movl %ebx, 96(%esp) |
| # %ebx = address of static_const_table (returned in %eax by static_func). |
| call static_func |
| movl %eax, %ebx |
| # Table bytes 0..15 are the sign-clearing mask 0x7fffffffffffffff (twice), |
| # so after the two andpd: xmm0 = fabs(x), xmm1 = fabs(y). |
| movapd (%ebx), %xmm3 |
| movsd 160(%esp), %xmm0 |
| movsd 168(%esp), %xmm1 |
| andpd %xmm3, %xmm0 |
| andpd %xmm3, %xmm1 |
| # %eax / %edx = bits 48..63 of fabs(x) / fabs(y): the 11-bit biased exponent |
| # followed by the top 4 mantissa bits (the sign bit is already cleared). |
| pextrw $3, %xmm0, %eax |
| pextrw $3, %xmm1, %edx |
| # 24528 = 0x5FD0. Leave the fast path when either top word exceeds it. |
| cmpl $24528, %eax |
| ja .L_2TAG_PACKET_0.0.2 |
| cmpl $24528, %edx |
| ja .L_2TAG_PACKET_0.0.2 |
| # Fast path: st(0) = sqrt(x*x + y*y), computed entirely on the x87 stack. |
| .L_2TAG_PACKET_1.0.2: |
| fldl 160(%esp) |
| fldl 168(%esp) |
| # st(0) = x, st(1) = y after the exchange; square x in place. |
| fxch %st(1) |
| fmul %st(0), %st |
| # st(0) = y, st(1) = x*x after the exchange; square y in place. |
| fxch %st(1) |
| nop |
| fmul %st(0), %st |
| # st(0) = x*x + y*y (one value left on the stack), then its square root. |
| faddp %st, %st(1) |
| fsqrt |
| jmp .L_2TAG_PACKET_2.0.2 |
| # Slow-path classification. 32752 = 0x7FF0: a top word at or above it means |
| # the exponent field is all ones (Inf or NaN), handled at PACKET_3. |
| .L_2TAG_PACKET_0.0.2: |
| cmpl $32752, %eax |
| movl %eax, %ecx |
| jae .L_2TAG_PACKET_3.0.2 |
| # %ecx = top(x) - top(y). The movl/subl are placed ahead of the conditional |
| # jumps that consume the preceding cmpl; movl does not alter the flags. |
| subl %edx, %ecx |
| cmpl $32752, %edx |
| jae .L_2TAG_PACKET_3.0.2 |
| # Biasing the difference by 928 (58 << 4) and comparing unsigned against |
| # 1856 (116 << 4) sends any pair whose top words differ by more than 928, |
| # in either direction, to PACKET_4. |
| addl $928, %ecx |
| # %eax = top(x) + top(y). |
| addl %edx, %eax |
| cmpl $1856, %ecx |
| ja .L_2TAG_PACKET_4.0.2 |
| # 49056 = 0xBFA0 = 2 * 0x5FD0. A smaller sum goes back to the fast path. |
| cmpl $49056, %eax |
| jb .L_2TAG_PACKET_1.0.2 |
| # Same sqrt(x*x + y*y) sequence as the fast path, but falling through into |
| # the range check at PACKET_5. |
| fldl 160(%esp) |
| fldl 168(%esp) |
| fxch %st(1) |
| fmul %st(0), %st |
| fxch %st(1) |
| nop |
| fmul %st(0), %st |
| faddp %st, %st(1) |
| fsqrt |
| # Range check. The result is stored twice: rounded to double at (%esp) |
| # (st(0) is kept) and as an 80-bit value at 16(%esp) (st(0) is popped). |
| .L_2TAG_PACKET_5.0.2: |
| fstl (%esp) |
| fstpt 16(%esp) |
| # %eax = 16-bit sign/exponent word of the 80-bit copy, zero-extended. |
| xorl %eax, %eax |
| movw 24(%esp), %ax |
| # 17407 = 0x43FF = 16383 + 1024: the 80-bit value is at least 2^1024, which |
| # is beyond the finite double range. |
| cmpl $17407, %eax |
| jae .L_2TAG_PACKET_6.0.2 |
| # In range: return the double-rounded copy. |
| fldl (%esp) |
| jmp .L_2TAG_PACKET_7.0.2 |
| # Top words differ by more than 928: fabs(x) + fabs(y) is used in place of |
| # the square root, followed by the same range check. |
| .L_2TAG_PACKET_4.0.2: |
| movsd %xmm0, 32(%esp) |
| movsd %xmm1, 40(%esp) |
| fldl 32(%esp) |
| faddl 40(%esp) |
| jmp .L_2TAG_PACKET_5.0.2 |
| # Out-of-range result: the double-rounded copy at (%esp) is returned. |
| # NOTE(review): the value 46 placed in %edx and the reloaded %xmm0/%xmm1 are |
| # not read again anywhere in this function, and PACKET_8 is not a branch |
| # target here; presumably leftovers of a removed error-reporting call -- |
| # confirm before deleting. |
| .L_2TAG_PACKET_6.0.2: |
| movl $46, %edx |
| .L_2TAG_PACKET_8.0.2: |
| movsd 160(%esp), %xmm0 |
| movsd 168(%esp), %xmm1 |
| fldl (%esp) |
| jmp .L_2TAG_PACKET_7.0.2 |
| # Inf/NaN handling: at least one argument has an all-ones exponent. |
| .L_2TAG_PACKET_3.0.2: |
| # xmm0 = { fabs(x), fabs(y) } (low lane, high lane); xmm2 is a copy. |
| shufpd $0, %xmm1, %xmm0 |
| movdqa %xmm0, %xmm2 |
| # Table bytes 16..31: 0x7ff0000000000000 in both lanes (+Inf). |
| movdqa 16(%ebx), %xmm3 |
| movsd %xmm0, 32(%esp) |
| movsd %xmm1, 40(%esp) |
| # Predicate 3 (unordered) of a value against itself marks the NaN lanes; |
| # predicate 0 (equal) against +Inf marks the infinite lanes. |
| cmppd $3, %xmm0, %xmm2 |
| cmppd $0, %xmm0, %xmm3 |
| # %edx = NaN mask, %eax = Inf mask; bit 0 is x, bit 1 is y. |
| movmskpd %xmm2, %edx |
| movmskpd %xmm3, %eax |
| testl %edx, %edx |
| je .L_2TAG_PACKET_9.0.2 |
| # A NaN is present: st(0) = fabs(x) * fabs(y), which is returned unless one |
| # of the arguments is infinite. |
| fldl 32(%esp) |
| fmull 40(%esp) |
| testl $1, %eax |
| jne .L_2TAG_PACKET_10.0.2 |
| testl $2, %eax |
| jne .L_2TAG_PACKET_11.0.2 |
| jmp .L_2TAG_PACKET_2.0.2 |
| # No NaN present: return fabs(x) + fabs(y). |
| .L_2TAG_PACKET_9.0.2: |
| fldl 32(%esp) |
| faddl 40(%esp) |
| jmp .L_2TAG_PACKET_2.0.2 |
| # x is infinite and y is NaN: pop the product into the scratch slot at |
| # 40(%esp) and return fabs(x). |
| .L_2TAG_PACKET_10.0.2: |
| fstpl 40(%esp) |
| fldl 32(%esp) |
| jmp .L_2TAG_PACKET_7.0.2 |
| # y is infinite and x is NaN: pop the product into the scratch slot at |
| # 32(%esp) and return fabs(y). |
| .L_2TAG_PACKET_11.0.2: |
| fstpl 32(%esp) |
| fldl 40(%esp) |
| jmp .L_2TAG_PACKET_7.0.2 |
| # Common exit: the result is in st(0). Restore %ebx and tear down the frame. |
| .L_2TAG_PACKET_2.0.2: |
| .L_2TAG_PACKET_7.0.2: |
| movl 96(%esp), %ebx |
| movl %ebp, %esp |
| popl %ebp |
| ret |
| ..B2.3: |
| END(hypot) |
| # -- End hypot |
| |
| # Start file scope ASM |
| # hypotl is exported as a weak alias of hypot. |
| .weak hypotl |
| .equ hypotl, hypot |
| # End file scope ASM |
| .section .rodata, "a" |
| # 16-byte alignment: the table is read with movapd / movdqa. |
| .p2align 4 |
| .type static_const_table,@object |
| static_const_table: |
| # Offset 0: sign-clearing mask, one copy per 64-bit lane. |
| .quad 0x7fffffffffffffff |
| .quad 0x7fffffffffffffff |
| # Offset 16: bit pattern of +Inf, one copy per 64-bit lane. |
| .quad 0x7ff0000000000000 |
| .quad 0x7ff0000000000000 |
| .size static_const_table,32 |
| .data |
| .section .note.GNU-stack, "" |
| # End |