blob: 1f21cde00399ff61adcf4f7aa1e75ebe6d3e818c [file] [log] [blame]
Jingwei Zhang5d4f0e62014-10-31 18:29:18 +08001/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/******************************************************************************/
32// ALGORITHM DESCRIPTION
33// ---------------------
34//
35// Description:
36// Let K = 64 (table size).
37//
38// Four sub-domains:
39// 1. |x| < 1/(2*K)
40// expm1(x) ~ P(x)
41// 2. 1/(2*K) <= |x| <= 56*log(2)
42// e^x - 1 = 2^(x/log(2)) - 1
43// = 2^n * T[j] * (1 + P(y)) - 1
44// 3. 56*log(2) < x < MAX_LOG
45// e^x - 1 ~ e^x = 2^(x/log(2))
46// = 2^n * T[j] * (1 + P(y))
47// 4. x < -56*log(2)
48// e^x - 1 = -1 + e^x
49// ~ -1
50// where
51// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
52// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
53// values of 2^(j/K) are tabulated as
54// T[j] = T_hi[j] * (1 + T_lo[j]).
55//
56// P(y) is a minimax polynomial approximation of exp(y)-1
57// on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
58//
59// In case 3, to avoid problems with arithmetic overflow and underflow,
60// the value of 2^n is safely computed as 2^n1 * 2^n2 (n = n1 + n2),
61// where n1 is in [-BIAS/2..BIAS/2]
62// and BIAS is a value of exponent bias.
63//
64// Special cases:
65// expm1(NaN) is NaN
66// expm1(+INF) is +INF
67// expm1(-INF) is -1
68// expm1(x) is x for subnormals
69// for finite argument, only expm1(0)=0 is exact.
70// For IEEE double
71// if x > 709.782712893383973096 then expm1(x) overflows
72//
73/******************************************************************************/
74
75#include <private/bionic_asm.h>
76# -- Begin static_func
/*
 * static_func -- position-independent address lookup.
 *
 * Out:  %eax = run-time address of static_const_table.
 * Uses: %eax only; the call/pop pair leaves %esp unchanged on return.
 */
77 .text
78 .align __bionic_asm_align
79 .type static_func, @function
80static_func:
81..B1.1:
/* call to the next instruction pushes its own address; pop it to get EIP */
82 call ..L2
83..L2:
84 popl %eax
/* eax = address of the GOT (EIP at ..L2 plus the link-time distance to it) */
85 lea _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax
/* eax = GOT address + GOT-relative offset of the table = &static_const_table */
86 lea static_const_table@GOTOFF(%eax), %eax
87 ret
88 .size static_func,.-static_func
89# -- End static_func
90
91# -- Begin expm1
/*
 * double expm1(double x) -- computes e^x - 1 (IA-32, SSE2 plus x87).
 *
 * In:   x is read from the stack at 8(%ebp), which is 128(%esp) once the
 *       120-byte frame below has been set up.
 * Out:  the result is left on the x87 stack in st(0); every exit path
 *       executes an fldl before reaching the shared epilogue.
 * Regs: %ebx is saved at 64(%esp) and restored in the epilogue; in between
 *       it holds the address of static_const_table returned by static_func.
 *       %eax, %ecx, %edx and %xmm0-%xmm7 are used as scratch.
 */
92ENTRY(expm1)
93# parameter 1: 8 + %ebp
94..B2.1:
95..B2.2:
96 pushl %ebp
97 movl %esp, %ebp
98 subl $120, %esp
99 movl %ebx, 64(%esp)
100 call static_func
101 movl %eax, %ebx
/* xmm0 = { x, x }: the argument duplicated into both lanes */
102 movsd 128(%esp), %xmm0
103 unpcklpd %xmm0, %xmm0
/* xmm1 = K/log(2) with K = 64, xmm6 = 1.5*2^52 (rounding shifter) */
104 movapd 64(%ebx), %xmm1
105 movapd 48(%ebx), %xmm6
106 movapd 80(%ebx), %xmm2
107 movapd 96(%ebx), %xmm3
/*
 * eax = bits 48..62 of x (biased exponent plus the top 4 mantissa bits,
 * sign cleared). Stay on the fall-through path only when
 * 16304 (0x3FB0) <= eax <= 16527 (0x408F), i.e. 2^-4 <= |x| < 2^10.
 * Otherwise one of the two differences below is negative, the OR has its
 * sign bit set, and the unsigned compare sends us to PACKET_0.
 * Note that eax is left holding (eax - 16304) for PACKET_0 / PACKET_14.
 */
108 pextrw $3, %xmm0, %eax
109 andl $32767, %eax
110 movl $16527, %edx
111 subl %eax, %edx
112 subl $16304, %eax
113 orl %eax, %edx
114 cmpl $-2147483648, %edx
115 jae .L_2TAG_PACKET_0.0.2
/*
 * Argument reduction. Adding and then subtracting the shifter rounds
 * x*K/log(2) to an integer m: xmm1 = m as a double, while the copy in
 * xmm7 keeps m in its low mantissa bits.
 */
116 mulpd %xmm0, %xmm1
117 addpd %xmm6, %xmm1
118 movapd %xmm1, %xmm7
119 subpd %xmm6, %xmm1
/* xmm0 = y = x - m*[table+80] - m*[table+96] (second subtraction is below) */
120 mulpd %xmm1, %xmm2
121 movapd 112(%ebx), %xmm4
122 mulpd %xmm1, %xmm3
123 movapd 128(%ebx), %xmm5
124 subpd %xmm2, %xmm0
/* eax = m; ecx = (m & 63) * 16 = byte offset of the 16-byte table entry;
   eax = edx = n = m >> 6 (arithmetic shift) */
125 movd %xmm7, %eax
126 movl %eax, %ecx
127 andl $63, %ecx
128 shll $4, %ecx
129 sarl $6, %eax
130 movl %eax, %edx
131 subpd %xmm3, %xmm0
/* xmm2 = table entry selected by m & 63 (two doubles) */
132 movapd 160(%ebx,%ecx), %xmm2
133 movsd 144(%ebx), %xmm3
/* polynomial in y, evaluated in both lanes; xmm1 keeps a copy of y */
134 mulpd %xmm0, %xmm4
135 movapd %xmm0, %xmm1
136 mulpd %xmm0, %xmm0
137 mulsd %xmm0, %xmm3
138 addpd %xmm4, %xmm5
139 mulsd %xmm0, %xmm0
/* xmm4 = full table entry, xmm2 = its upper double broadcast to both lanes */
140 movapd %xmm2, %xmm4
141 unpckhpd %xmm2, %xmm2
/* Build 2^n in xmm7: ((m & ~63) + 1023*64) << 46 == (n + 1023) << 52 */
142 movdqa 16(%ebx), %xmm6
143 pand %xmm6, %xmm7
144 movdqa 32(%ebx), %xmm6
145 paddq %xmm6, %xmm7
146 psllq $46, %xmm7
147 mulsd %xmm0, %xmm3
148 mulpd %xmm5, %xmm0
/* n outside [-894, 1022] (unsigned compare of n + 894): scaling path */
149 addl $894, %edx
150 cmpl $1916, %edx
151 ja .L_2TAG_PACKET_1.0.2
152 addsd %xmm3, %xmm0
/* xmm3 = 1.0 (0x3FF0 inserted into the top word) */
153 xorpd %xmm3, %xmm3
154 movl $16368, %eax
155 pinsrw $3, %eax, %xmm3
/* xmm2 = 2^n OR-ed onto the upper table double; xmm7 = 2^n * lower table double */
156 orpd %xmm7, %xmm2
157 mulsd %xmm4, %xmm7
158 movapd %xmm3, %xmm6
159 addsd %xmm1, %xmm3
/* edx = top 16 bits of xmm2, used below to pick the summation order */
160 pextrw $3, %xmm2, %edx
161 pshufd $238, %xmm0, %xmm5
/* clear the low 38 bits of (1 + y) so xmm3 is a short "high part" */
162 psrlq $38, %xmm3
163 psllq $38, %xmm3
164 movapd %xmm2, %xmm4
165 subsd %xmm3, %xmm6
166 addsd %xmm5, %xmm0
167 addsd %xmm6, %xmm1
168 addsd %xmm7, %xmm4
169 mulsd %xmm3, %xmm7
170 mulsd %xmm2, %xmm3
/* xmm5 = 1.0: the "- 1" of expm1, subtracted at a path-dependent point */
171 xorpd %xmm5, %xmm5
172 movl $16368, %eax
173 pinsrw $3, %eax, %xmm5
174 addsd %xmm1, %xmm0
/* Branch when edx > 17184 (0x4320) or edx < 16256 (0x3F80); on return
   from this test edx holds (edx - 16256). */
175 movl $17184, %ecx
176 subl %edx, %ecx
177 subl $16256, %edx
178 orl %edx, %ecx
179 jl .L_2TAG_PACKET_2.0.2
/* default order: the 1.0 is subtracted from xmm3 before the final adds */
180 mulsd %xmm4, %xmm0
181 subsd %xmm5, %xmm3
182 addsd %xmm7, %xmm0
183 addsd %xmm3, %xmm0
/* PACKET_3: common "return xmm0" trampoline */
184.L_2TAG_PACKET_3.0.2:
185 jmp .L_2TAG_PACKET_4.0.2
186.L_2TAG_PACKET_2.0.2:
/* edx < 0 here means the top word was below 16256: use PACKET_5 */
187 cmpl $0, %edx
188 jl .L_2TAG_PACKET_5.0.2
/* top word above 17184: the 1.0 is subtracted from xmm7 instead */
189 mulsd %xmm4, %xmm0
190 subsd %xmm5, %xmm7
191 addsd %xmm7, %xmm0
192 addsd %xmm3, %xmm0
193 jmp .L_2TAG_PACKET_3.0.2
194.L_2TAG_PACKET_5.0.2:
/* top word below 16256: the 1.0 is subtracted last, from the full sum */
195 mulsd %xmm4, %xmm0
196 addsd %xmm7, %xmm0
197 addsd %xmm3, %xmm0
198 subsd %xmm5, %xmm0
199 jmp .L_2TAG_PACKET_3.0.2
/*
 * PACKET_1: n is outside [-894, 1022].
 * ecx = high dword of x. xmm0 becomes the scalar polynomial value
 * (y plus both lanes of the polynomial sum).
 */
200.L_2TAG_PACKET_1.0.2:
201 movl 132(%esp), %ecx
202 addsd %xmm0, %xmm1
203 unpckhpd %xmm0, %xmm0
204 addsd %xmm1, %xmm0
/* negative x on this path: the result is -1.0 (PACKET_6) */
205 cmpl $0, %ecx
206 jl .L_2TAG_PACKET_6.0.2
/* Save the x87 control word at 24(%esp) and load a copy with bits 8-9
   (0x300, the precision-control field) set for the x87 arithmetic below. */
207 fstcw 24(%esp)
208 movzwl 24(%esp), %edx
209 orl $768, %edx
210 movw %dx, 28(%esp)
211 fldcw 28(%esp)
/* split n: eax = n1 = n >> 1, edx = n2 = n - n1 */
212 movl %eax, %edx
213 sarl $1, %eax
214 subl %eax, %edx
/* xmm6 = mantissa bits of the upper table double (sign/exponent masked off) */
215 movdqa (%ebx), %xmm6
216 pandn %xmm2, %xmm6
/* xmm3 = 2^n1, built as (n1 + 1023) << 52 */
217 addl $1023, %eax
218 movd %eax, %xmm3
219 psllq $52, %xmm3
/* xmm6 = 2^n1 with the table mantissa; xmm4 = 2^n1 * lower table double */
220 orpd %xmm3, %xmm6
221 mulsd %xmm3, %xmm4
/* push polynomial value, then xmm6, then xmm4 onto the x87 stack */
222 movsd %xmm0, 8(%esp)
223 fldl 8(%esp)
224 movsd %xmm6, 16(%esp)
225 fldl 16(%esp)
226 movsd %xmm4, 16(%esp)
227 fldl 16(%esp)
/* xmm4 = 2^n2, built as (n2 + 1023) << 52 */
228 addl $1023, %edx
229 movd %edx, %xmm4
230 psllq $52, %xmm4
/* st(0) = T + p*T, where T = (xmm6 + old xmm4) and p = polynomial value;
   no "- 1" is applied on this path */
231 faddp %st, %st(1)
232 fmul %st, %st(1)
233 faddp %st, %st(1)
/* multiply by 2^n2, round to double through memory, result -> xmm0 */
234 movsd %xmm4, 8(%esp)
235 fldl 8(%esp)
236 fmulp %st, %st(1)
237 fstpl 8(%esp)
238 movsd 8(%esp), %xmm0
/* restore the caller's x87 control word */
239 fldcw 24(%esp)
/* exponent field all ones (0x7FF0) => the result overflowed: PACKET_7 */
240 pextrw $3, %xmm0, %ecx
241 andl $32752, %ecx
242 cmpl $32752, %ecx
243 jae .L_2TAG_PACKET_7.0.2
244 jmp .L_2TAG_PACKET_4.0.2
/* NOTE(review): the next three instructions are unreachable -- they follow
   an unconditional jmp and carry no label. */
245 cmpl $-2147483648, %ecx
246 jb .L_2TAG_PACKET_7.0.2
247 jmp .L_2TAG_PACKET_4.0.2
248.L_2TAG_PACKET_7.0.2:
249 movl $41, %edx
/*
 * PACKET_8: exit used after overflow (edx = 41) or underflow (edx = 42).
 * The value in xmm0 is stored and loaded into st(0) as the result, and
 * xmm0 is reloaded with the original argument. edx is not read anywhere
 * in this file.
 * NOTE(review): presumably a leftover error-code hook -- confirm.
 */
250.L_2TAG_PACKET_8.0.2:
251 movsd %xmm0, (%esp)
252 movsd 128(%esp), %xmm0
253 fldl (%esp)
254 jmp .L_2TAG_PACKET_9.0.2
/* PACKET_10: high dword of x (eax) >= 0x40900000 as a signed value,
   i.e. x >= 1024, +Inf, or a NaN with the sign bit clear */
255.L_2TAG_PACKET_10.0.2:
/* 0x7FF00000 and above: Inf/NaN, handled in PACKET_11 */
256 cmpl $2146435072, %eax
257 jae .L_2TAG_PACKET_11.0.2
/* finite overflow: square the largest finite double (table+1272) */
258 movsd 1272(%ebx), %xmm0
259 mulsd %xmm0, %xmm0
260 movl $41, %edx
261 jmp .L_2TAG_PACKET_8.0.2
/* PACKET_11: x is +/-Inf or NaN. eax/ecx = high dword, edx = low dword. */
262.L_2TAG_PACKET_11.0.2:
263 movl 132(%esp), %eax
264 movl 128(%esp), %edx
265 movl %eax, %ecx
266 andl $2147483647, %eax
/* |high| > 0x7FF00000, or low dword non-zero => NaN (PACKET_12) */
267 cmpl $2146435072, %eax
268 ja .L_2TAG_PACKET_12.0.2
269 cmpl $0, %edx
270 jne .L_2TAG_PACKET_12.0.2
/* infinity: -Inf -> -1.0 (via PACKET_13), +Inf -> +Inf from table+1256 */
271 cmpl $0, %ecx
272 jl .L_2TAG_PACKET_13.0.2
273 movsd 1256(%ebx), %xmm0
274 jmp .L_2TAG_PACKET_4.0.2
275.L_2TAG_PACKET_13.0.2:
276 jmp .L_2TAG_PACKET_6.0.2
/* PACKET_12: NaN input -- return x + x */
277.L_2TAG_PACKET_12.0.2:
278 movsd 128(%esp), %xmm0
279 addsd %xmm0, %xmm0
280 jmp .L_2TAG_PACKET_4.0.2
/*
 * PACKET_14: |x| < 2^-4. Undo the earlier subtraction so eax is again the
 * top word of |x|; below 15504 (0x3C90, i.e. |x| < 2^-54) go to PACKET_15.
 * Otherwise evaluate a polynomial directly in x using the coefficients at
 * table+1184 .. table+1232.
 */
281.L_2TAG_PACKET_14.0.2:
282 addl $16304, %eax
283 cmpl $15504, %eax
284 jb .L_2TAG_PACKET_15.0.2
285 movapd 1184(%ebx), %xmm2
286 pshufd $68, %xmm0, %xmm1
287 movapd 1200(%ebx), %xmm3
288 movapd 1216(%ebx), %xmm4
289 movsd 1232(%ebx), %xmm5
290 mulsd %xmm1, %xmm1
/* xmm6 = 0.5 (0x3FE0 in the top word) */
291 xorpd %xmm6, %xmm6
292 movl $16352, %eax
293 pinsrw $3, %eax, %xmm6
294 mulpd %xmm0, %xmm2
/* xmm7 = 1.0 (0x3FF0 in the top word) */
295 xorpd %xmm7, %xmm7
296 movl $16368, %edx
297 pinsrw $3, %edx, %xmm7
298 addpd %xmm3, %xmm2
299 mulsd %xmm1, %xmm5
300 pshufd $228, %xmm1, %xmm3
301 mulpd %xmm1, %xmm1
/* xmm6 = x/2 */
302 mulsd %xmm0, %xmm6
303 mulpd %xmm0, %xmm2
304 addpd %xmm4, %xmm2
305 movapd %xmm7, %xmm4
/* xmm7 = 1 + x/2 with its low 27 bits cleared */
306 addsd %xmm6, %xmm7
307 mulpd %xmm3, %xmm1
308 psrlq $27, %xmm7
309 psllq $27, %xmm7
/* xmm3 = mask 0xFFFFFFFFFC000000, used to take the high part of x */
310 movsd 1288(%ebx), %xmm3
311 subsd %xmm7, %xmm4
312 mulpd %xmm1, %xmm2
313 addsd %xmm4, %xmm6
314 pshufd $238, %xmm2, %xmm1
315 addsd %xmm2, %xmm6
/* xmm3 = high part of x, xmm0 = x - high part (low part), xmm4 = x */
316 andpd %xmm0, %xmm3
317 movapd %xmm0, %xmm4
318 addsd %xmm6, %xmm1
319 subsd %xmm3, %xmm0
320 addsd %xmm5, %xmm1
/* result = x_hi*xmm7 + x_lo*xmm7 + x*xmm1 */
321 mulsd %xmm7, %xmm3
322 mulsd %xmm7, %xmm0
323 mulsd %xmm1, %xmm4
324 addsd %xmm4, %xmm0
325 addsd %xmm3, %xmm0
326 jmp .L_2TAG_PACKET_4.0.2
/*
 * PACKET_15: |x| < 2^-54; the result is x itself (xmm0 is left unchanged).
 * Top word >= 16 means a non-zero exponent field: return x directly.
 */
327.L_2TAG_PACKET_15.0.2:
328 cmpl $16, %eax
329 jae .L_2TAG_PACKET_3.0.2
/* exponent field is zero: OR bits 0..31 with bits 31..62 to test for +/-0 */
330 movapd %xmm0, %xmm2
331 movd %xmm0, %eax
332 psrlq $31, %xmm2
333 movd %xmm2, %ecx
334 orl %ecx, %eax
335 je .L_2TAG_PACKET_3.0.2
/* non-zero subnormal: square the smallest normal double (top word 0x0010)
   in xmm1; the product is discarded and x is returned through PACKET_8 */
336 movl $16, %edx
337 xorpd %xmm1, %xmm1
338 pinsrw $3, %edx, %xmm1
339 mulsd %xmm1, %xmm1
340 movl $42, %edx
341 jmp .L_2TAG_PACKET_8.0.2
/*
 * PACKET_0: |x| is outside [2^-4, 2^10). eax still holds
 * (top word of |x|) - 16304 from the entry check; negative means small |x|.
 */
342.L_2TAG_PACKET_0.0.2:
343 cmpl $0, %eax
344 jl .L_2TAG_PACKET_14.0.2
/* eax = high dword of x. Signed >= 0x40900000: positive and >= 1024. */
345 movl 132(%esp), %eax
346 cmpl $1083179008, %eax
347 jge .L_2TAG_PACKET_10.0.2
/* unsigned >= 0xFFF00000: -Inf or a NaN with the sign bit set */
348 cmpl $-1048576, %eax
349 jae .L_2TAG_PACKET_11.0.2
/* PACKET_6: return -1.0 (0xBFF0 in the top word); reached by falling
   through for finite x <= -1024, and by jumps from PACKET_1 / PACKET_13 */
350.L_2TAG_PACKET_6.0.2:
351 xorpd %xmm0, %xmm0
352 movl $49136, %eax
353 pinsrw $3, %eax, %xmm0
354 jmp .L_2TAG_PACKET_4.0.2
/* PACKET_4: normal exit -- move the result from xmm0 to st(0) via 48(%esp) */
355.L_2TAG_PACKET_4.0.2:
356 movsd %xmm0, 48(%esp)
357 fldl 48(%esp)
/* PACKET_9: shared epilogue -- restore ebx and the caller's frame */
358.L_2TAG_PACKET_9.0.2:
359 movl 64(%esp), %ebx
360 movl %ebp, %esp
361 popl %ebp
362 ret
363..B2.3:
364END(expm1)
365# -- End expm1
366
367# Start file scope ASM
/*
 * Export expm1l as a second name for expm1. The aliased symbol must be the
 * one defined by ENTRY(expm1) in this file; the earlier spelling
 * (exmp1l / exmp1) exported a misspelled name and referred to a symbol
 * that this file never defines.
 * NOTE(review): assumes long double and double share a representation on
 * this target -- confirm against the platform ABI.
 */
368ALIAS_SYMBOL(expm1l, expm1);
369# End file scope ASM
370 .section .rodata, "a"
371 .align 16
372 .align 16
/*
 * static_const_table -- read-only constants for expm1, 1296 bytes, addressed
 * as byte offsets from %ebx. Each double is two .long values, low dword first.
 *
 *   +0     0xFFF0000000000000 x2   sign+exponent mask (used with pandn)
 *   +16    0x00000000FFFFFFC0 x2   mask that clears the low 6 bits of m
 *   +32    0x000000000000FFC0 x2   1023*64, added before the shift that
 *                                  builds the exponent field of 2^n
 *   +48    0x4338000000000000 x2   1.5*2^52, round-to-integer shifter
 *   +64    0x40571547652B82FE x2   64/log(2)
 *   +80    0x3F862E42FEFA0000 x2   high part of log(2)/64
 *   +96                       x2   second reduction constant, multiplied by m
 *                                  and subtracted from x after the +80 term
 *   +112, +128, +144               polynomial coefficients for the reduced
 *                                  argument
 *   +160 .. +1183                  64 entries of 16 bytes (two doubles each),
 *                                  indexed by (m & 63) * 16
 *   +1184, +1200, +1216, +1232     coefficients loaded on the |x| < 2^-4 path
 *   +1256  0x7FF0000000000000      +Inf
 *   +1272  0x7FEFFFFFFFFFFFFF      largest finite double (squared to overflow)
 *   +1288  0xFFFFFFFFFC000000      mask clearing the low 26 bits of x
 *
 * The slots at +1240, +1248, +1264 and +1280 are not referenced by the code
 * in this file.
 */
373static_const_table:
374 .long 0
375 .long 4293918720
376 .long 0
377 .long 4293918720
378 .long 4294967232
379 .long 0
380 .long 4294967232
381 .long 0
382 .long 65472
383 .long 0
384 .long 65472
385 .long 0
386 .long 0
387 .long 1127743488
388 .long 0
389 .long 1127743488
390 .long 1697350398
391 .long 1079448903
392 .long 1697350398
393 .long 1079448903
394 .long 4277796864
395 .long 1065758274
396 .long 4277796864
397 .long 1065758274
398 .long 3164486458
399 .long 1025308570
400 .long 3164486458
401 .long 1025308570
402 .long 1963358694
403 .long 1065423121
404 .long 1431655765
405 .long 1069897045
406 .long 1431655765
407 .long 1067799893
408 .long 0
409 .long 1071644672
410 .long 381774871
411 .long 1062650220
412 .long 381774871
413 .long 1062650220
414 .long 0
415 .long 0
416 .long 0
417 .long 0
418 .long 1000070955
419 .long 1042145304
420 .long 1040187392
421 .long 11418
422 .long 988267849
423 .long 1039500660
424 .long 3539992576
425 .long 22960
426 .long 36755401
427 .long 1042114290
428 .long 402653184
429 .long 34629
430 .long 3634769483
431 .long 1042178627
432 .long 1820327936
433 .long 46424
434 .long 2155991225
435 .long 1041560680
436 .long 847249408
437 .long 58348
438 .long 2766913307
439 .long 1039293264
440 .long 3489660928
441 .long 70401
442 .long 3651174602
443 .long 1040488175
444 .long 2927624192
445 .long 82586
446 .long 3073892131
447 .long 1042240606
448 .long 1006632960
449 .long 94904
450 .long 1328391742
451 .long 1042019037
452 .long 3942645760
453 .long 107355
454 .long 2650893825
455 .long 1041903210
456 .long 822083584
457 .long 119943
458 .long 2397289153
459 .long 1041802037
460 .long 2281701376
461 .long 132667
462 .long 430997175
463 .long 1042110606
464 .long 1845493760
465 .long 145530
466 .long 1230936525
467 .long 1041801015
468 .long 1702887424
469 .long 158533
470 .long 740675935
471 .long 1040178913
472 .long 4110417920
473 .long 171677
474 .long 3489810261
475 .long 1041825986
476 .long 2793406464
477 .long 184965
478 .long 2532600530
479 .long 1040767882
480 .long 167772160
481 .long 198398
482 .long 3542557060
483 .long 1041827263
484 .long 2986344448
485 .long 211976
486 .long 1401563777
487 .long 1041061093
488 .long 922746880
489 .long 225703
490 .long 3129406026
491 .long 1041852413
492 .long 880803840
493 .long 239579
494 .long 900993572
495 .long 1039283234
496 .long 1275068416
497 .long 253606
498 .long 2115029358
499 .long 1042140042
500 .long 562036736
501 .long 267786
502 .long 1086643152
503 .long 1041785419
504 .long 1610612736
505 .long 282120
506 .long 82864366
507 .long 1041256244
508 .long 3045064704
509 .long 296610
510 .long 2392968152
511 .long 1040913683
512 .long 3573547008
513 .long 311258
514 .long 2905856183
515 .long 1040002214
516 .long 1988100096
517 .long 326066
518 .long 3742008261
519 .long 1040011137
520 .long 1451229184
521 .long 341035
522 .long 863393794
523 .long 1040880621
524 .long 914358272
525 .long 356167
526 .long 1446136837
527 .long 1041372426
528 .long 3707764736
529 .long 371463
530 .long 927855201
531 .long 1040617636
532 .long 360710144
533 .long 386927
534 .long 1492679939
535 .long 1041050306
536 .long 2952790016
537 .long 402558
538 .long 608827001
539 .long 1041582217
540 .long 2181038080
541 .long 418360
542 .long 606260204
543 .long 1042271987
544 .long 1711276032
545 .long 434334
546 .long 3163044019
547 .long 1041843851
548 .long 1006632960
549 .long 450482
550 .long 4148747325
551 .long 1041962972
552 .long 3900702720
553 .long 466805
554 .long 802924201
555 .long 1041275378
556 .long 1442840576
557 .long 483307
558 .long 3052749833
559 .long 1041940577
560 .long 1937768448
561 .long 499988
562 .long 2216116399
563 .long 1041486744
564 .long 914358272
565 .long 516851
566 .long 2729697836
567 .long 1041445764
568 .long 2566914048
569 .long 533897
570 .long 540608356
571 .long 1041310907
572 .long 2600468480
573 .long 551129
574 .long 2916344493
575 .long 1040535661
576 .long 1107296256
577 .long 568549
578 .long 731391814
579 .long 1039497014
580 .long 2566914048
581 .long 586158
582 .long 1024722704
583 .long 1041461625
584 .long 2961178624
585 .long 603959
586 .long 3806831748
587 .long 1041732499
588 .long 2675965952
589 .long 621954
590 .long 238953304
591 .long 1040316488
592 .long 2189426688
593 .long 640145
594 .long 749123235
595 .long 1041725785
596 .long 2063597568
597 .long 658534
598 .long 1168187977
599 .long 1041175214
600 .long 2986344448
601 .long 677123
602 .long 3506096399
603 .long 1042186095
604 .long 1426063360
605 .long 695915
606 .long 1470221620
607 .long 1041675499
608 .long 2566914048
609 .long 714911
610 .long 3182425146
611 .long 1041483134
612 .long 3087007744
613 .long 734114
614 .long 3131698208
615 .long 1042208657
616 .long 4068474880
617 .long 753526
618 .long 2300504125
619 .long 1041428596
620 .long 2415919104
621 .long 773150
622 .long 2290297931
623 .long 1037388400
624 .long 3716153344
625 .long 792987
626 .long 3532148223
627 .long 1041626194
628 .long 771751936
629 .long 813041
630 .long 1161884404
631 .long 1042015258
632 .long 3699376128
633 .long 833312
634 .long 876383176
635 .long 1037968878
636 .long 1241513984
637 .long 853805
638 .long 3379986796
639 .long 1042213153
640 .long 3699376128
641 .long 874520
642 .long 1545797737
643 .long 1041681569
644 .long 58720256
645 .long 895462
646 .long 2925146801
647 .long 1042212567
648 .long 855638016
649 .long 916631
650 .long 1316627971
651 .long 1038516204
652 .long 3883925504
653 .long 938030
654 .long 3267869137
655 .long 1040337004
656 .long 2726297600
657 .long 959663
658 .long 3720868999
659 .long 1041782409
660 .long 3992977408
661 .long 981531
662 .long 433316142
663 .long 1041994064
664 .long 1526726656
665 .long 1003638
666 .long 781232103
667 .long 1040093400
668 .long 2172649472
669 .long 1025985
670 .long 2773927732
671 .long 1053236707
672 .long 381774871
673 .long 1062650220
674 .long 379653899
675 .long 1056571845
676 .long 286331153
677 .long 1065423121
678 .long 436314138
679 .long 1059717536
680 .long 1431655765
681 .long 1067799893
682 .long 1431655765
683 .long 1069897045
684 .long 0
685 .long 1071644672
686 .long 0
687 .long 1072693248
688 .long 0
689 .long 2146435072
690 .long 0
691 .long 0
692 .long 4294967295
693 .long 2146435071
694 .long 0
695 .long 1048576
696 .long 4227858432
697 .long 4294967295
698 .type static_const_table,@object
699 .size static_const_table,1296
700 .data
701 .section .note.GNU-stack, ""
702# End