blob: 58819efc4b68b8d3a2fcfe671906d4c6376d3b35 [file] [log] [blame]
Jingwei Zhang5d4f0e62014-10-31 18:29:18 +08001/*
2Copyright (c) 2014, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31/******************************************************************************/
32// ALGORITHM DESCRIPTION
33// ---------------------
34//
35// Description:
36// Let K = 64 (table size).
37//
38// Four sub-domains:
39// 1. |x| < 1/(2*K)
40// expm1(x) ~ P(x)
41// 2. 1/(2*K) <= |x| <= 56*log(2)
42// e^x - 1 = 2^(x/log(2)) - 1
43// = 2^n * T[j] * (1 + P(y)) - 1
44// 3. 56*log(2) < x < MAX_LOG
45// e^x - 1 ~ e^x = 2^(x/log(2))
46// = 2^n * T[j] * (1 + P(y))
47// 4. x < -56*log(2)
48// e^x - 1 = -1 + e^x
49// ~ -1
50// where
51// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
52// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
53// The values of 2^(j/K) are tabulated as
54// T[j] = T_hi[j] * (1 + T_lo[j]).
55//
56// P(y) is a minimax polynomial approximation of exp(x)-1
57// on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V).
58//
59// In case 3, to avoid problems with arithmetic overflow and underflow,
60// value of 2^n is safely computed as 2^n1 * 2^n2 (n = n1 + n2),
61// where n1 in [-BIAS/2..BIAS/2]
62// and BIAS is a value of exponent bias.
63//
64// Special cases:
65// expm1(NaN) is NaN
66// expm1(+INF) is +INF
67// expm1(-INF) is -1
68// expm1(x) is x for subnormals
69// for finite argument, only expm1(0)=0 is exact.
70// For IEEE double
71// if x > 709.782712893383973096 then expm1(x) overflow
72//
73/******************************************************************************/
74
75#include <private/bionic_asm.h>
76# -- Begin static_func
# static_func: position-independent helper that returns, in %eax, the
# run-time address of static_const_table.  It takes no arguments and
# writes no register other than %eax (the stack pointer is back at its
# entry value when ret executes).
77 .text
78 .align __bionic_asm_align
79 .type static_func, @function
80static_func:
81..B1.1:
# The call pushes the address of ..L2 and popl reads it straight back,
# which leaves the current code address in %eax.
82 call ..L2
83..L2:
84 popl %eax
# Convert the address of ..L2 into the address of the global offset table.
85 lea _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax
# Add the GOT-relative offset of the constant table.
86 lea static_const_table@GOTOFF(%eax), %eax
87 ret
88 .size static_func,.-static_func
89# -- End static_func
90
91# -- Begin expm1
# double expm1(double x): exp(x) - 1, following the algorithm notes in the
# comment block earlier in this file.
#   Input:   x on the stack, at 8(%ebp) after the prologue, which is the
#            same slot as 128(%esp) (low dword) / 132(%esp) (high dword).
#   Output:  result left on the x87 stack in st(0).
#   Saved:   %ebx is spilled to 64(%esp) and restored; %ebp/%esp restored.
#   Scratch: %eax, %ecx, %edx, %xmm0-%xmm7; the x87 control word is
#            changed and restored on the large positive result path.
# %ebx holds the address of static_const_table for the whole function, so
# every N(%ebx) operand below is a byte offset into that table.
92ENTRY(expm1)
93# parameter 1: 8 + %ebp
94..B2.1:
95..B2.2:
96 pushl %ebp
97 movl %esp, %ebp
98 subl $120, %esp
99 movl %ebx, 64(%esp)
100 call static_func
101 movl %eax, %ebx
# xmm0 = { x, x }
102 movsd 128(%esp), %xmm0
103 unpcklpd %xmm0, %xmm0
104 movapd 64(%ebx), %xmm1
105 movapd 48(%ebx), %xmm6
106 movapd 80(%ebx), %xmm2
107 movapd 96(%ebx), %xmm3
# %eax = bits 48..62 of x (biased exponent plus top four mantissa bits).
108 pextrw $3, %xmm0, %eax
109 andl $32767, %eax
# Two-sided range test with a single branch: %edx = 0x408F - top16 and
# %eax = top16 - 0x3FB0.  Their OR has the sign bit set when either
# difference is negative, and the unsigned compare against 0x80000000
# then sends every out-of-range argument to .L_2TAG_PACKET_0.0.2.
110 movl $16527, %edx
111 subl %eax, %edx
112 subl $16304, %eax
113 orl %eax, %edx
114 cmpl $-2147483648, %edx
115 jae .L_2TAG_PACKET_0.0.2
# Main path, argument reduction:
#   xmm1 = x * [64] + [48]; xmm7 keeps that sum, whose low dword is read
#          below as the integer m
#   xmm1 = xmm1 - [48]            (m converted back to a double)
#   xmm0 = x - m*[80] - m*[96]    (the reduced argument)
116 mulpd %xmm0, %xmm1
117 addpd %xmm6, %xmm1
118 movapd %xmm1, %xmm7
119 subpd %xmm6, %xmm1
120 mulpd %xmm1, %xmm2
121 movapd 112(%ebx), %xmm4
122 mulpd %xmm1, %xmm3
123 movapd 128(%ebx), %xmm5
124 subpd %xmm2, %xmm0
# %ecx = (m & 63) * 16 is the byte offset of a 16-byte table entry;
# %eax = %edx = m >> 6 (arithmetic shift), called n in the notes.
125 movd %xmm7, %eax
126 movl %eax, %ecx
127 andl $63, %ecx
128 shll $4, %ecx
129 sarl $6, %eax
130 movl %eax, %edx
131 subpd %xmm3, %xmm0
# xmm2 = table entry selected by %ecx (two doubles).
132 movapd 160(%ebx,%ecx), %xmm2
# Polynomial in the reduced argument using the coefficients at offsets
# 112, 128 and 144.  xmm1 keeps a copy of the reduced argument.
133 movsd 144(%ebx), %xmm3
134 mulpd %xmm0, %xmm4
135 movapd %xmm0, %xmm1
136 mulpd %xmm0, %xmm0
137 mulsd %xmm0, %xmm3
138 addpd %xmm4, %xmm5
139 mulsd %xmm0, %xmm0
# xmm4 keeps the whole table entry; xmm2 becomes its upper double
# duplicated into both lanes.
140 movapd %xmm2, %xmm4
141 unpckhpd %xmm2, %xmm2
# Build the bit pattern of 2^n in xmm7: clear the low six bits of m, add
# 1023*64, then shift left by 46 so that (n + 1023) lands in the exponent
# field of a double.
142 movdqa 16(%ebx), %xmm6
143 pand %xmm6, %xmm7
144 movdqa 32(%ebx), %xmm6
145 paddq %xmm6, %xmm7
146 psllq $46, %xmm7
147 mulsd %xmm0, %xmm3
148 mulpd %xmm5, %xmm0
# Unsigned test: n outside [-894, 1022] goes to .L_2TAG_PACKET_1.0.2.
149 addl $894, %edx
150 cmpl $1916, %edx
151 ja .L_2TAG_PACKET_1.0.2
# In-range n.  The remaining code combines the scaled table value, the
# polynomial and the constant 1.0 (top word 0x3FF0).
# NOTE(review): this appears to evaluate 2^n * T[j] * (1 + P(y)) - 1 with
# a hi/lo split of (1 + y) to limit cancellation -- confirm against the
# original Intel description before relying on the details.
152 addsd %xmm3, %xmm0
153 xorpd %xmm3, %xmm3
154 movl $16368, %eax
155 pinsrw $3, %eax, %xmm3
# xmm2 |= 2^n bits; xmm7 = 2^n * (low double of the table entry).
156 orpd %xmm7, %xmm2
157 mulsd %xmm4, %xmm7
158 movapd %xmm3, %xmm6
159 addsd %xmm1, %xmm3
# %edx = top 16 bits of the scaled table value, tested further down.
160 pextrw $3, %xmm2, %edx
161 pshufd $238, %xmm0, %xmm5
# Clear the low 38 bits of (1.0 + reduced argument).
162 psrlq $38, %xmm3
163 psllq $38, %xmm3
164 movapd %xmm2, %xmm4
165 subsd %xmm3, %xmm6
166 addsd %xmm5, %xmm0
167 addsd %xmm6, %xmm1
168 addsd %xmm7, %xmm4
169 mulsd %xmm3, %xmm7
170 mulsd %xmm2, %xmm3
# xmm5 = 1.0
171 xorpd %xmm5, %xmm5
172 movl $16368, %eax
173 pinsrw $3, %eax, %xmm5
174 addsd %xmm1, %xmm0
# Same OR-of-differences trick as above: branch when the top word of the
# scaled table value lies outside [0x3F80, 0x4320].
175 movl $17184, %ecx
176 subl %edx, %ecx
177 subl $16256, %edx
178 orl %edx, %ecx
179 jl .L_2TAG_PACKET_2.0.2
# Middle range: 1.0 is subtracted from xmm3 before the final additions.
180 mulsd %xmm4, %xmm0
181 subsd %xmm5, %xmm3
182 addsd %xmm7, %xmm0
183 addsd %xmm3, %xmm0
# Trampoline: several paths that already hold the result in xmm0 come
# through here on their way to the common return code.
184.L_2TAG_PACKET_3.0.2:
185 jmp .L_2TAG_PACKET_4.0.2
186.L_2TAG_PACKET_2.0.2:
# Out of the middle range.  %edx < 0 means the top word was below 0x3F80.
187 cmpl $0, %edx
188 jl .L_2TAG_PACKET_5.0.2
# Top word above 0x4320: 1.0 is subtracted from xmm7 instead.
189 mulsd %xmm4, %xmm0
190 subsd %xmm5, %xmm7
191 addsd %xmm7, %xmm0
192 addsd %xmm3, %xmm0
193 jmp .L_2TAG_PACKET_3.0.2
194.L_2TAG_PACKET_5.0.2:
# Top word below 0x3F80: 1.0 is subtracted last, from the finished sum.
195 mulsd %xmm4, %xmm0
196 addsd %xmm7, %xmm0
197 addsd %xmm3, %xmm0
198 subsd %xmm5, %xmm0
199 jmp .L_2TAG_PACKET_3.0.2
200.L_2TAG_PACKET_1.0.2:
# n outside [-894, 1022].  %ecx = high dword of x, used for its sign.
201 movl 132(%esp), %ecx
202 addsd %xmm0, %xmm1
203 unpckhpd %xmm0, %xmm0
204 addsd %xmm1, %xmm0
205 cmpl $0, %ecx
# Negative x on this path: return -1.0.
206 jl .L_2TAG_PACKET_6.0.2
# Positive x: finish on the x87 unit.  The control word is saved at
# 24(%esp) and reloaded with bits 8 and 9 set (the precision-control
# field; both bits set selects extended precision per the Intel SDM).
207 fstcw 24(%esp)
208 movzwl 24(%esp), %edx
209 orl $768, %edx
210 movw %dx, 28(%esp)
211 fldcw 28(%esp)
# Split n into %eax = n >> 1 and %edx = n - (n >> 1), so the scaling is
# applied as two separate powers of two.
212 movl %eax, %edx
213 sarl $1, %eax
214 subl %eax, %edx
# xmm6 = table value with its sign/exponent bits cleared (mask at offset
# 0), then OR-ed with the exponent of 2^(n >> 1) built in xmm3.
215 movdqa (%ebx), %xmm6
216 pandn %xmm2, %xmm6
217 addl $1023, %eax
218 movd %eax, %xmm3
219 psllq $52, %xmm3
220 orpd %xmm3, %xmm6
221 mulsd %xmm3, %xmm4
# Hand the three operands to the x87 stack through memory.
222 movsd %xmm0, 8(%esp)
223 fldl 8(%esp)
224 movsd %xmm6, 16(%esp)
225 fldl 16(%esp)
226 movsd %xmm4, 16(%esp)
227 fldl 16(%esp)
# xmm4 = 2^(n - (n >> 1)), the second scale factor.
228 addl $1023, %edx
229 movd %edx, %xmm4
230 psllq $52, %xmm4
231 faddp %st, %st(1)
232 fmul %st, %st(1)
233 faddp %st, %st(1)
234 movsd %xmm4, 8(%esp)
235 fldl 8(%esp)
236 fmulp %st, %st(1)
# Round to double through memory, then restore the caller control word.
237 fstpl 8(%esp)
238 movsd 8(%esp), %xmm0
239 fldcw 24(%esp)
# An all-ones exponent field in the result means it overflowed.
240 pextrw $3, %xmm0, %ecx
241 andl $32752, %ecx
242 cmpl $32752, %ecx
243 jae .L_2TAG_PACKET_7.0.2
244 jmp .L_2TAG_PACKET_4.0.2
# NOTE(review): the next three instructions follow an unconditional jmp
# and carry no label, so they look unreachable -- confirm before removing.
245 cmpl $-2147483648, %ecx
246 jb .L_2TAG_PACKET_7.0.2
247 jmp .L_2TAG_PACKET_4.0.2
248.L_2TAG_PACKET_7.0.2:
# NOTE(review): %edx is set to 41 here (and to 42 on the subnormal path)
# but nothing visible in this function reads it afterwards; presumably a
# leftover error code from the original library -- confirm.
249 movl $41, %edx
250.L_2TAG_PACKET_8.0.2:
# Exceptional return: the value in xmm0 is pushed onto the x87 stack as
# the result, xmm0 is reloaded with the original x, and control skips the
# normal store in .L_2TAG_PACKET_4.0.2.
251 movsd %xmm0, (%esp)
252 movsd 128(%esp), %xmm0
253 fldl (%esp)
254 jmp .L_2TAG_PACKET_9.0.2
255.L_2TAG_PACKET_10.0.2:
# Reached with %eax = high dword of x, which is >= 0x40900000 (signed).
# 0x7FF00000 and above means +INF or a positive NaN.
256 cmpl $2146435072, %eax
257 jae .L_2TAG_PACKET_11.0.2
# Finite and too large: square the constant at offset 1272 (the largest
# finite double) so the hardware produces the overflow result.
258 movsd 1272(%ebx), %xmm0
259 mulsd %xmm0, %xmm0
260 movl $41, %edx
261 jmp .L_2TAG_PACKET_8.0.2
262.L_2TAG_PACKET_11.0.2:
# Infinity / NaN classification on the raw words of x.
263 movl 132(%esp), %eax
264 movl 128(%esp), %edx
265 movl %eax, %ecx
266 andl $2147483647, %eax
267 cmpl $2146435072, %eax
# NaN when the high dword exceeds 0x7FF00000 or the low dword is non-zero.
268 ja .L_2TAG_PACKET_12.0.2
269 cmpl $0, %edx
270 jne .L_2TAG_PACKET_12.0.2
271 cmpl $0, %ecx
272 jl .L_2TAG_PACKET_13.0.2
# +INF: return the constant at offset 1256 (+INF).
273 movsd 1256(%ebx), %xmm0
274 jmp .L_2TAG_PACKET_4.0.2
275.L_2TAG_PACKET_13.0.2:
# -INF: return -1.0.
276 jmp .L_2TAG_PACKET_6.0.2
277.L_2TAG_PACKET_12.0.2:
# NaN: return x + x.
278 movsd 128(%esp), %xmm0
279 addsd %xmm0, %xmm0
280 jmp .L_2TAG_PACKET_4.0.2
281.L_2TAG_PACKET_14.0.2:
# Small-argument path (top word of |x| below 0x3FB0).  Undo the earlier
# subtraction so %eax is the top word again; below 0x3C90 the argument is
# treated as tiny.
282 addl $16304, %eax
283 cmpl $15504, %eax
284 jb .L_2TAG_PACKET_15.0.2
# Direct polynomial in x using the coefficients at offsets 1184..1232.
# NOTE(review): the result looks like x * (1 + x/2 + higher-order terms),
# with 1 + x/2 rounded by clearing its low 27 bits and x split by the
# mask at offset 1288 -- confirm.
285 movapd 1184(%ebx), %xmm2
286 pshufd $68, %xmm0, %xmm1
287 movapd 1200(%ebx), %xmm3
288 movapd 1216(%ebx), %xmm4
289 movsd 1232(%ebx), %xmm5
290 mulsd %xmm1, %xmm1
# xmm6 = 0.5 (top word 0x3FE0)
291 xorpd %xmm6, %xmm6
292 movl $16352, %eax
293 pinsrw $3, %eax, %xmm6
294 mulpd %xmm0, %xmm2
# xmm7 = 1.0 (top word 0x3FF0)
295 xorpd %xmm7, %xmm7
296 movl $16368, %edx
297 pinsrw $3, %edx, %xmm7
298 addpd %xmm3, %xmm2
299 mulsd %xmm1, %xmm5
300 pshufd $228, %xmm1, %xmm3
301 mulpd %xmm1, %xmm1
302 mulsd %xmm0, %xmm6
303 mulpd %xmm0, %xmm2
304 addpd %xmm4, %xmm2
305 movapd %xmm7, %xmm4
306 addsd %xmm6, %xmm7
307 mulpd %xmm3, %xmm1
308 psrlq $27, %xmm7
309 psllq $27, %xmm7
310 movsd 1288(%ebx), %xmm3
311 subsd %xmm7, %xmm4
312 mulpd %xmm1, %xmm2
313 addsd %xmm4, %xmm6
314 pshufd $238, %xmm2, %xmm1
315 addsd %xmm2, %xmm6
# xmm3 = x with its low 26 bits cleared; xmm0 becomes the remainder.
316 andpd %xmm0, %xmm3
317 movapd %xmm0, %xmm4
318 addsd %xmm6, %xmm1
319 subsd %xmm3, %xmm0
320 addsd %xmm5, %xmm1
321 mulsd %xmm7, %xmm3
322 mulsd %xmm7, %xmm0
323 mulsd %xmm1, %xmm4
324 addsd %xmm4, %xmm0
325 addsd %xmm3, %xmm0
326 jmp .L_2TAG_PACKET_4.0.2
327.L_2TAG_PACKET_15.0.2:
# Tiny argument.  A top word of 16 or more means a non-zero exponent
# field: x is returned unchanged (xmm0 still holds x).
328 cmpl $16, %eax
329 jae .L_2TAG_PACKET_3.0.2
# Exponent field is zero.  OR the low dword of x with bits 31..62 of x;
# a zero result means x is +0 or -0, which is also returned unchanged.
330 movapd %xmm0, %xmm2
331 movd %xmm0, %eax
332 psrlq $31, %xmm2
333 movd %xmm2, %ecx
334 orl %ecx, %eax
335 je .L_2TAG_PACKET_3.0.2
# Non-zero subnormal: square a double whose top word is 0x0010 (the
# product is discarded, presumably computed only to raise the underflow
# flag -- confirm), then return x through the exceptional return code.
336 movl $16, %edx
337 xorpd %xmm1, %xmm1
338 pinsrw $3, %edx, %xmm1
339 mulsd %xmm1, %xmm1
340 movl $42, %edx
341 jmp .L_2TAG_PACKET_8.0.2
342.L_2TAG_PACKET_0.0.2:
# Out-of-range dispatch.  %eax still holds top16 - 0x3FB0: negative means
# a small argument.
343 cmpl $0, %eax
344 jl .L_2TAG_PACKET_14.0.2
# Large magnitude.  The signed compare catches a positive high dword at
# or above 0x40900000; the unsigned compare catches a high dword at or
# above 0xFFF00000 (-INF or a negative NaN).
345 movl 132(%esp), %eax
346 cmpl $1083179008, %eax
347 jge .L_2TAG_PACKET_10.0.2
348 cmpl $-1048576, %eax
349 jae .L_2TAG_PACKET_11.0.2
350.L_2TAG_PACKET_6.0.2:
# Return -1.0 (top word 0xBFF0).
351 xorpd %xmm0, %xmm0
352 movl $49136, %eax
353 pinsrw $3, %eax, %xmm0
354 jmp .L_2TAG_PACKET_4.0.2
355.L_2TAG_PACKET_4.0.2:
# Common return: move the double in xmm0 onto the x87 stack via memory.
356 movsd %xmm0, 48(%esp)
357 fldl 48(%esp)
358.L_2TAG_PACKET_9.0.2:
# Epilogue: restore %ebx and the caller frame.
359 movl 64(%esp), %ebx
360 movl %ebp, %esp
361 popl %ebp
362 ret
363..B2.3:
364END(expm1)
365# -- End expm1
366
367# Start file scope ASM
# Define expm1l as a weak symbol with the same value as expm1, so both
# names resolve to the routine above.
# NOTE(review): presumably valid because long double and double share a
# representation on this target -- confirm.
368.weak expm1l
369.equ expm1l, expm1
370# End file scope ASM
371 .section .rodata, "a"
372 .align 16
373 .align 16
# static_const_table: read-only constants for expm1, 1296 bytes in total.
# expm1 addresses the table by byte offset from %ebx.  Each double is
# stored as two .long values, low dword first.  The offsets noted below
# are the ones expm1 uses; interpretations of the values are derived from
# the bit patterns and from how expm1 consumes them.
374static_const_table:
# Offset 0: mask 0xFFF0000000000000 (sign and exponent bits), two lanes.
375 .long 0
376 .long 4293918720
377 .long 0
378 .long 4293918720
# Offset 16: mask 0x00000000FFFFFFC0 (low dword minus its low six bits).
379 .long 4294967232
380 .long 0
381 .long 4294967232
382 .long 0
# Offset 32: integer 0xFFC0 = 1023 * 64 (exponent bias, pre-shifted).
383 .long 65472
384 .long 0
385 .long 65472
386 .long 0
# Offset 48: 0x4338000000000000, the constant added and then subtracted
# to round x * [64] to an integer.
387 .long 0
388 .long 1127743488
389 .long 0
390 .long 1127743488
# Offset 64: 0x40571547652B82FE, approximately 64/log(2).
391 .long 1697350398
392 .long 1079448903
393 .long 1697350398
394 .long 1079448903
# Offset 80: 0x3F862E42FEFA0000, leading part of log(2)/64.
395 .long 4277796864
396 .long 1065758274
397 .long 4277796864
398 .long 1065758274
# Offset 96: trailing part of log(2)/64 (presumably; it is subtracted
# after the leading part during argument reduction).
399 .long 3164486458
400 .long 1025308570
401 .long 3164486458
402 .long 1025308570
# Offsets 112, 128, 144: polynomial coefficients for the main path.
403 .long 1963358694
404 .long 1065423121
405 .long 1431655765
406 .long 1069897045
407 .long 1431655765
408 .long 1067799893
409 .long 0
410 .long 1071644672
411 .long 381774871
412 .long 1062650220
413 .long 381774871
414 .long 1062650220
# Offset 160: 64 entries of 16 bytes, indexed by (m & 63) * 16.  Each
# entry is a pair of doubles; the upper one has a zero exponent field and
# receives the exponent of 2^n in expm1.
# NOTE(review): presumably the T_lo / T_hi split of 2^(j/64) described in
# the algorithm notes -- confirm which half is which.
415 .long 0
416 .long 0
417 .long 0
418 .long 0
419 .long 1000070955
420 .long 1042145304
421 .long 1040187392
422 .long 11418
423 .long 988267849
424 .long 1039500660
425 .long 3539992576
426 .long 22960
427 .long 36755401
428 .long 1042114290
429 .long 402653184
430 .long 34629
431 .long 3634769483
432 .long 1042178627
433 .long 1820327936
434 .long 46424
435 .long 2155991225
436 .long 1041560680
437 .long 847249408
438 .long 58348
439 .long 2766913307
440 .long 1039293264
441 .long 3489660928
442 .long 70401
443 .long 3651174602
444 .long 1040488175
445 .long 2927624192
446 .long 82586
447 .long 3073892131
448 .long 1042240606
449 .long 1006632960
450 .long 94904
451 .long 1328391742
452 .long 1042019037
453 .long 3942645760
454 .long 107355
455 .long 2650893825
456 .long 1041903210
457 .long 822083584
458 .long 119943
459 .long 2397289153
460 .long 1041802037
461 .long 2281701376
462 .long 132667
463 .long 430997175
464 .long 1042110606
465 .long 1845493760
466 .long 145530
467 .long 1230936525
468 .long 1041801015
469 .long 1702887424
470 .long 158533
471 .long 740675935
472 .long 1040178913
473 .long 4110417920
474 .long 171677
475 .long 3489810261
476 .long 1041825986
477 .long 2793406464
478 .long 184965
479 .long 2532600530
480 .long 1040767882
481 .long 167772160
482 .long 198398
483 .long 3542557060
484 .long 1041827263
485 .long 2986344448
486 .long 211976
487 .long 1401563777
488 .long 1041061093
489 .long 922746880
490 .long 225703
491 .long 3129406026
492 .long 1041852413
493 .long 880803840
494 .long 239579
495 .long 900993572
496 .long 1039283234
497 .long 1275068416
498 .long 253606
499 .long 2115029358
500 .long 1042140042
501 .long 562036736
502 .long 267786
503 .long 1086643152
504 .long 1041785419
505 .long 1610612736
506 .long 282120
507 .long 82864366
508 .long 1041256244
509 .long 3045064704
510 .long 296610
511 .long 2392968152
512 .long 1040913683
513 .long 3573547008
514 .long 311258
515 .long 2905856183
516 .long 1040002214
517 .long 1988100096
518 .long 326066
519 .long 3742008261
520 .long 1040011137
521 .long 1451229184
522 .long 341035
523 .long 863393794
524 .long 1040880621
525 .long 914358272
526 .long 356167
527 .long 1446136837
528 .long 1041372426
529 .long 3707764736
530 .long 371463
531 .long 927855201
532 .long 1040617636
533 .long 360710144
534 .long 386927
535 .long 1492679939
536 .long 1041050306
537 .long 2952790016
538 .long 402558
539 .long 608827001
540 .long 1041582217
541 .long 2181038080
542 .long 418360
543 .long 606260204
544 .long 1042271987
545 .long 1711276032
546 .long 434334
547 .long 3163044019
548 .long 1041843851
549 .long 1006632960
550 .long 450482
551 .long 4148747325
552 .long 1041962972
553 .long 3900702720
554 .long 466805
555 .long 802924201
556 .long 1041275378
557 .long 1442840576
558 .long 483307
559 .long 3052749833
560 .long 1041940577
561 .long 1937768448
562 .long 499988
563 .long 2216116399
564 .long 1041486744
565 .long 914358272
566 .long 516851
567 .long 2729697836
568 .long 1041445764
569 .long 2566914048
570 .long 533897
571 .long 540608356
572 .long 1041310907
573 .long 2600468480
574 .long 551129
575 .long 2916344493
576 .long 1040535661
577 .long 1107296256
578 .long 568549
579 .long 731391814
580 .long 1039497014
581 .long 2566914048
582 .long 586158
583 .long 1024722704
584 .long 1041461625
585 .long 2961178624
586 .long 603959
587 .long 3806831748
588 .long 1041732499
589 .long 2675965952
590 .long 621954
591 .long 238953304
592 .long 1040316488
593 .long 2189426688
594 .long 640145
595 .long 749123235
596 .long 1041725785
597 .long 2063597568
598 .long 658534
599 .long 1168187977
600 .long 1041175214
601 .long 2986344448
602 .long 677123
603 .long 3506096399
604 .long 1042186095
605 .long 1426063360
606 .long 695915
607 .long 1470221620
608 .long 1041675499
609 .long 2566914048
610 .long 714911
611 .long 3182425146
612 .long 1041483134
613 .long 3087007744
614 .long 734114
615 .long 3131698208
616 .long 1042208657
617 .long 4068474880
618 .long 753526
619 .long 2300504125
620 .long 1041428596
621 .long 2415919104
622 .long 773150
623 .long 2290297931
624 .long 1037388400
625 .long 3716153344
626 .long 792987
627 .long 3532148223
628 .long 1041626194
629 .long 771751936
630 .long 813041
631 .long 1161884404
632 .long 1042015258
633 .long 3699376128
634 .long 833312
635 .long 876383176
636 .long 1037968878
637 .long 1241513984
638 .long 853805
639 .long 3379986796
640 .long 1042213153
641 .long 3699376128
642 .long 874520
643 .long 1545797737
644 .long 1041681569
645 .long 58720256
646 .long 895462
647 .long 2925146801
648 .long 1042212567
649 .long 855638016
650 .long 916631
651 .long 1316627971
652 .long 1038516204
653 .long 3883925504
654 .long 938030
655 .long 3267869137
656 .long 1040337004
657 .long 2726297600
658 .long 959663
659 .long 3720868999
660 .long 1041782409
661 .long 3992977408
662 .long 981531
663 .long 433316142
664 .long 1041994064
665 .long 1526726656
666 .long 1003638
667 .long 781232103
668 .long 1040093400
669 .long 2172649472
670 .long 1025985
# Offsets 1184, 1200, 1216: polynomial coefficients for the small-argument
# path in expm1.
671 .long 2773927732
672 .long 1053236707
673 .long 381774871
674 .long 1062650220
675 .long 379653899
676 .long 1056571845
677 .long 286331153
678 .long 1065423121
679 .long 436314138
680 .long 1059717536
681 .long 1431655765
682 .long 1067799893
# Offset 1232: 0x3FC5555555555555 (approximately 1/6), loaded as a scalar
# coefficient on the small-argument path.
683 .long 1431655765
684 .long 1069897045
# Offset 1240: 0x3FE0000000000000 (0.5); not referenced by expm1 above.
685 .long 0
686 .long 1071644672
# Offset 1248: 0x3FF0000000000000 (1.0); not referenced by expm1 above.
687 .long 0
688 .long 1072693248
# Offset 1256: 0x7FF0000000000000 (+INF), returned for x = +INF.
689 .long 0
690 .long 2146435072
# Offset 1264: zero; not referenced by expm1 above.
691 .long 0
692 .long 0
# Offset 1272: 0x7FEFFFFFFFFFFFFF (largest finite double), squared to
# produce the overflow result.
693 .long 4294967295
694 .long 2146435071
# Offset 1280: 0x0010000000000000 (smallest normal double); not
# referenced by expm1 above.
695 .long 0
696 .long 1048576
# Offset 1288: mask 0xFFFFFFFFFC000000, clears the low 26 bits of x on the
# small-argument path.
697 .long 4227858432
698 .long 4294967295
699 .type static_const_table,@object
700 .size static_const_table,1296
701 .data
# Empty .note.GNU-stack section (by convention this marks the object as
# not requiring an executable stack).
702 .section .note.GNU-stack, ""
703# End