/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//
//       e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//
//  where
//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//       m = n*K + j,           m,n,j - signed integers, j in [-K/2..K/2]
//       values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
//       P(y) is a minimax polynomial approximation of exp(y)-1
//       on the small interval [-log(2)/K..log(2)/K] (the coefficients were
//       calculated with Maple V).
//
//  To avoid problems with arithmetic overflow and underflow, the value of
//  2^n is safely computed as 2^n1 * 2^n2, where n1 in [-BIAS/2..BIAS/2]
//  and BIAS is the value of the exponent bias.
//
// Special cases:
//  exp(NaN) = NaN
//  exp(+INF) = +INF
//  exp(-INF) = 0
//  exp(x) = 1 for subnormals
//  for finite argument, only exp(0)=1 is exact
//  For IEEE double
//    if x >  709.782712893383973096 then exp(x) overflow
//    if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/

#include <private/bionic_asm.h>
// -- Begin exp
/*
 * double exp(double x) -- e raised to the power x.
 *
 * Syntax:  GNU as, AT&T operand order (source first, destination last).
 * In:      %xmm0 = x (parameter 1)
 * Out:     %xmm0 = result (low 64 bits)
 * Scratch: %eax, %ecx, %edx, %r8, %xmm1-%xmm7, flags
 * Stack:   24-byte frame; the function makes no calls.
 *             (%rsp)  32-bit status code, written on some paths
 *            8(%rsp)  copy of x (low dword at 8, high dword at 12)
 *           16(%rsp)  result spill on the status-code paths
 *
 * Outline:
 *   1. 2^-54 <= |x| < 1024: m = round(x*64/ln2), y = x - m*ln2/64,
 *      j = m mod 64, n = floor(m/64); result = T_hi[j]*2^n * (1 + T_lo[j] + P(y)).
 *   2. n outside [-894, 1022]: the scaling by 2^n is split in two steps so
 *      the intermediate values stay representable.
 *   3. |x| < 2^-54 returns x + 1.0; |x| >= 1024, infinities and NaN are
 *      handled separately.
 *
 * The ..___tag_value_exp.N labels mark the stack-pointer changes for the
 * hand-written .eh_frame record at the end of this file; they must stay
 * attached to the same instructions.
 *
 * NOTE(review): the status codes 14 and 15 stored at (%rsp), and the spill
 * to 16(%rsp), are never consumed in this file -- presumably left over from
 * an error-reporting hook. Confirm before removing them.
 */
ENTRY(exp)
..___tag_value_exp.1:
        subq    $24, %rsp
..___tag_value_exp.3:
        movsd   %xmm0, 8(%rsp)                  // keep x for the special-case paths

        // Load the reduction constants and classify |x| by its top 16 bits.
        unpcklpd %xmm0, %xmm0                   // xmm0 = {x, x}
        movapd  cv(%rip), %xmm1                 // 64/ln(2)
        movapd  Shifter(%rip), %xmm6            // 1.5 * 2^52
        movapd  16+cv(%rip), %xmm2              // ln(2)/64, high part
        movapd  32+cv(%rip), %xmm3              // ln(2)/64, low part
        pextrw  $3, %xmm0, %eax                 // sign, exponent, top 4 fraction bits
        andl    $0x7fff, %eax                   // drop the sign bit
        movl    $0x408f, %edx
        subl    %eax, %edx                      // negative when |x| >= 1024 (or Inf/NaN)
        subl    $0x3c90, %eax                   // negative when |x| < 2^-54
        orl     %eax, %edx                      // SF is set if either difference is negative
        js      .Lexp_out_of_range

        // Argument reduction: m = round(x * 64/ln(2)), y = x - m*ln(2)/64.
        mulpd   %xmm0, %xmm1
        addpd   %xmm6, %xmm1                    // the shifter leaves integer m in the low fraction bits
        movapd  %xmm1, %xmm7                    // keep the shifted sum to extract m as an integer
        subpd   %xmm6, %xmm1                    // m as a double
        mulpd   %xmm1, %xmm2                    // m * (ln2/64 high)
        movapd  64+cv(%rip), %xmm4              // {~1/720, ~1/24}
        mulpd   %xmm1, %xmm3                    // m * (ln2/64 low)
        movapd  80+cv(%rip), %xmm5              // {~1/120, ~1/6}
        subpd   %xmm2, %xmm0
        movd    %xmm7, %eax                     // eax = m (low 32 bits of the shifted sum)
        movl    %eax, %ecx
        andl    $63, %ecx                       // j = m mod 64
        shll    $4, %ecx                        // byte offset of 16-byte table entry j
        sarl    $6, %eax                        // n = floor(m / 64)
        movl    %eax, %edx

        // Exponent bits of 2^n: ((m & ~63) + 1023*64) << 46 == (n + 1023) << 52.
        movdqa  mmask(%rip), %xmm6
        pand    %xmm6, %xmm7
        movdqa  bias(%rip), %xmm6
        paddq   %xmm6, %xmm7
        psllq   $46, %xmm7
        subpd   %xmm3, %xmm0                    // xmm0 = {y, y}
        lea     Tbl_addr(%rip), %r8
        movapd  (%rcx,%r8), %xmm2               // {T_lo[j], fraction bits of T_hi[j]}

        // Polynomial, evaluated in two lanes:
        //   P(y) ~= y + c2*y^2 + y^3*(c3 + c4*y) + y^5*(c5 + c6*y)
        mulpd   %xmm0, %xmm4                    // {c6*y, c4*y}
        movapd  %xmm0, %xmm6
        movapd  %xmm0, %xmm1
        mulpd   %xmm6, %xmm6                    // y^2
        mulpd   %xmm6, %xmm0                    // y^3
        addpd   %xmm4, %xmm5                    // {c5 + c6*y, c3 + c4*y}
        mulsd   %xmm6, %xmm0                    // low lane y^5, high lane stays y^3
        mulpd   48+cv(%rip), %xmm6              // c2 * y^2 (c2 is just below 0.5)
        addsd   %xmm2, %xmm1                    // y + T_lo[j]
        unpckhpd %xmm2, %xmm2                   // T_hi[j] fraction bits in both lanes
        mulpd   %xmm5, %xmm0                    // {y^5*(c5 + c6*y), y^3*(c3 + c4*y)}
        addsd   %xmm0, %xmm1
        orpd    %xmm7, %xmm2                    // xmm2 = T_hi[j] * 2^n
        unpckhpd %xmm0, %xmm0                   // bring the high-lane term down
        addsd   %xmm1, %xmm0
        addsd   %xmm6, %xmm0                    // xmm0 = T_lo[j] + P(y)

        // The one-step scaling below is used only for -894 <= n <= 1022.
        addl    $894, %edx
        cmpl    $1916, %edx
        ja      .Lexp_extreme_scale             // unsigned compare also catches n < -894
        mulsd   %xmm2, %xmm0
        addsd   %xmm2, %xmm0                    // T*2^n * (T_lo + P(y)) + T*2^n
        jmp     .Lexp_done

        // n < -894 or n > 1022: move n>>1 out of the exponent of xmm2 and
        // apply it at the end as a separate factor 2^(n>>1).
.Lexp_extreme_scale:
        xorpd   %xmm3, %xmm3
        movapd  ALLONES(%rip), %xmm4
        movl    $-1022, %edx
        subl    %eax, %edx                      // edx = -1022 - n
        movd    %edx, %xmm5
        psllq   %xmm5, %xmm4                    // mask = ~0 << edx (zero once the count exceeds 63)
        movl    %eax, %ecx                      // ecx = n
        sarl    $1, %eax                        // eax = n >> 1
        pinsrw  $3, %eax, %xmm3
        movapd  ebias(%rip), %xmm6              // bit pattern of 1.0
        psllq   $4, %xmm3                       // (n >> 1) moved into the exponent field
        psubd   %xmm3, %xmm2                    // xmm2 = T_hi[j] * 2^(n - (n >> 1))
        mulsd   %xmm2, %xmm0
        cmpl    $52, %edx
        jg      .Lexp_below_denormal            // n < -1074
        andpd   %xmm2, %xmm4                    // xmm4 = masked high bits of xmm2
        paddd   %xmm6, %xmm3                    // xmm3 = 2^(n >> 1)
        subsd   %xmm4, %xmm2                    // xmm2 = bits of xmm2 below the mask
        addsd   %xmm2, %xmm0                    // fold them into the low-order sum
        cmpl    $1023, %ecx
        jge     .Lexp_near_overflow
        pextrw  $3, %xmm0, %ecx
        andl    $0x8000, %ecx                   // sign of the low-order sum
        orl     %ecx, %edx                      // ZF: n == -1022 and the sum is not negative
        jz      .Lexp_scale_simple
        movapd  %xmm0, %xmm6                    // keep the low-order sum for the fix-up below
        addsd   %xmm4, %xmm0
        mulsd   %xmm3, %xmm0
        pextrw  $3, %xmm0, %ecx
        andl    $0x7ff0, %ecx                   // exponent field of the result
        jnz     .Lexp_done                      // non-zero exponent: result is normal

        // The scaled result has a zero exponent field. The high and low
        // parts are scaled separately and recombined with integer
        // arithmetic on their bit patterns (appears to be a rounding
        // fix-up for denormal results -- TODO confirm).
.Lexp_denormal_result:
        mulsd   %xmm3, %xmm6                    // low part  * 2^(n >> 1)
        mulsd   %xmm3, %xmm4                    // high part * 2^(n >> 1)
        movq    %xmm6, %xmm0
        pxor    %xmm4, %xmm6
        psrad   $31, %xmm6
        pshufd  $85, %xmm6, %xmm6               // all-ones if the two parts differ in sign
        psllq   $1, %xmm0
        psrlq   $1, %xmm0                       // clear the sign bit of the low part
        pxor    %xmm6, %xmm0
        psrlq   $63, %xmm6
        paddq   %xmm6, %xmm0                    // two's-complement negate when the signs differ
        paddq   %xmm4, %xmm0                    // add to the high part's bit pattern
        movl    $15, (%rsp)                     // status code 15
        jmp     .Lexp_status_return

.Lexp_scale_simple:
        addsd   %xmm4, %xmm0
        mulsd   %xmm3, %xmm0
        jmp     .Lexp_done

        // n >= 1023: finish the scaling, then test whether it overflowed.
.Lexp_near_overflow:
        addsd   %xmm4, %xmm0
        mulsd   %xmm3, %xmm0
        pextrw  $3, %xmm0, %ecx
        andl    $0x7ff0, %ecx
        cmpl    $0x7ff0, %ecx                   // exponent field all ones?
        jae     .Lexp_flag_overflow
        jmp     .Lexp_done

        // n < -1074.
.Lexp_below_denormal:
        paddd   %xmm6, %xmm3                    // xmm3 = 2^(n >> 1)
        addpd   %xmm2, %xmm0
        mulsd   %xmm3, %xmm0
        movl    $15, (%rsp)                     // status code 15
        jmp     .Lexp_status_return

        // |x| >= 1024; %eax = high dword of x with the sign cleared.
.Lexp_huge_arg:
        cmpl    $0x7ff00000, %eax
        jae     .Lexp_inf_or_nan
        movl    12(%rsp), %eax                  // high dword of x, sign included
        testl   %eax, %eax
        js      .Lexp_huge_negative
        movsd   XMAX(%rip), %xmm0
        mulsd   %xmm0, %xmm0                    // largest finite double squared: overflows
.Lexp_flag_overflow:
        movl    $14, (%rsp)                     // status code 14
        jmp     .Lexp_status_return

.Lexp_huge_negative:
        movsd   XMIN(%rip), %xmm0
        mulsd   %xmm0, %xmm0                    // smallest normal double squared: underflows
        movl    $15, (%rsp)                     // status code 15
        jmp     .Lexp_status_return

        // Exponent field all ones: x is an infinity or a NaN.
.Lexp_inf_or_nan:
        movl    8(%rsp), %edx                   // low dword of x
        cmpl    $0x7ff00000, %eax
        ja      .Lexp_nan                       // fraction bits set in the high dword
        testl   %edx, %edx
        jnz     .Lexp_nan                       // fraction bits set in the low dword
        movl    12(%rsp), %eax                  // high dword of x, sign included
        cmpl    $0x7ff00000, %eax
        jne     .Lexp_neg_inf
        movsd   INF(%rip), %xmm0                // exp(+Inf) = +Inf
        jmp     .Lexp_done

.Lexp_neg_inf:
        movsd   ZERO(%rip), %xmm0               // exp(-Inf) = 0
        jmp     .Lexp_done

.Lexp_nan:
        movsd   8(%rsp), %xmm0
        addsd   %xmm0, %xmm0                    // x + x returns a NaN
        jmp     .Lexp_done

        // |x| < 2^-54, or |x| >= 1024 (including Inf and NaN).
.Lexp_out_of_range:
        movl    12(%rsp), %eax
        andl    $0x7fffffff, %eax               // high dword of |x|
        cmpl    $0x40900000, %eax               // high dword of 1024.0
        jae     .Lexp_huge_arg
        movsd   8(%rsp), %xmm0
        addsd   ONE_val(%rip), %xmm0            // tiny |x|: return 1.0 + x
        jmp     .Lexp_done

.Lexp_status_return:
        movq    %xmm0, 16(%rsp)                 // spill and reload the result
        movq    16(%rsp), %xmm0
.Lexp_done:
        addq    $24, %rsp
..___tag_value_exp.4:
        ret
..___tag_value_exp.5:
END(exp)
// -- End exp
// cv: packed double constants used by exp, six 16-byte rows, each read
// with movapd. Every pair of .long values is one little-endian double
// (low dword first).
        .section .rodata, "a"
        .align 16
cv:
        // cv+0: 64/ln(2) in both lanes (0x40571547652B82FE)
        .long   1697350398
        .long   1079448903
        .long   1697350398
        .long   1079448903
        // cv+16: high part of ln(2)/64 in both lanes (0x3F862E42FEFA0000)
        .long   4277796864
        .long   1065758274
        .long   4277796864
        .long   1065758274
        // cv+32: low part of ln(2)/64 in both lanes (0x3D1CF79ABC9E3B3A)
        .long   3164486458
        .long   1025308570
        .long   3164486458
        .long   1025308570
        // cv+48: y^2 coefficient, just below 0.5 (0x3FDFFFFFFFFFFFFE), both lanes
        .long   4294967294
        .long   1071644671
        .long   4294967294
        .long   1071644671
        // cv+64: {~1/720, ~1/24} -- multiplied by y in exp
        .long   3811088480
        .long   1062650204
        .long   1432067621
        .long   1067799893
        // cv+80: {~1/120, ~1/6} -- added to the cv+64 row times y
        .long   3230715663
        .long   1065423125
        .long   1431604129
        .long   1069897045
        .type   cv,@object
        .size   cv,96
// Shifter: 1.5 * 2^52 (0x4338000000000000) in both lanes. exp adds it to
// x*64/ln(2) so the rounded integer lands in the low fraction bits, then
// subtracts it again to get that integer back as a double.
        .align 16
Shifter:
        .long   0
        .long   1127743488
        .long   0
        .long   1127743488
        .type   Shifter,@object
        .size   Shifter,16
// mmask: 0x00000000FFFFFFC0 in each 64-bit lane. ANDed with the shifted
// sum in exp: keeps the low 32 bits with the bottom 6 bits (the table
// index) cleared.
        .align 16
mmask:
        .long   4294967232
        .long   0
        .long   4294967232
        .long   0
        .type   mmask,@object
        .size   mmask,16
// bias: 65472 = 1023 * 64 in each 64-bit lane. exp adds it to (m & ~63)
// and shifts the sum left by 46, giving (n + 1023) << 52: the exponent
// field of 2^n.
        .align 16
bias:
        .long   65472
        .long   0
        .long   65472
        .long   0
        .type   bias,@object
        .size   bias,16
// Tbl_addr: 64 entries of 16 bytes, indexed in exp by j = m & 63 (byte
// offset j * 16), one entry per line below.
//   bytes 0-7  : a double that exp adds to the reduced argument y
//                (T_lo[j] in the algorithm description)
//   bytes 8-15 : fraction bits of 2^(j/64) with a zero exponent field;
//                exp ORs the exponent of 2^n into them (T_hi[j])
// Each pair of .long values is one little-endian 64-bit quantity.
        .align 16
Tbl_addr:
        .long   0, 0, 0, 0
        .long   235107661, 1018002367, 1048019040, 11418
        .long   896005651, 1015861842, 3541402996, 22960
        .long   1642514529, 1012987726, 410360776, 34629
        .long   1568897900, 1016568486, 1828292879, 46424
        .long   1882168529, 1010744893, 852742562, 58348
        .long   509852888, 1017336174, 3490863952, 70401
        .long   653277307, 1017431380, 2930322911, 82586
        .long   1649557430, 1017729363, 1014845818, 94904
        .long   1058231231, 1015777676, 3949972341, 107355
        .long   1044000607, 1016786167, 828946858, 119943
        .long   1151779725, 1015705409, 2288159958, 132667
        .long   3819481236, 1016499965, 1853186616, 145530
        .long   2552227826, 1015039787, 1709341917, 158533
        .long   1829350193, 1015216097, 4112506593, 171677
        .long   1913391795, 1015756674, 2799960843, 184965
        .long   1303423926, 1015238005, 171030293, 198398
        .long   1574172746, 1016061241, 2992903935, 211976
        .long   3424156969, 1017196428, 926591434, 225703
        .long   1938513547, 1017631273, 887463926, 239579
        .long   2804567149, 1015390024, 1276261410, 253606
        .long   631083525, 1017690182, 569847337, 267786
        .long   1623370770, 1011049453, 1617004845, 282120
        .long   3667985273, 1013894369, 3049340112, 296610
        .long   3145379760, 1014403278, 3577096743, 311258
        .long   2603100681, 1017152460, 1990012070, 326066
        .long   3249202951, 1017448880, 1453150081, 341035
        .long   419288974, 1016280325, 917841882, 356167
        .long   3793507337, 1016095713, 3712504873, 371463
        .long   728023093, 1016345318, 363667784, 386927
        .long   2582678538, 1017123460, 2956612996, 402558
        .long   7592966, 1016721543, 2186617380, 418360
        .long   228611441, 1016696141, 1719614412, 434334
        .long   2261665670, 1017457593, 1013258798, 450482
        .long   544148907, 1017323666, 3907805043, 466805
        .long   2383914918, 1017143586, 1447192520, 483307
        .long   1176412038, 1017267372, 1944781190, 499988
        .long   2882956373, 1013312481, 919555682, 516851
        .long   3154077648, 1016528543, 2571947538, 533897
        .long   348651999, 1016405780, 2604962540, 551129
        .long   3253791412, 1015920431, 1110089947, 568549
        .long   1509121860, 1014756995, 2568320822, 586158
        .long   2617649212, 1017340090, 2966275556, 603959
        .long   553214634, 1016457425, 2682146383, 621954
        .long   730975783, 1014083580, 2191782032, 640145
        .long   1486499517, 1016818996, 2069751140, 658534
        .long   2595788928, 1016407932, 2990417244, 677123
        .long   1853053619, 1015310724, 1434058175, 695915
        .long   2462790535, 1015814775, 2572866477, 714911
        .long   3693944214, 1017259110, 3092190714, 734114
        .long   2979333550, 1017188654, 4076559942, 753526
        .long   174054861, 1014300631, 2420883922, 773150
        .long   816778419, 1014197934, 3716502172, 792987
        .long   3507050924, 1015341199, 777507147, 813041
        .long   1821514088, 1013410604, 3706687593, 833312
        .long   920623539, 1016295433, 1242007931, 853805
        .long   2789017511, 1014276997, 3707479175, 874520
        .long   3586233004, 1015962192, 64696965, 895462
        .long   474650514, 1016642419, 863738718, 916631
        .long   1614448851, 1014281732, 3884662774, 938030
        .long   2450082086, 1016164135, 2728693977, 959663
        .long   1101668360, 1015989180, 3999357479, 981531
        .long   835814894, 1015702697, 1533953344, 1003638
        .long   1301400989, 1014466875, 2174652632, 1025985
        .type   Tbl_addr,@object
        .size   Tbl_addr,1024
// ALLONES: 128 set bits. exp shifts each 64-bit lane left by (-1022 - n)
// to build the bit mask it applies to the scaled table value on the
// extreme-exponent path.
        .align 16
ALLONES:
        .long   4294967295
        .long   4294967295
        .long   4294967295
        .long   4294967295
        .type   ALLONES,@object
        .size   ALLONES,16
// ebias: bit pattern of 1.0 (0x3FF0000000000000) in both lanes. exp adds
// it (paddd) to (n >> 1) placed in the exponent field, which yields the
// double 2^(n >> 1).
        .align 16
ebias:
        .long   0
        .long   1072693248
        .long   0
        .long   1072693248
        .type   ebias,@object
        .size   ebias,16
// XMAX: largest finite double (0x7FEFFFFFFFFFFFFF). exp squares it to
// produce the overflow result for x >= 1024.
        .align 8
XMAX:
        .long   4294967295
        .long   2146435071
        .type   XMAX,@object
        .size   XMAX,8
// XMIN: smallest normal double (0x0010000000000000). exp squares it to
// produce the underflow result for x <= -1024.
        .align 8
XMIN:
        .long   0
        .long   1048576
        .type   XMIN,@object
        .size   XMIN,8
// INF: +infinity (0x7FF0000000000000), returned by exp for x = +Inf.
        .align 8
INF:
        .long   0
        .long   2146435072
        .type   INF,@object
        .size   INF,8
// ZERO: +0.0, returned by exp for x = -Inf.
        .align 8
ZERO:
        .long   0
        .long   0
        .type   ZERO,@object
        .size   ZERO,8
// ONE_val: 1.0 (0x3FF0000000000000). exp returns x + 1.0 when |x| < 2^-54.
        .align 8
ONE_val:
        .long   0
        .long   1072693248
        .type   ONE_val,@object
        .size   ONE_val,8
// Section bookkeeping, then the hand-encoded unwind record for exp.
        .data
        .section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
// The byte values below are little-endian DWARF call-frame data. The FDE
// refers to the ..___tag_value_exp.N labels placed inside exp.
        .section .eh_frame,"a",@progbits
.eh_frame_seg:
        .align 1
        // CIE: 0x14 bytes follow the length field.
        .4byte 0x00000014
        // CIE id 0, version 1, augmentation string "zR"
        .8byte 0x00527a0100000000
        // code align 1, data align -8, return-address column 16,
        // augmentation length 1, FDE pointer encoding 0x1b (pc-relative
        // sdata4), then DW_CFA_def_cfa: register 7, offset 8
        .8byte 0x08070c1b01107801
        // DW_CFA_offset: column 16 saved at CFA-8, then two DW_CFA_nop pads
        .4byte 0x00000190
        // FDE: 0x1c bytes follow the length field.
        .4byte 0x0000001c
        // offset back to the CIE
        .4byte 0x0000001c
        // start of exp (pc-relative) and the size of its code
        .4byte ..___tag_value_exp.1-.
        .4byte ..___tag_value_exp.5-..___tag_value_exp.1
        // augmentation data length 0, then DW_CFA_advance_loc4 ...
        .2byte 0x0400
        // ... to the instruction after "subq $24, %rsp"
        .4byte ..___tag_value_exp.3-..___tag_value_exp.1
        // DW_CFA_def_cfa_offset 32
        .2byte 0x200e
        // DW_CFA_advance_loc4 ...
        .byte 0x04
        // ... to the instruction after "addq $24, %rsp"
        .4byte ..___tag_value_exp.4-..___tag_value_exp.3
        // DW_CFA_def_cfa_offset 8
        .2byte 0x080e
        // DW_CFA_nop padding
        .byte 0x00
// End
636# End