/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//
//      e^x = 2^{x/log(2)} = 2^n * T[j] * (1 + P(y))
//
//  where
//      x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//      m = n*K + j,           m, n, j - signed integers, j in [-K/2..K/2]
//
//  The values of 2^{j/K} are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
//  P(y) is a minimax polynomial approximation of exp(y)-1
//  on the small interval [-log(2)/K..log(2)/K] (its coefficients were
//  calculated by Maple V).
//
//  To avoid problems with arithmetic overflow and underflow, the value of
//  2^n is safely computed as 2^{n1} * 2^{n2}, where n1 is in
//  [-BIAS/2..BIAS/2] and BIAS is the value of the exponent bias.
//
// Special cases:
//  exp(NaN) = NaN
//  exp(+INF) = +INF
//  exp(-INF) = 0
//  exp(x) = 1 for subnormals
//  for finite argument, only exp(0)=1 is exact
//  For IEEE double
//    if x >  709.782712893383973096 then exp(x) overflow
//    if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/

#include <private/bionic_asm.h>
# -- Begin  static_func
/*
 * static_func: return the run-time address of static_const_table in %eax.
 *
 * Position-independent address computation:
 *   - call/popl leaves the address of ..L2 in %eax;
 *   - the first lea adds the link-time distance from ..L2 to the GOT, so
 *     %eax holds the address of _GLOBAL_OFFSET_TABLE_;
 *   - the second lea applies the @GOTOFF displacement of the table.
 *
 * Takes no arguments.  The only register written is %eax; the return address
 * pushed by the inner call is consumed by the popl, so the stack pointer is
 * unchanged on return.
 */
        .text
        .align __bionic_asm_align
        .type static_func, @function
static_func:
..B1.1:
        call      ..L2
..L2:
        popl      %eax
        lea       _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax
        lea       static_const_table@GOTOFF(%eax), %eax
        ret
        .size   static_func,.-static_func
# -- End  static_func

# -- Begin  exp
/*
 * double exp(double x)
 *
 * Interface as used by the code below:
 *   in   x is read from the stack: 8(%ebp), which is 128(%esp) once the
 *        120-byte frame has been allocated (high dword at 132(%esp)).
 *   out  the result is left on the x87 stack by an fldl before returning.
 *   %ebx is saved at 64(%esp) and restored on exit; while the function runs
 *        it holds the address of static_const_table (from static_func).
 *   %eax, %ecx, %edx, %xmm0-%xmm7, the x87 stack and EFLAGS are scratch.
 *
 * Frame slots, as offsets from %esp after the subl:
 *     0   result spill used by the overflow/underflow exits
 *     8   SSE <-> x87 transfer slot
 *    16   SSE <-> x87 transfer slot
 *    24   x87 control word as found on entry to the slow path
 *    28   the same control word with bits 8 and 9 set
 *    48   result spill used by the common exit
 *    64   saved %ebx
 *
 * Constant table layout (byte offsets from %ebx) as used here:
 *      0  mask 0xfff00000_00000000 (sign and exponent bits), two lanes
 *     16  mask 0x00000000_ffffffc0, two lanes
 *     32  0x0000ffc0 = 1023 << 6, two lanes
 *     48  0x43380000_00000000 = 1.5 * 2^52, rounding shifter
 *     64  64/log(2)
 *     80  high part of log(2)/64
 *     96  low part of log(2)/64
 *    112  approximately 1/2
 *    128  polynomial coefficients, approximately 1/720 and 1/24
 *    144  polynomial coefficients, approximately 1/120 and 1/6
 *    160  64 table entries of 16 bytes each
 *   1184  1.0
 *   1192  +Inf
 *   1200  0.0
 *   1208  largest finite double
 *   1216  smallest normal double
 */
ENTRY(exp)
# parameter 1: 8 + %ebp
..B2.1:
..B2.2:
        pushl     %ebp
        movl      %esp, %ebp
        subl      $120, %esp
        movl      %ebx, 64(%esp)
        call      static_func
        movl      %eax, %ebx
        movsd     128(%esp), %xmm0
        unpcklpd  %xmm0, %xmm0
        movapd    64(%ebx), %xmm1
        movapd    48(%ebx), %xmm6
        movapd    80(%ebx), %xmm2
        movapd    96(%ebx), %xmm3
# Range filter on bits 48..62 of x (biased exponent plus the top four mantissa
# bits).  Either subtraction going negative sets the sign bit of %edx, so the
# unsigned compare against 0x80000000 branches away when that field is below
# 15504 (0x3c90) or above 16527 (0x408f).
        pextrw    $3, %xmm0, %eax
        andl      $32767, %eax
        movl      $16527, %edx
        subl      %eax, %edx
        subl      $15504, %eax
        orl       %eax, %edx
        cmpl      $-2147483648, %edx
        jae       .L_2TAG_PACKET_0.0.2
# Argument reduction.  m = x * 64/log(2), rounded to an integer by adding and
# then subtracting the shifter; y = x - m * hi - m * lo.
        mulpd     %xmm0, %xmm1
        addpd     %xmm6, %xmm1
        movapd    %xmm1, %xmm7
        subpd     %xmm6, %xmm1
        mulpd     %xmm1, %xmm2
        movapd    128(%ebx), %xmm4
        mulpd     %xmm1, %xmm3
        movapd    144(%ebx), %xmm5
        subpd     %xmm2, %xmm0
# The low dword of the shifted sum is m as an integer.
# %ecx = (m & 63) * 16 is the byte offset of the table entry,
# %eax = %edx = m >> 6 (arithmetic shift) is n.
        movd      %xmm7, %eax
        movl      %eax, %ecx
        andl      $63, %ecx
        shll      $4, %ecx
        sarl      $6, %eax
        movl      %eax, %edx
# Build the bit pattern of 2^n in %xmm7: clear the low six bits of m, add
# 1023 << 6, then shift left by 46 so that n + 1023 lands in the exponent
# field.
        movdqa    16(%ebx), %xmm6
        pand      %xmm6, %xmm7
        movdqa    32(%ebx), %xmm6
        paddq     %xmm6, %xmm7
        psllq     $46, %xmm7
        subpd     %xmm3, %xmm0
        movapd    160(%ebx,%ecx), %xmm2
# Polynomial in y, evaluated in two lanes and then summed.  The low qword of
# the table entry is added into the sum; the high qword is OR-ed with the
# exponent bits built above to form the scale factor in %xmm2.
        mulpd     %xmm0, %xmm4
        movapd    %xmm0, %xmm6
        movapd    %xmm0, %xmm1
        mulpd     %xmm6, %xmm6
        mulpd     %xmm6, %xmm0
        addpd     %xmm4, %xmm5
        mulsd     %xmm6, %xmm0
        mulpd     112(%ebx), %xmm6
        addsd     %xmm2, %xmm1
        unpckhpd  %xmm2, %xmm2
        mulpd     %xmm5, %xmm0
        addsd     %xmm0, %xmm1
        orpd      %xmm7, %xmm2
        unpckhpd  %xmm0, %xmm0
        addsd     %xmm1, %xmm0
        addsd     %xmm6, %xmm0
# Unsigned test: the slow path is taken when n lies outside [-894, 1022].
        addl      $894, %edx
        cmpl      $1916, %edx
        ja        .L_2TAG_PACKET_1.0.2
# Common case: result = scale * sum + scale.
        mulsd     %xmm2, %xmm0
        addsd     %xmm2, %xmm0
        jmp       .L_2TAG_PACKET_2.0.2
.L_2TAG_PACKET_1.0.2:
# Slow path.  Save the x87 control word and set bits 8 and 9 (the
# precision-control field, selecting the 64-bit significand) for the x87
# arithmetic below; the saved word is reloaded afterwards.
        fstcw     24(%esp)
        movzwl    24(%esp), %edx
        orl       $768, %edx
        movw      %dx, 28(%esp)
        fldcw     28(%esp)
# Split n into n1 = n >> 1 (%eax) and n2 = n - n1 (%edx) so that the scaling
# is applied in two steps.
        movl      %eax, %edx
        sarl      $1, %eax
        subl      %eax, %edx
# %xmm6 = table mantissa bits (sign/exponent cleared) with exponent n1 + 1023;
# %xmm4 = 2^n2.
        movdqa    (%ebx), %xmm6
        pandn     %xmm2, %xmm6
        addl      $1023, %eax
        movd      %eax, %xmm3
        psllq     $52, %xmm3
        orpd      %xmm3, %xmm6
        addl      $1023, %edx
        movd      %edx, %xmm4
        psllq     $52, %xmm4
# On the x87 stack: (sum * s1 + s1) * 2^n2, where s1 is the value in %xmm6;
# the product is stored back as a double.
        movsd     %xmm0, 8(%esp)
        fldl      8(%esp)
        movsd     %xmm6, 16(%esp)
        fldl      16(%esp)
        fmul      %st, %st(1)
        faddp     %st, %st(1)
        movsd     %xmm4, 8(%esp)
        fldl      8(%esp)
        fmulp     %st, %st(1)
        fstpl     8(%esp)
        movsd     8(%esp), %xmm0
        fldcw     24(%esp)
# Classify the result by its exponent field: all ones goes to the code-14
# exit, all zeros to the code-15 exit, anything else to the common exit.
        pextrw    $3, %xmm0, %ecx
        andl      $32752, %ecx
        cmpl      $32752, %ecx
        jae       .L_2TAG_PACKET_3.0.2
        cmpl      $0, %ecx
        je        .L_2TAG_PACKET_4.0.2
        jmp       .L_2TAG_PACKET_2.0.2
.L_2TAG_PACKET_3.0.2:
        movl      $14, %edx
        jmp       .L_2TAG_PACKET_5.0.2
.L_2TAG_PACKET_4.0.2:
        movl      $15, %edx
.L_2TAG_PACKET_5.0.2:
# Exit for overflow/underflow results.  The code placed in %edx (14 or 15)
# and the reloaded argument in %xmm0 are not read again inside this function.
# NOTE(review): presumably left over from an error-reporting call that is not
# present in this file -- confirm before relying on it.
        movsd     %xmm0, (%esp)
        movsd     128(%esp), %xmm0
        fldl      (%esp)
        jmp       .L_2TAG_PACKET_6.0.2
.L_2TAG_PACKET_7.0.2:
# Reached with %eax = high dword of x with the sign bit cleared, and that
# value at or above 0x40900000.  An exponent field of all ones (Inf or NaN)
# is handled further down.
        cmpl      $2146435072, %eax
        jae       .L_2TAG_PACKET_8.0.2
# Finite argument of large magnitude: branch on the sign bit of x.
        movl      132(%esp), %eax
        cmpl      $-2147483648, %eax
        jae       .L_2TAG_PACKET_9.0.2
# Positive: square the largest finite double.
        movsd     1208(%ebx), %xmm0
        mulsd     %xmm0, %xmm0
        movl      $14, %edx
        jmp       .L_2TAG_PACKET_5.0.2
.L_2TAG_PACKET_9.0.2:
# Negative: square the smallest normal double.
        movsd     1216(%ebx), %xmm0
        mulsd     %xmm0, %xmm0
        movl      $15, %edx
        jmp       .L_2TAG_PACKET_5.0.2
.L_2TAG_PACKET_8.0.2:
# Exponent field is all ones.  Any nonzero mantissa bit (high or low dword)
# means NaN; otherwise x is an infinity and its sign picks the result.
        movl      128(%esp), %edx
        cmpl      $2146435072, %eax
        ja        .L_2TAG_PACKET_10.0.2
        cmpl      $0, %edx
        jne       .L_2TAG_PACKET_10.0.2
        movl      132(%esp), %eax
        cmpl      $2146435072, %eax
        jne       .L_2TAG_PACKET_11.0.2
# x is +Inf: return +Inf.
        movsd     1192(%ebx), %xmm0
        jmp       .L_2TAG_PACKET_2.0.2
.L_2TAG_PACKET_11.0.2:
# x is -Inf: return 0.0.
        movsd     1200(%ebx), %xmm0
        jmp       .L_2TAG_PACKET_2.0.2
.L_2TAG_PACKET_10.0.2:
# NaN argument: return x + x.
        movsd     128(%esp), %xmm0
        addsd     %xmm0, %xmm0
        jmp       .L_2TAG_PACKET_2.0.2
.L_2TAG_PACKET_0.0.2:
# Argument rejected by the range filter.  High dword with the sign cleared at
# or above 0x40900000 (magnitude of at least 1024, or Inf/NaN) is handled
# above; what remains is returned as x + 1.0.
        movl      132(%esp), %eax
        andl      $2147483647, %eax
        cmpl      $1083179008, %eax
        jae       .L_2TAG_PACKET_7.0.2
        movsd     128(%esp), %xmm0
        addsd     1184(%ebx), %xmm0
        jmp       .L_2TAG_PACKET_2.0.2
.L_2TAG_PACKET_2.0.2:
# Common exit: move the result from %xmm0 onto the x87 stack.
        movsd     %xmm0, 48(%esp)
        fldl      48(%esp)
.L_2TAG_PACKET_6.0.2:
        movl      64(%esp), %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret
..B2.3:
END(exp)
# -- End  exp

# Start file scope ASM
# Publish expl as a weak symbol whose value is the address of exp.
.weak expl
.equ expl, exp
# End file scope ASM
/*
 * static_const_table: read-only constants for exp, 1224 bytes, 16-byte
 * aligned (exp loads from it with movapd/movdqa).  Every double is stored as
 * two .long values, low dword first.  Byte offsets in the comments below are
 * the displacements exp applies to the table address.
 */
        .section .rodata, "a"
        .align 16
static_const_table:
# Offset 0: mask 0xfff00000_00000000 (sign and exponent bits), two lanes.
        .long   0
        .long   4293918720
        .long   0
        .long   4293918720
# Offset 16: mask 0x00000000_ffffffc0, two lanes.
        .long   4294967232
        .long   0
        .long   4294967232
        .long   0
# Offset 32: 0x0000ffc0 = 1023 << 6, two lanes.
        .long   65472
        .long   0
        .long   65472
        .long   0
# Offset 48: 0x43380000_00000000 = 1.5 * 2^52 (rounding shifter), two lanes.
        .long   0
        .long   1127743488
        .long   0
        .long   1127743488
# Offset 64: 64/log(2), two lanes.
        .long   1697350398
        .long   1079448903
        .long   1697350398
        .long   1079448903
# Offset 80: high part of log(2)/64, two lanes.
        .long   4277796864
        .long   1065758274
        .long   4277796864
        .long   1065758274
# Offset 96: low part of log(2)/64, two lanes.
        .long   3164486458
        .long   1025308570
        .long   3164486458
        .long   1025308570
# Offset 112: approximately 1/2, two lanes.
        .long   4294967294
        .long   1071644671
        .long   4294967294
        .long   1071644671
# Offset 128: polynomial coefficients, approximately 1/720 and 1/24.
        .long   3811088480
        .long   1062650204
        .long   1432067621
        .long   1067799893
# Offset 144: polynomial coefficients, approximately 1/120 and 1/6.
        .long   3230715663
        .long   1065423125
        .long   1431604129
        .long   1069897045
# Offset 160: 64 entries of 16 bytes, indexed by (m & 63) * 16 in exp.
# In each entry the low qword is a double that exp adds to the polynomial sum
# and the high qword holds mantissa bits that exp ORs with the exponent bits
# it builds.
        .long   0
        .long   0
        .long   0
        .long   0
        .long   235107661
        .long   1018002367
        .long   1048019040
        .long   11418
        .long   896005651
        .long   1015861842
        .long   3541402996
        .long   22960
        .long   1642514529
        .long   1012987726
        .long   410360776
        .long   34629
        .long   1568897900
        .long   1016568486
        .long   1828292879
        .long   46424
        .long   1882168529
        .long   1010744893
        .long   852742562
        .long   58348
        .long   509852888
        .long   1017336174
        .long   3490863952
        .long   70401
        .long   653277307
        .long   1017431380
        .long   2930322911
        .long   82586
        .long   1649557430
        .long   1017729363
        .long   1014845818
        .long   94904
        .long   1058231231
        .long   1015777676
        .long   3949972341
        .long   107355
        .long   1044000607
        .long   1016786167
        .long   828946858
        .long   119943
        .long   1151779725
        .long   1015705409
        .long   2288159958
        .long   132667
        .long   3819481236
        .long   1016499965
        .long   1853186616
        .long   145530
        .long   2552227826
        .long   1015039787
        .long   1709341917
        .long   158533
        .long   1829350193
        .long   1015216097
        .long   4112506593
        .long   171677
        .long   1913391795
        .long   1015756674
        .long   2799960843
        .long   184965
        .long   1303423926
        .long   1015238005
        .long   171030293
        .long   198398
        .long   1574172746
        .long   1016061241
        .long   2992903935
        .long   211976
        .long   3424156969
        .long   1017196428
        .long   926591434
        .long   225703
        .long   1938513547
        .long   1017631273
        .long   887463926
        .long   239579
        .long   2804567149
        .long   1015390024
        .long   1276261410
        .long   253606
        .long   631083525
        .long   1017690182
        .long   569847337
        .long   267786
        .long   1623370770
        .long   1011049453
        .long   1617004845
        .long   282120
        .long   3667985273
        .long   1013894369
        .long   3049340112
        .long   296610
        .long   3145379760
        .long   1014403278
        .long   3577096743
        .long   311258
        .long   2603100681
        .long   1017152460
        .long   1990012070
        .long   326066
        .long   3249202951
        .long   1017448880
        .long   1453150081
        .long   341035
        .long   419288974
        .long   1016280325
        .long   917841882
        .long   356167
        .long   3793507337
        .long   1016095713
        .long   3712504873
        .long   371463
        .long   728023093
        .long   1016345318
        .long   363667784
        .long   386927
        .long   2582678538
        .long   1017123460
        .long   2956612996
        .long   402558
        .long   7592966
        .long   1016721543
        .long   2186617380
        .long   418360
        .long   228611441
        .long   1016696141
        .long   1719614412
        .long   434334
        .long   2261665670
        .long   1017457593
        .long   1013258798
        .long   450482
        .long   544148907
        .long   1017323666
        .long   3907805043
        .long   466805
        .long   2383914918
        .long   1017143586
        .long   1447192520
        .long   483307
        .long   1176412038
        .long   1017267372
        .long   1944781190
        .long   499988
        .long   2882956373
        .long   1013312481
        .long   919555682
        .long   516851
        .long   3154077648
        .long   1016528543
        .long   2571947538
        .long   533897
        .long   348651999
        .long   1016405780
        .long   2604962540
        .long   551129
        .long   3253791412
        .long   1015920431
        .long   1110089947
        .long   568549
        .long   1509121860
        .long   1014756995
        .long   2568320822
        .long   586158
        .long   2617649212
        .long   1017340090
        .long   2966275556
        .long   603959
        .long   553214634
        .long   1016457425
        .long   2682146383
        .long   621954
        .long   730975783
        .long   1014083580
        .long   2191782032
        .long   640145
        .long   1486499517
        .long   1016818996
        .long   2069751140
        .long   658534
        .long   2595788928
        .long   1016407932
        .long   2990417244
        .long   677123
        .long   1853053619
        .long   1015310724
        .long   1434058175
        .long   695915
        .long   2462790535
        .long   1015814775
        .long   2572866477
        .long   714911
        .long   3693944214
        .long   1017259110
        .long   3092190714
        .long   734114
        .long   2979333550
        .long   1017188654
        .long   4076559942
        .long   753526
        .long   174054861
        .long   1014300631
        .long   2420883922
        .long   773150
        .long   816778419
        .long   1014197934
        .long   3716502172
        .long   792987
        .long   3507050924
        .long   1015341199
        .long   777507147
        .long   813041
        .long   1821514088
        .long   1013410604
        .long   3706687593
        .long   833312
        .long   920623539
        .long   1016295433
        .long   1242007931
        .long   853805
        .long   2789017511
        .long   1014276997
        .long   3707479175
        .long   874520
        .long   3586233004
        .long   1015962192
        .long   64696965
        .long   895462
        .long   474650514
        .long   1016642419
        .long   863738718
        .long   916631
        .long   1614448851
        .long   1014281732
        .long   3884662774
        .long   938030
        .long   2450082086
        .long   1016164135
        .long   2728693977
        .long   959663
        .long   1101668360
        .long   1015989180
        .long   3999357479
        .long   981531
        .long   835814894
        .long   1015702697
        .long   1533953344
        .long   1003638
        .long   1301400989
        .long   1014466875
        .long   2174652632
        .long   1025985
# Offset 1184: 1.0
        .long   0
        .long   1072693248
# Offset 1192: +Inf
        .long   0
        .long   2146435072
# Offset 1200: 0.0
        .long   0
        .long   0
# Offset 1208: largest finite double
        .long   4294967295
        .long   2146435071
# Offset 1216: smallest normal double
        .long   0
        .long   1048576
        .type   static_const_table,@object
        .size   static_const_table,1224
        .data
        .section .note.GNU-stack, ""
# End
577# End