/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//
//       e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//
//  where
//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
//
//  The values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
//  P(y) is a minimax polynomial approximation of exp(y)-1
//  on the small interval [-log(2)/K..log(2)/K] (the coefficients were
//  calculated with Maple V).
//
//  To avoid problems with arithmetic overflow and underflow,
//  the value of 2^n is safely computed as 2^n1 * 2^n2,
//  where n1 is in [-BIAS/2..BIAS/2]
//  and BIAS is the value of the exponent bias.
//
// Special cases:
//  exp(NaN) = NaN
//  exp(+INF) = +INF
//  exp(-INF) = 0
//  exp(x) = 1 for subnormals
//  for finite argument, only exp(0)=1 is exact
//  For IEEE double
//    if x >  709.782712893383973096 then exp(x) overflow
//    if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/

#include <private/bionic_asm.h>
# -- Begin static_func
//
// static_func: position-independent lookup of the constant table.
//   In:    nothing
//   Out:   %eax = run-time address of static_const_table
//   Uses:  %eax only; the call/pop pair leaves %esp where it was on entry
//
        .text
        .align __bionic_asm_align
        .type static_func, @function
static_func:
..B1.1:
        call      ..L2                                          // pushes the run-time address of ..L2
..L2:
        popl      %eax                                          // eax = run-time address of ..L2
        lea       _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax  // eax = address of the GOT
        lea       static_const_table@GOTOFF(%eax), %eax         // eax = address of static_const_table
        ret
        .size static_func,.-static_func
# -- End static_func

# -- Begin exp
//
// double exp(double x)        (32-bit x86, AT&T syntax)
//
//   In:    x on the stack at 8(%ebp); after the 120-byte frame is allocated
//          that is 128(%esp) (low dword) and 132(%esp) (high dword).
//   Out:   result on the x87 stack in st(0).
//   Saved: %ebp (pushed), %ebx (spilled to 64(%esp) and restored on exit).
//   Uses:  %eax, %ecx, %edx, %xmm0-%xmm7 and the x87 stack.
//          %ebx holds the address of static_const_table throughout.
//
//   Paths:
//     fast path             top 16 bits of |x| in [0x3c90, 0x408f]
//                           (2^-54 <= |x| < 1024) and n in [-894, 1022]
//     .L_2TAG_PACKET_1      same range of x, but n outside [-894, 1022]:
//                           2^n is applied as 2^n1 * 2^n2 on the x87 unit
//     .L_2TAG_PACKET_0      |x| < 2^-54 (returns 1.0 + x) or |x| >= 1024
//     .L_2TAG_PACKET_7      |x| >= 1024: overflow / underflow / Inf / NaN
//
ENTRY(exp)
# parameter 1: 8 + %ebp
..B2.1:
..B2.2:
        pushl     %ebp
        movl      %esp, %ebp
        subl      $120, %esp
        movl      %ebx, 64(%esp)                // save callee-saved ebx
        call      static_func
        movl      %eax, %ebx                    // ebx = &static_const_table
        movsd     128(%esp), %xmm0              // xmm0 = x
        unpcklpd  %xmm0, %xmm0                  // x in both lanes
        movapd    64(%ebx), %xmm1               // K/log(2) with K = 64
        movapd    48(%ebx), %xmm6               // 1.5*2^52, the integer-rounding shifter
        movapd    80(%ebx), %xmm2               // high part of log(2)/K
        movapd    96(%ebx), %xmm3               // low part of log(2)/K

        // Range check on the top 16 bits of |x|.
        pextrw    $3, %xmm0, %eax
        andl      $32767, %eax                  // drop the sign bit
        movl      $16527, %edx
        subl      %eax, %edx                    // negative when eax > 16527 (0x408f)
        subl      $15504, %eax                  // negative when eax < 15504 (0x3c90)
        orl       %eax, %edx
        cmpl      $-2147483648, %edx
        jae       .L_2TAG_PACKET_0.0.2          // either difference negative: leave the fast path

        // Argument reduction: m = x*K/log(2) rounded to an integer,
        // y = x - m*log(2)/K, j = m & 63, n = m >> 6.
        mulpd     %xmm0, %xmm1                  // x * K/log(2)
        addpd     %xmm6, %xmm1                  // add shifter: m now sits in the low mantissa bits
        movapd    %xmm1, %xmm7                  // keep the shifted value (integer m in its low dword)
        subpd     %xmm6, %xmm1                  // xmm1 = m as a double
        mulpd     %xmm1, %xmm2                  // m * (log(2)/K high part)
        movapd    128(%ebx), %xmm4              // polynomial coefficients
        mulpd     %xmm1, %xmm3                  // m * (log(2)/K low part)
        movapd    144(%ebx), %xmm5              // polynomial coefficients
        subpd     %xmm2, %xmm0                  // x - m*hi
        movd      %xmm7, %eax                   // eax = m
        movl      %eax, %ecx
        andl      $63, %ecx                     // j = m & 63
        shll      $4, %ecx                      // j * 16 = byte offset of table entry j
        sarl      $6, %eax                      // n = m >> 6 (arithmetic shift)
        movl      %eax, %edx                    // edx = n, range-checked below
        movdqa    16(%ebx), %xmm6               // mask 0x00000000ffffffc0 per qword
        pand      %xmm6, %xmm7                  // keep bits 6..31 of m, i.e. n*64
        movdqa    32(%ebx), %xmm6               // 0xffc0 = 1023*64
        paddq     %xmm6, %xmm7                  // (n + 1023) * 64
        psllq     $46, %xmm7                    // (n + 1023) << 52: exponent field for 2^n
        subpd     %xmm3, %xmm0                  // y = x - m*hi - m*lo
        movapd    160(%ebx,%ecx), %xmm2         // table entry j (two doubles)

        // Polynomial in y, evaluated on both lanes, combined with the
        // low double of the table entry.
        mulpd     %xmm0, %xmm4                  // coefficients(128) * y
        movapd    %xmm0, %xmm6
        movapd    %xmm0, %xmm1                  // xmm1 = y
        mulpd     %xmm6, %xmm6                  // y^2
        mulpd     %xmm6, %xmm0                  // y^3
        addpd     %xmm4, %xmm5                  // coefficients(144) + coefficients(128)*y
        mulsd     %xmm6, %xmm0                  // low lane becomes y^5, high lane stays y^3
        mulpd     112(%ebx), %xmm6              // y^2 scaled by the constant at offset 112
        addsd     %xmm2, %xmm1                  // y + low double of the table entry
        unpckhpd  %xmm2, %xmm2                  // high double of the table entry into both lanes
        mulpd     %xmm5, %xmm0
        addsd     %xmm0, %xmm1
        orpd      %xmm7, %xmm2                  // OR in the exponent: the table word has mantissa bits only
        unpckhpd  %xmm0, %xmm0
        addsd     %xmm1, %xmm0
        addsd     %xmm6, %xmm0                  // xmm0 = P, the correction term
        addl      $894, %edx
        cmpl      $1916, %edx
        ja        .L_2TAG_PACKET_1.0.2          // unsigned test: n outside [-894, 1022]
        mulsd     %xmm2, %xmm0                  // T * P
        addsd     %xmm2, %xmm0                  // T + T*P
        jmp       .L_2TAG_PACKET_2.0.2

        // n is too large in magnitude for a single exponent field:
        // split n = n1 + n2 and scale twice on the x87 unit.
.L_2TAG_PACKET_1.0.2:
        fstcw     24(%esp)                      // save the caller's x87 control word
        movzwl    24(%esp), %edx
        orl       $768, %edx                    // set both precision-control bits (0x300)
        movw      %dx, 28(%esp)
        fldcw     28(%esp)
        movl      %eax, %edx
        sarl      $1, %eax                      // n1 = n >> 1
        subl      %eax, %edx                    // n2 = n - n1
        movdqa    (%ebx), %xmm6                 // mask selecting bits 52..63 of each qword
        pandn     %xmm2, %xmm6                  // clear those bits: mantissa of T only
        addl      $1023, %eax
        movd      %eax, %xmm3
        psllq     $52, %xmm3                    // bit pattern of 2^n1
        orpd      %xmm3, %xmm6                  // xmm6 = T mantissa with exponent n1
        addl      $1023, %edx
        movd      %edx, %xmm4
        psllq     $52, %xmm4                    // bit pattern of 2^n2
        movsd     %xmm0, 8(%esp)
        fldl      8(%esp)                       // st0 = P
        movsd     %xmm6, 16(%esp)
        fldl      16(%esp)                      // st0 = T1, st1 = P
        fmul      %st, %st(1)                   // st1 = P * T1
        faddp     %st, %st(1)                   // st0 = T1 + P*T1
        movsd     %xmm4, 8(%esp)
        fldl      8(%esp)                       // st0 = 2^n2
        fmulp     %st, %st(1)                   // st0 = (T1 + P*T1) * 2^n2
        fstpl     8(%esp)                       // store as a double
        movsd     8(%esp), %xmm0
        fldcw     24(%esp)                      // restore the caller's control word
        pextrw    $3, %xmm0, %ecx
        andl      $32752, %ecx                  // exponent field of the result (0x7ff0)
        cmpl      $32752, %ecx
        jae       .L_2TAG_PACKET_3.0.2          // exponent all ones: overflowed
        testl     %ecx, %ecx
        je        .L_2TAG_PACKET_4.0.2          // exponent zero: zero or subnormal result
        jmp       .L_2TAG_PACKET_2.0.2

        // NOTE(review): %edx (14 or 15) and the copy of x reloaded into %xmm0
        // below are not read by any later instruction in this function;
        // presumably they fed an error-reporting call that is not present here.
.L_2TAG_PACKET_3.0.2:
        movl      $14, %edx
        jmp       .L_2TAG_PACKET_5.0.2
.L_2TAG_PACKET_4.0.2:
        movl      $15, %edx
.L_2TAG_PACKET_5.0.2:
        movsd     %xmm0, (%esp)
        movsd     128(%esp), %xmm0
        fldl      (%esp)                        // return value in st0
        jmp       .L_2TAG_PACKET_6.0.2

        // |x| >= 1024; eax = high dword of |x|.
.L_2TAG_PACKET_7.0.2:
        cmpl      $2146435072, %eax
        jae       .L_2TAG_PACKET_8.0.2          // exponent all ones: Inf or NaN
        movl      132(%esp), %eax
        cmpl      $-2147483648, %eax
        jae       .L_2TAG_PACKET_9.0.2          // sign bit set: large negative x
        movsd     1208(%ebx), %xmm0             // largest finite double
        mulsd     %xmm0, %xmm0                  // squaring it overflows
        movl      $14, %edx
        jmp       .L_2TAG_PACKET_5.0.2
.L_2TAG_PACKET_9.0.2:
        movsd     1216(%ebx), %xmm0             // smallest normal double
        mulsd     %xmm0, %xmm0                  // squaring it underflows
        movl      $15, %edx
        jmp       .L_2TAG_PACKET_5.0.2

        // Inf or NaN.
.L_2TAG_PACKET_8.0.2:
        movl      128(%esp), %edx               // low dword of x
        cmpl      $2146435072, %eax
        ja        .L_2TAG_PACKET_10.0.2         // mantissa bits set in the high dword: NaN
        testl     %edx, %edx
        jne       .L_2TAG_PACKET_10.0.2         // mantissa bits set in the low dword: NaN
        movl      132(%esp), %eax
        cmpl      $2146435072, %eax
        jne       .L_2TAG_PACKET_11.0.2         // sign bit set: -Inf
        movsd     1192(%ebx), %xmm0             // exp(+Inf) = +Inf
        jmp       .L_2TAG_PACKET_2.0.2
.L_2TAG_PACKET_11.0.2:
        movsd     1200(%ebx), %xmm0             // exp(-Inf) = +0.0
        jmp       .L_2TAG_PACKET_2.0.2
.L_2TAG_PACKET_10.0.2:
        movsd     128(%esp), %xmm0
        addsd     %xmm0, %xmm0                  // NaN + NaN: the result is a NaN
        jmp       .L_2TAG_PACKET_2.0.2

        // Outside the fast range: either tiny or large.
.L_2TAG_PACKET_0.0.2:
        movl      132(%esp), %eax
        andl      $2147483647, %eax             // high dword of |x|
        cmpl      $1083179008, %eax
        jae       .L_2TAG_PACKET_7.0.2          // |x| >= 1024 (0x40900000)
        movsd     128(%esp), %xmm0
        addsd     1184(%ebx), %xmm0             // tiny |x|: result is 1.0 + x
        // falls through to the common return path

        // Common return: move the SSE result onto the x87 stack.
.L_2TAG_PACKET_2.0.2:
        movsd     %xmm0, 48(%esp)
        fldl      48(%esp)
.L_2TAG_PACKET_6.0.2:
        movl      64(%esp), %ebx                // restore callee-saved ebx
        movl      %ebp, %esp
        popl      %ebp
        ret
..B2.3:
END(exp)
# -- End exp

# Start file scope ASM
// NOTE(review): ALIAS_SYMBOL comes from private/bionic_asm.h and is not
// visible here; presumably it exports expl as a second name for exp.
ALIAS_SYMBOL(expl, exp);
# End file scope ASM
//
// static_const_table: read-only constants used by exp (1224 bytes).
// Each row below is 16 bytes; the leading comment gives the byte offset
// that exp uses relative to the table address in %ebx. Values are 32-bit
// little-endian halves of IEEE doubles (low dword first) or integer masks.
//
        .section .rodata, "a"
        .align 16
static_const_table:
        // offset 0: mask for bits 52..63 of each qword (sign and exponent)
        .long 0, 4293918720, 0, 4293918720
        // offset 16: mask 0xffffffc0 (bits 6..31 of the low dword)
        .long 4294967232, 0, 4294967232, 0
        // offset 32: 0xffc0 = 1023*64, the exponent bias in units of K
        .long 65472, 0, 65472, 0
        // offset 48: 1.5*2^52, the integer-rounding shifter
        .long 0, 1127743488, 0, 1127743488
        // offset 64: K/log(2) with K = 64
        .long 1697350398, 1079448903, 1697350398, 1079448903
        // offset 80: high part of log(2)/K (low 16 mantissa bits are zero)
        .long 4277796864, 1065758274, 4277796864, 1065758274
        // offset 96: low part of log(2)/K
        .long 3164486458, 1025308570, 3164486458, 1025308570
        // offset 112: approx. 0.5 in both lanes (polynomial coefficient)
        .long 4294967294, 1071644671, 4294967294, 1071644671
        // offset 128: polynomial coefficients, approx. 1/720 and 1/24
        .long 3811088480, 1062650204, 1432067621, 1067799893
        // offset 144: polynomial coefficients, approx. 1/120 and 1/6
        .long 3230715663, 1065423125, 1431604129, 1069897045
        // offset 160: 64 table entries of 16 bytes, indexed by j = m & 63.
        // First double: small correction term; second double: mantissa bits
        // only (exponent field zero), combined with an exponent by exp.
        .long 0, 0, 0, 0                                        // j = 0
        .long 235107661, 1018002367, 1048019040, 11418          // j = 1
        .long 896005651, 1015861842, 3541402996, 22960          // j = 2
        .long 1642514529, 1012987726, 410360776, 34629          // j = 3
        .long 1568897900, 1016568486, 1828292879, 46424         // j = 4
        .long 1882168529, 1010744893, 852742562, 58348          // j = 5
        .long 509852888, 1017336174, 3490863952, 70401          // j = 6
        .long 653277307, 1017431380, 2930322911, 82586          // j = 7
        .long 1649557430, 1017729363, 1014845818, 94904         // j = 8
        .long 1058231231, 1015777676, 3949972341, 107355        // j = 9
        .long 1044000607, 1016786167, 828946858, 119943         // j = 10
        .long 1151779725, 1015705409, 2288159958, 132667        // j = 11
        .long 3819481236, 1016499965, 1853186616, 145530        // j = 12
        .long 2552227826, 1015039787, 1709341917, 158533        // j = 13
        .long 1829350193, 1015216097, 4112506593, 171677        // j = 14
        .long 1913391795, 1015756674, 2799960843, 184965        // j = 15
        .long 1303423926, 1015238005, 171030293, 198398         // j = 16
        .long 1574172746, 1016061241, 2992903935, 211976        // j = 17
        .long 3424156969, 1017196428, 926591434, 225703         // j = 18
        .long 1938513547, 1017631273, 887463926, 239579         // j = 19
        .long 2804567149, 1015390024, 1276261410, 253606        // j = 20
        .long 631083525, 1017690182, 569847337, 267786          // j = 21
        .long 1623370770, 1011049453, 1617004845, 282120        // j = 22
        .long 3667985273, 1013894369, 3049340112, 296610        // j = 23
        .long 3145379760, 1014403278, 3577096743, 311258        // j = 24
        .long 2603100681, 1017152460, 1990012070, 326066        // j = 25
        .long 3249202951, 1017448880, 1453150081, 341035        // j = 26
        .long 419288974, 1016280325, 917841882, 356167          // j = 27
        .long 3793507337, 1016095713, 3712504873, 371463        // j = 28
        .long 728023093, 1016345318, 363667784, 386927          // j = 29
        .long 2582678538, 1017123460, 2956612996, 402558        // j = 30
        .long 7592966, 1016721543, 2186617380, 418360           // j = 31
        .long 228611441, 1016696141, 1719614412, 434334         // j = 32
        .long 2261665670, 1017457593, 1013258798, 450482        // j = 33
        .long 544148907, 1017323666, 3907805043, 466805         // j = 34
        .long 2383914918, 1017143586, 1447192520, 483307        // j = 35
        .long 1176412038, 1017267372, 1944781190, 499988        // j = 36
        .long 2882956373, 1013312481, 919555682, 516851         // j = 37
        .long 3154077648, 1016528543, 2571947538, 533897        // j = 38
        .long 348651999, 1016405780, 2604962540, 551129         // j = 39
        .long 3253791412, 1015920431, 1110089947, 568549        // j = 40
        .long 1509121860, 1014756995, 2568320822, 586158        // j = 41
        .long 2617649212, 1017340090, 2966275556, 603959        // j = 42
        .long 553214634, 1016457425, 2682146383, 621954         // j = 43
        .long 730975783, 1014083580, 2191782032, 640145         // j = 44
        .long 1486499517, 1016818996, 2069751140, 658534        // j = 45
        .long 2595788928, 1016407932, 2990417244, 677123        // j = 46
        .long 1853053619, 1015310724, 1434058175, 695915        // j = 47
        .long 2462790535, 1015814775, 2572866477, 714911        // j = 48
        .long 3693944214, 1017259110, 3092190714, 734114        // j = 49
        .long 2979333550, 1017188654, 4076559942, 753526        // j = 50
        .long 174054861, 1014300631, 2420883922, 773150         // j = 51
        .long 816778419, 1014197934, 3716502172, 792987         // j = 52
        .long 3507050924, 1015341199, 777507147, 813041         // j = 53
        .long 1821514088, 1013410604, 3706687593, 833312        // j = 54
        .long 920623539, 1016295433, 1242007931, 853805         // j = 55
        .long 2789017511, 1014276997, 3707479175, 874520        // j = 56
        .long 3586233004, 1015962192, 64696965, 895462          // j = 57
        .long 474650514, 1016642419, 863738718, 916631          // j = 58
        .long 1614448851, 1014281732, 3884662774, 938030        // j = 59
        .long 2450082086, 1016164135, 2728693977, 959663        // j = 60
        .long 1101668360, 1015989180, 3999357479, 981531        // j = 61
        .long 835814894, 1015702697, 1533953344, 1003638        // j = 62
        .long 1301400989, 1014466875, 2174652632, 1025985       // j = 63
        // offset 1184: 1.0
        .long 0, 1072693248
        // offset 1192: +Inf
        .long 0, 2146435072
        // offset 1200: +0.0
        .long 0, 0
        // offset 1208: largest finite double
        .long 4294967295, 2146435071
        // offset 1216: smallest normal double
        .long 0, 1048576
        .type static_const_table,@object
        .size static_const_table,1224
        .data
        .section .note.GNU-stack, ""
# End