/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
// Assume x = 2^{3*k+j} * 1.b1 b2 ... b5 b6 ... b52, where j = 0,1,2.
// Let r = (x*2^{-3k-j} - 1.b1 b2 ... b5 1) * rcp[b1 b2 .. b5],
// where rcp[b1 b2 .. b5] = 1/(1.b1 b2 b3 b4 b5 1) in double precision.
// cbrt(2^j * 1.b1 b2 .. b5 1) is approximated as T[j][b1..b5] + D[j][b1..b5]
// (T stores the high 53 bits, D stores the low-order bits).
// Result = 2^k*T + (2^k*T*r)*P + 2^k*D,
// where P = p1 + p2*r + ... + p8*r^7.
//
// Subnormal inputs are first multiplied by 2^63 so that the same field
// extraction applies; the result exponent is then reduced by 63/3 = 21.
//
// Special cases:
//  cbrt(NaN) = quiet NaN (computed as x + x, so a signaling NaN also raises
//              the invalid exception)
//  cbrt(INF) = that INF
//  cbrt(+/-0) = +/-0
//
/******************************************************************************/

#include <private/bionic_asm.h>

// ---------------------------------------------------------------------------
// double cbrt(double x)
//
// In:      %xmm0 = x
// Out:     %xmm0 = cube root of x
// Scratch: %rax, %rcx, %rdx, %r8, %r9, %xmm1-%xmm7, flags
// Stack:   24-byte frame; x is spilled to (%rsp) and re-read on the Inf/NaN
//          path. The frame size is also encoded in the hand-written .eh_frame
//          data at the end of the file (CFA offset 32), so keep them in sync.
//
// Register roles once the fields of x have been split:
//   %xmm1 = -(1.b1 ... b52)   significand of x with the sign forced negative
//   %xmm3 = -(1.b1 ... b5 1)  table break point; later reused for 2^k * D
//   %xmm4 = rcp_table entry (stored negative); later 2^k * T * r
//   %xmm5, %xmm6 = first two coefficient pairs
//   %xmm7 = +/-2^k (sign of x); later 2^k * T
//   %rcx  = 8 * [b1..b5]; later 256 * j + 8 * [b1..b5] (byte offset into
//           cbrt_table and D_table)
//   %r9   = sign:exponent field of x; later 256 * j
// ---------------------------------------------------------------------------
# -- Begin cbrt
ENTRY(cbrt)
# parameter 1: %xmm0
..___tag_value_cbrt.1:
        subq      $24, %rsp
..___tag_value_cbrt.3:
        movsd     %xmm0, (%rsp)

        // Split x. After the 44-bit right shift the low 20 bits are
        // sign (bit 19), exponent (bits 8..18) and b1..b8 (bits 0..7).
        movq      %xmm0, %xmm7
        movl      $524032, %edx                 /* 0x7ff00: exponent field */
        movsd     EXP_MSK3(%rip), %xmm5
        movsd     EXP_MSK2(%rip), %xmm3
        psrlq     $44, %xmm7
        pextrw    $0, %xmm7, %ecx
        movd      %xmm7, %eax
        movsd     EXP_MASK(%rip), %xmm1
        movsd     SIG_MASK(%rip), %xmm2
        andl      $248, %ecx                    /* 8 * [b1..b5] */
        lea       rcp_table(%rip), %r8
        movsd     (%rcx,%r8), %xmm4
        movq      %rax, %r9
        andl      %eax, %edx
        testl     %edx, %edx
        je        .Lcbrt_subnormal_or_zero      /* exponent field all zeros */
        cmpl      $524032, %edx
        je        .Lcbrt_inf_or_nan             /* exponent field all ones */

        // Normal input: e = biased exponent, k' = floor(e / 3), j = e mod 3.
        shrl      $8, %edx                      /* edx = e */
        shrq      $8, %r9                       /* r9 = sign:e (12 bits) */
        andpd     %xmm0, %xmm2                  /* fraction bits b1..b6 */
        andpd     %xmm5, %xmm0                  /* fraction bits b1..b52 */
        orpd      %xmm2, %xmm3                  /* -(1.b1..b5 1) */
        orpd      %xmm0, %xmm1                  /* -(1.b1..b52) */
        movapd    coeff_table(%rip), %xmm5
        movl      $5462, %eax
        movapd    16+coeff_table(%rip), %xmm6
        mull      %edx                          /* edx:eax = 5462 * e */
        movq      %r9, %rdx
        andq      $2047, %r9                    /* r9 = e */
        shrl      $14, %eax                     /* eax = floor(e / 3) */
        andl      $2048, %edx                   /* sign bit of x */
        subq      %rax, %r9
        subq      %rax, %r9
        subq      %rax, %r9                     /* r9 = j */
        shlq      $8, %r9                       /* 256 bytes per j group */
        addl      $682, %eax                    /* rebias: 1023 - 1023/3 */
        orl       %edx, %eax
        movd      %eax, %xmm7
        addq      %r9, %rcx
        psllq     $52, %xmm7                    /* xmm7 = +/-2^k */

.Lcbrt_eval:
        // Result = 2^k*T + (2^k*T*r)*P + 2^k*D. The polynomial is evaluated
        // as two packed halves: the high lane holds p1..p4, the low lane
        // p5..p8, and the low lane is multiplied by r^4 before they are added.
        movapd    32+coeff_table(%rip), %xmm2
        movapd    48+coeff_table(%rip), %xmm0
        subsd     %xmm3, %xmm1                  /* (1.b1..b5 1) - (1.b1..b52) */
        movq      %xmm7, %xmm3
        lea       cbrt_table(%rip), %r8
        mulsd     (%rcx,%r8), %xmm7             /* xmm7 = 2^k * T */
        mulsd     %xmm4, %xmm1                  /* xmm1 = r (rcp is negative) */
        lea       D_table(%rip), %r8
        mulsd     (%rcx,%r8), %xmm3             /* xmm3 = 2^k * D */
        movapd    %xmm1, %xmm4
        unpcklpd  %xmm1, %xmm1                  /* (r, r) */
        mulpd     %xmm1, %xmm5
        mulpd     %xmm1, %xmm6
        mulpd     %xmm1, %xmm1                  /* (r^2, r^2) */
        addpd     %xmm5, %xmm2
        addpd     %xmm6, %xmm0
        mulpd     %xmm1, %xmm2
        mulpd     %xmm1, %xmm1                  /* (r^4, r^4) */
        mulsd     %xmm7, %xmm4                  /* xmm4 = 2^k * T * r */
        addpd     %xmm2, %xmm0
        mulsd     %xmm0, %xmm1                  /* r^4 * (p5 + p6 r + p7 r^2 + p8 r^3) */
        unpckhpd  %xmm0, %xmm0                  /* p1 + p2 r + p3 r^2 + p4 r^3 */
        addsd     %xmm1, %xmm0                  /* xmm0 = P */
        mulsd     %xmm4, %xmm0
        addsd     %xmm3, %xmm0
        addsd     %xmm7, %xmm0
        jmp       .Lcbrt_done

.Lcbrt_subnormal_or_zero:
        // Exponent field is zero. Multiply by 2^63 and split again; the mask
        // registers loaded on the main path (xmm1, xmm2, xmm3, xmm5) are
        // still intact here.
        mulsd     SCALE63(%rip), %xmm0
        movq      %xmm0, %xmm7
        movl      $524032, %edx
        psrlq     $44, %xmm7
        pextrw    $0, %xmm7, %ecx
        movd      %xmm7, %eax
        andl      $248, %ecx
        lea       rcp_table(%rip), %r8
        movsd     (%rcx,%r8), %xmm4
        movq      %rax, %r9
        andl      %eax, %edx
        shrl      $8, %edx
        shrq      $8, %r9
        testl     %edx, %edx
        je        .Lcbrt_zero                   /* still no exponent: x is +/-0 */
        andpd     %xmm0, %xmm2
        andpd     %xmm5, %xmm0
        orpd      %xmm2, %xmm3
        orpd      %xmm0, %xmm1
        movapd    coeff_table(%rip), %xmm5
        movl      $5462, %eax
        movapd    16+coeff_table(%rip), %xmm6
        mull      %edx
        movq      %r9, %rdx
        andq      $2047, %r9
        shrl      $14, %eax
        andl      $2048, %edx
        subq      %rax, %r9
        subq      %rax, %r9
        subq      %rax, %r9
        shlq      $8, %r9
        addl      $661, %eax                    /* 682 - 21 undoes the 2^63 scaling */
        orl       %edx, %eax
        movd      %eax, %xmm7
        addq      %r9, %rcx
        psllq     $52, %xmm7
        jmp       .Lcbrt_eval

.Lcbrt_zero:
        // r9 holds sign:exponent with a zero exponent, so it is non-zero
        // only for a negative zero.
        testq     %r9, %r9
        jne       .Lcbrt_neg_zero
        xorpd     %xmm0, %xmm0
        jmp       .Lcbrt_done

.Lcbrt_neg_zero:
        movsd     ZERON(%rip), %xmm0
        jmp       .Lcbrt_done

.Lcbrt_inf_or_nan:
        // Exponent field is all ones: tell Inf from NaN using the saved x.
        movl      4(%rsp), %eax                 /* high word of x */
        movl      (%rsp), %edx                  /* low word of x */
        movl      %eax, %ecx
        andl      $2147483647, %ecx             /* drop the sign bit */
        cmpl      $2146435072, %ecx             /* 0x7ff00000 */
        ja        .Lcbrt_nan                    /* high fraction bits set */
        testl     %edx, %edx
        jne       .Lcbrt_nan                    /* low fraction bits set */
        cmpl      $2146435072, %eax
        jne       .Lcbrt_neg_inf
        movsd     INF(%rip), %xmm0
        jmp       .Lcbrt_done

.Lcbrt_neg_inf:
        movsd     NEG_INF(%rip), %xmm0
        jmp       .Lcbrt_done

.Lcbrt_nan:
        // x + x yields a quiet NaN and raises invalid for a signaling NaN.
        movsd     (%rsp), %xmm0
        addsd     %xmm0, %xmm0

.Lcbrt_done:
        addq      $24, %rsp
..___tag_value_cbrt.4:
        ret
..___tag_value_cbrt.5:
END(cbrt)
# -- End cbrt
// Read-only constants for cbrt.
//
// coeff_table: the eight polynomial coefficients of
//   P = p1 + p2*r + ... + p8*r^7,
// stored as four 16-byte pairs in the order the packed evaluation in cbrt
// consumes them (low lane first). Each double is written as its low 32-bit
// word followed by its high 32-bit word. The table is read with movapd, so
// it must stay 16-byte aligned.
        .section .rodata, "a"
        .align 16
coeff_table:
        .long   1553778919, 3213899486          /* p8 */
        .long   3534952507, 3215266280          /* p4 */
        .long   1646371399, 3214412045          /* p6 */
        .long   477218588, 3216798151           /* p2 */
        .long   3582521621, 1066628362          /* p7 */
        .long   1007461464, 1068473053          /* p3 */
        .long   889629714, 1067378449           /* p5 */
        .long   1431655765, 1070945621          /* p1 = 0x3FD5555555555555 */
        .type coeff_table,@object
        .size coeff_table,64
// Bit masks used by cbrt to split the significand of x. Each is one 8-byte
// value written as low word, high word.
        .align 8
EXP_MSK3:
        .long   4294967295, 1048575             /* 0x000FFFFFFFFFFFFF: all 52 fraction bits */
        .type EXP_MSK3,@object
        .size EXP_MSK3,8
        .align 8
EXP_MSK2:
        .long   0, 3220193280                   /* 0xBFF0400000000000: -(1 + 2^-6), i.e. -1.0 with b6 set */
        .type EXP_MSK2,@object
        .size EXP_MSK2,8
        .align 8
EXP_MASK:
        .long   0, 3220176896                   /* 0xBFF0000000000000: -1.0 */
        .type EXP_MASK,@object
        .size EXP_MASK,8
        .align 8
SIG_MASK:
        .long   0, 1032192                      /* 0x000FC00000000000: fraction bits b1..b6 */
        .type SIG_MASK,@object
        .size SIG_MASK,8
// rcp_table: 32 doubles indexed by the five leading fraction bits b1..b5
// (byte offset 8 * index). Per the algorithm description these are the
// reciprocals 1/(1.b1 b2 b3 b4 b5 1); every entry here has its sign bit set,
// which cancels against the negated difference formed in cbrt.
// Each line is one double: low word, high word.
        .align 8
rcp_table:
        .long   528611360, 3220144632
        .long   2884679527, 3220082993
        .long   1991868891, 3220024928
        .long   2298714891, 3219970134
        .long   58835168, 3219918343
        .long   3035110223, 3219869313
        .long   1617585086, 3219822831
        .long   2500867033, 3219778702
        .long   4241943008, 3219736752
        .long   258732970, 3219696825
        .long   404232216, 3219658776
        .long   2172167368, 3219622476
        .long   1544257904, 3219587808
        .long   377579543, 3219554664
        .long   1616385542, 3219522945
        .long   813783277, 3219492562
        .long   3940743189, 3219463431
        .long   2689777499, 3219435478
        .long   1700977147, 3219408632
        .long   3169102082, 3219382828
        .long   327235604, 3219358008
        .long   1244336319, 3219334115
        .long   1300311200, 3219311099
        .long   3095471925, 3219288912
        .long   2166487928, 3219267511
        .long   2913108253, 3219246854
        .long   293672978, 3219226904
        .long   288737297, 3219207624
        .long   1810275472, 3219188981
        .long   174592167, 3219170945
        .long   3539053052, 3219153485
        .long   2164392968, 3219136576
        .type rcp_table,@object
        .size rcp_table,256
// cbrt_table: T[j][b1..b5], the leading part of cbrt(2^j * 1.b1 b2 b3 b4 b5 1)
// as named in the algorithm description. Three groups of 32 doubles
// (256 bytes each), selected by j = 0, 1, 2; cbrt indexes it with
// byte offset 256 * j + 8 * [b1..b5].
// Each line is one double: low word, high word.
        .align 8
cbrt_table:
        // j = 0
        .long   572345495, 1072698681
        .long   1998204467, 1072709382
        .long   3861501553, 1072719872
        .long   2268192434, 1072730162
        .long   2981979308, 1072740260
        .long   270859143, 1072750176
        .long   2958651392, 1072759916
        .long   313113243, 1072769490
        .long   919449400, 1072778903
        .long   2809328903, 1072788162
        .long   2222981587, 1072797274
        .long   2352530781, 1072806244
        .long   594152517, 1072815078
        .long   1555767199, 1072823780
        .long   4282421314, 1072832355
        .long   2355578597, 1072840809
        .long   1162590619, 1072849145
        .long   797864051, 1072857367
        .long   431273680, 1072865479
        .long   2669831148, 1072873484
        .long   733477752, 1072881387
        .long   4280220604, 1072889189
        .long   801961634, 1072896896
        .long   2915370760, 1072904508
        .long   1159613482, 1072912030
        .long   2689944798, 1072919463
        .long   1248687822, 1072926811
        .long   2967951030, 1072934075
        .long   630170432, 1072941259
        .long   3760898254, 1072948363
        .long   0, 1072955392                   /* exactly 1.25 = cbrt(125/64) */
        .long   2370273294, 1072962345
        // j = 1
        .long   1261754802, 1072972640
        .long   546334065, 1072986123
        .long   1054893830, 1072999340
        .long   1571187597, 1073012304
        .long   1107975175, 1073025027
        .long   3606909377, 1073037519
        .long   1113616747, 1073049792
        .long   4154744632, 1073061853
        .long   3358931423, 1073073713
        .long   4060702372, 1073085379
        .long   747576176, 1073096860
        .long   3023138255, 1073108161
        .long   1419988548, 1073119291
        .long   1914185305, 1073130255
        .long   294389948, 1073141060
        .long   3761802570, 1073151710
        .long   978281566, 1073162213
        .long   823148820, 1073172572
        .long   2420954441, 1073182792
        .long   3815449908, 1073192878
        .long   2046058587, 1073202835
        .long   1807524753, 1073212666
        .long   2628681401, 1073222375
        .long   3225667357, 1073231966
        .long   1555307421, 1073241443
        .long   3454043099, 1073250808
        .long   1208137896, 1073260066
        .long   3659916772, 1073269218
        .long   1886261264, 1073278269
        .long   3593647839, 1073287220
        .long   3086012205, 1073296075
        .long   2769796922, 1073304836
        // j = 2
        .long   888716057, 1073317807
        .long   2201465623, 1073334794
        .long   164369365, 1073351447
        .long   3462666733, 1073367780
        .long   2773905457, 1073383810
        .long   1342879088, 1073399550
        .long   2543933975, 1073415012
        .long   1684477781, 1073430209
        .long   3532178543, 1073445151
        .long   1147747300, 1073459850
        .long   1928031793, 1073474314
        .long   2079717015, 1073488553
        .long   4016765315, 1073502575
        .long   3670431139, 1073516389
        .long   3549227225, 1073530002
        .long   11637607, 1073543422
        .long   588220169, 1073556654
        .long   2635407503, 1073569705
        .long   2042029317, 1073582582
        .long   1925128962, 1073595290
        .long   4136375664, 1073607834
        .long   759964600, 1073620221
        .long   4257606771, 1073632453
        .long   297278907, 1073644538
        .long   3655053093, 1073656477
        .long   2442253172, 1073668277
        .long   1111876799, 1073679941
        .long   3330973139, 1073691472
        .long   3438879452, 1073702875
        .long   3671565478, 1073714153
        .long   1317849547, 1073725310
        .long   1642364115, 1073736348
        .type cbrt_table,@object
        .size cbrt_table,768
// D_table: D[j][b1..b5], the low-order correction that the algorithm
// description pairs with each cbrt_table entry (T + D approximates the cube
// root of the break point). Same layout and indexing as cbrt_table: three
// groups of 32 doubles selected by j, byte offset 256 * j + 8 * [b1..b5].
// Each line is one double: low word, high word.
        .align 8
D_table:
        // j = 0
        .long   4050900474, 1014427190
        .long   1157977860, 1016444461
        .long   1374568199, 1017271387
        .long   2809163288, 1016882676
        .long   3742377377, 1013168191
        .long   3101606597, 1017541672
        .long   65224358, 1017217597
        .long   2691591250, 1017266643
        .long   4020758549, 1017689313
        .long   1316310992, 1018030788
        .long   1031537856, 1014090882
        .long   3261395239, 1016413641
        .long   886424999, 1016313335
        .long   3114776834, 1014195875
        .long   1681120620, 1017825416
        .long   1329600273, 1016625740
        .long   465474623, 1017097119
        .long   4251633980, 1017169077
        .long   1986990133, 1017710645
        .long   752958613, 1017159641
        .long   2216216792, 1018020163
        .long   4282860129, 1015924861
        .long   1557627859, 1016039538
        .long   3889219754, 1018086237
        .long   3684996408, 1017353275
        .long   723532103, 1017717141
        .long   2951149676, 1012528470
        .long   831890937, 1017830553
        .long   1031212645, 1017387331
        .long   2741737450, 1017604974
        .long   2863311531, 1003776682
        .long   4276736099, 1013153088
        // j = 1
        .long   4111778382, 1015673686
        .long   1728065769, 1016413986
        .long   2708718031, 1018078833
        .long   1069335005, 1015291224
        .long   700037144, 1016482032
        .long   2904566452, 1017226861
        .long   4074156649, 1017622651
        .long   25019565, 1015245366
        .long   3601952608, 1015771755
        .long   3267129373, 1017904664
        .long   503203103, 1014921629
        .long   2122011730, 1018027866
        .long   3927295461, 1014189456
        .long   2790625147, 1016024251
        .long   1330460186, 1016940346
        .long   4033568463, 1015538390
        .long   3695818227, 1017509621
        .long   257573361, 1017208868
        .long   3227697852, 1017337964
        .long   234118548, 1017169577
        .long   4009025803, 1017278524
        .long   1948343394, 1017749310
        .long   678398162, 1018144239
        .long   3083864863, 1016669086
        .long   2415453452, 1017890370
        .long   175467344, 1017330033
        .long   3197359580, 1010339928
        .long   2071276951, 1015941358
        .long   268372543, 1016737773
        .long   938132959, 1017389108
        .long   1816750559, 1017337448
        .long   4119203749, 1017152174
        // j = 2
        .long   2578653878, 1013108497
        .long   2470331096, 1014678606
        .long   123855735, 1016553320
        .long   1265650889, 1014782687
        .long   3414398172, 1017182638
        .long   1040773369, 1016158401
        .long   3483628886, 1016886550
        .long   4140499405, 1016191425
        .long   3893477850, 1016964495
        .long   3935319771, 1009634717
        .long   2978982660, 1015027112
        .long   2452709923, 1017990229
        .long   3190365712, 1015835149
        .long   4237588139, 1015832925
        .long   2610678389, 1017962711
        .long   2127316774, 1017405770
        .long   824267502, 1017959463
        .long   2165924042, 1017912225
        .long   2774007076, 1013257418
        .long   4123916326, 1017582284
        .long   1976417958, 1016959909
        .long   4092806412, 1017711279
        .long   119251817, 1015363631
        .long   3475418768, 1017675415
        .long   1972580503, 1015470684
        .long   815541017, 1017517969
        .long   2429917451, 1017397776
        .long   4062888482, 1016749897
        .long   68284153, 1017925678
        .long   2207779246, 1016320298
        .long   1183466520, 1017408657
        .long   143326427, 1017060403
        .type D_table,@object
        .size D_table,768
// Scalar constants for the special-case paths of cbrt. Each is one 8-byte
// double written as low word, high word.
        .align 8
SCALE63:
        .long   0, 1138753536                   /* 0x43E0000000000000 = 2^63, subnormal pre-scale */
        .type SCALE63,@object
        .size SCALE63,8
        .align 8
ZERON:
        .long   0, 2147483648                   /* 0x8000000000000000 = -0.0 */
        .type ZERON,@object
        .size ZERON,8
        .align 8
INF:
        .long   0, 2146435072                   /* 0x7FF0000000000000 = +Inf */
        .type INF,@object
        .size INF,8
        .align 8
NEG_INF:
        .long   0, 4293918720                   /* 0xFFF0000000000000 = -Inf */
        .type NEG_INF,@object
        .size NEG_INF,8
// Nothing is emitted into .data: the section switch below follows immediately.
        .data
// Empty .note.GNU-stack section (no flags, no contents).
        .section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
// Hand-encoded unwind data for cbrt: one CIE followed by one FDE. All
// multi-byte values are little-endian, so the bytes of each .8byte/.4byte/
// .2byte constant appear in memory in reverse of how they read below.
// The CFA offsets (32 inside the frame, 8 outside) mirror the 24-byte stack
// adjustment made in cbrt between the ..___tag_value_cbrt.* labels.
// NOTE(review): if ENTRY/END already emit .cfi directives, this FDE may
// duplicate assembler-generated unwind data -- confirm against
// private/bionic_asm.h.
        .section .eh_frame,"a",@progbits
.eh_frame_seg:
        .align 1
        // CIE
        .4byte 0x00000014                       /* length: 20 bytes follow */
        .8byte 0x00527a0100000000               /* CIE id 0; version 1; augmentation "zR" */
        .8byte 0x08070c1b01107801               /* code align 1; data align -8; RA column 16; aug len 1; FDE encoding 0x1b; DW_CFA_def_cfa r7, 8 */
        .4byte 0x00000190                       /* DW_CFA_offset r16 at CFA-8; DW_CFA_nop x2 */
        // FDE
        .4byte 0x0000001c                       /* length: 28 bytes follow */
        .4byte 0x0000001c                       /* distance back to the CIE */
        .4byte ..___tag_value_cbrt.1-.          /* function start, PC-relative */
        .4byte ..___tag_value_cbrt.5-..___tag_value_cbrt.1      /* function size */
        .2byte 0x0400                           /* augmentation length 0; DW_CFA_advance_loc4 */
        .4byte ..___tag_value_cbrt.3-..___tag_value_cbrt.1      /* ... past the subq */
        .2byte 0x200e                           /* DW_CFA_def_cfa_offset 32 */
        .byte 0x04                              /* DW_CFA_advance_loc4 */
        .4byte ..___tag_value_cbrt.4-..___tag_value_cbrt.3      /* ... past the addq */
        .2byte 0x080e                           /* DW_CFA_def_cfa_offset 8 */
        .byte 0x00                              /* DW_CFA_nop (padding) */
# End