Szabolcs Nagy | 1b94597 | 2018-05-14 14:46:40 +0100 | [diff] [blame] | 1 | /* |
| 2 | * rredf.h - trigonometric range reduction function written new for RVCT 4.1 |
George Lander | da55ef9 | 2015-11-19 12:05:06 +0000 | [diff] [blame] | 3 | * |
Szabolcs Nagy | 11253b0 | 2018-11-12 11:10:57 +0000 | [diff] [blame] | 4 | * Copyright (c) 2009-2018, Arm Limited. |
| 5 | * SPDX-License-Identifier: MIT |
George Lander | da55ef9 | 2015-11-19 12:05:06 +0000 | [diff] [blame] | 6 | */ |
| 7 | |
| 8 | /* |
| 9 | * This header file defines an inline function which all three of |
| 10 | * the single-precision trig functions (sinf, cosf, tanf) should use |
| 11 | * to perform range reduction. The inline function handles the |
| 12 | * quickest and most common cases inline, before handing off to an |
| 13 | * out-of-line function defined in rredf.c for everything else. Thus |
| 14 | * a reasonable compromise is struck between speed and space. (I |
| 15 | * hope.) In particular, this approach avoids a function call |
| 16 | * overhead in the common case. |
| 17 | */ |
| 18 | |
| 19 | #ifndef _included_rredf_h |
| 20 | #define _included_rredf_h |
| 21 | |
| 22 | #include "math_private.h" |
| 23 | |
| 24 | #ifdef __cplusplus |
| 25 | extern "C" { |
| 26 | #endif /* __cplusplus */ |
| 27 | |
Szabolcs Nagy | 0d51c04 | 2018-04-25 13:26:22 +0100 | [diff] [blame] | 28 | extern float __mathlib_rredf2(float x, int *q, unsigned k); |
George Lander | da55ef9 | 2015-11-19 12:05:06 +0000 | [diff] [blame] | 29 | |
| 30 | /* |
| 31 | * Semantics of the function: |
| 32 | * - x is the single-precision input value provided by the user |
| 33 | * - the return value is in the range [-pi/4,pi/4], and is equal |
| 34 | * (within reasonable accuracy bounds) to x minus n*pi/2 for some |
| 35 | * integer n. (FIXME: perhaps some slippage on the output |
| 36 | * interval is acceptable, requiring more range from the |
| 37 | * following polynomial approximations but permitting more |
| 38 | * approximate rred decisions?) |
| 39 | * - *q is set to a positive value whose low two bits match those |
| 40 | * of n. Alternatively, it comes back as -1 indicating that the |
| 41 | * input value was trivial in some way (infinity, NaN, or so |
| 42 | * small that we can safely return sin(x)=tan(x)=x,cos(x)=1). |
| 43 | */ |
Szabolcs Nagy | 0d51c04 | 2018-04-25 13:26:22 +0100 | [diff] [blame] | 44 | static __inline float __mathlib_rredf(float x, int *q) |
George Lander | da55ef9 | 2015-11-19 12:05:06 +0000 | [diff] [blame] | 45 | { |
| 46 | /* |
| 47 | * First, extract the bit pattern of x as an integer, so that we |
| 48 | * can repeatedly compare things to it without multiple |
| 49 | * overheads in retrieving comparison results from the VFP. |
| 50 | */ |
| 51 | unsigned k = fai(x); |
| 52 | |
| 53 | /* |
| 54 | * Deal immediately with the simplest possible case, in which x |
| 55 | * is already within the interval [-pi/4,pi/4]. This also |
| 56 | * identifies the subcase of ludicrously small x. |
| 57 | */ |
| 58 | if ((k << 1) < (0x3f490fdb << 1)) { |
| 59 | if ((k << 1) < (0x39800000 << 1)) |
| 60 | *q = -1; |
| 61 | else |
| 62 | *q = 0; |
| 63 | return x; |
| 64 | } |
| 65 | |
| 66 | /* |
| 67 | * The next plan is to multiply x by 2/pi and convert to an |
| 68 | * integer, which gives us n; then we subtract n*pi/2 from x to |
| 69 | * get our output value. |
| 70 | * |
| 71 | * By representing pi/2 in that final step by a prec-and-a-half |
| 72 | * approximation, we can arrange good accuracy for n strictly |
| 73 | * less than 2^13 (so that an FP representation of n has twelve |
| 74 | * zero bits at the bottom). So our threshold for this strategy |
| 75 | * is 2^13 * pi/2 - pi/4, otherwise known as 8191.75 * pi/2 or |
| 76 | * 4095.875*pi. (Or, for those perverse people interested in |
| 77 | * actual numbers rather than multiples of pi/2, about 12867.5.) |
| 78 | */ |
| 79 | if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1)) { |
| 80 | float nf = 0.636619772367581343f * x; |
| 81 | /* |
| 82 | * The difference between that single-precision constant and |
| 83 | * the real 2/pi is about 2.568e-8. Hence the product nf has a |
| 84 | * potential error of 2.568e-8|x| even before rounding; since |
| 85 | * |x| < 4096 pi, that gives us an error bound of about |
| 86 | * 0.0003305. |
| 87 | * |
| 88 | * nf is then rounded to single precision, with a max error of |
| 89 | * 1/2 ULP, and since nf goes up to just under 8192, half a |
| 90 | * ULP could be as big as 2^-12 ~= 0.0002441. |
| 91 | * |
| 92 | * So by the time we convert nf to an integer, it could be off |
| 93 | * by that much, causing the wrong integer to be selected, and |
| 94 | * causing us to return a value a little bit outside the |
| 95 | * theoretical [-pi/4,+pi/4] output interval. |
| 96 | * |
| 97 | * How much outside? Well, we subtract nf*pi/2 from x, so the |
| 98 | * error bounds above have be be multiplied by pi/2. And if |
| 99 | * both of the above sources of error suffer their worst cases |
| 100 | * at once, then the very largest value we could return is |
| 101 | * obtained by adding that lot to the interval bound pi/4 to |
| 102 | * get |
| 103 | * |
| 104 | * pi/4 + ((2/pi - 0f_3f22f983)*4096*pi + 2^-12) * pi/2 |
| 105 | * |
| 106 | * which comes to 0f_3f494b02. (Compare 0f_3f490fdb = pi/4.) |
| 107 | * |
| 108 | * So callers of this range reducer should be prepared to |
| 109 | * handle numbers up to that large. |
| 110 | */ |
| 111 | #ifdef __TARGET_FPU_SOFTVFP |
| 112 | nf = _frnd(nf); |
| 113 | #else |
| 114 | if (k & 0x80000000) |
| 115 | nf = (nf - 8388608.0f) + 8388608.0f; |
| 116 | else |
| 117 | nf = (nf + 8388608.0f) - 8388608.0f; /* round to _nearest_ integer. FIXME: use some sort of frnd in softfp */ |
| 118 | #endif |
| 119 | *q = 3 & (int)nf; |
| 120 | #if 0 |
| 121 | /* |
| 122 | * FIXME: now I need a bunch of special cases to avoid |
| 123 | * having to do the full four-word reduction every time. |
| 124 | * Also, adjust the comment at the top of this section! |
| 125 | */ |
| 126 | if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1)) |
| 127 | return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.4442d2p-24F; |
| 128 | else |
| 129 | #endif |
| 130 | return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.444p-24F - nf * 0x1.68c234p-39F; |
| 131 | } |
| 132 | |
| 133 | /* |
| 134 | * That's enough to do in-line; if we're still playing, hand off |
| 135 | * to the out-of-line main range reducer. |
| 136 | */ |
Szabolcs Nagy | 0d51c04 | 2018-04-25 13:26:22 +0100 | [diff] [blame] | 137 | return __mathlib_rredf2(x, q, k); |
George Lander | da55ef9 | 2015-11-19 12:05:06 +0000 | [diff] [blame] | 138 | } |
| 139 | |
| 140 | #ifdef __cplusplus |
| 141 | } /* end of extern "C" */ |
| 142 | #endif /* __cplusplus */ |
| 143 | |
| 144 | #endif /* included */ |
| 145 | |
| 146 | /* end of rredf.h */ |