Blame - math/single/rredf.h - platform_external_arm-optimized-routines

blob: 41d3a0c97d894919ca0e30d057cafd490036ed7f [file] [log] [blame]

Szabolcs Nagy	1b94597	2018-05-14 14:46:40 +0100	[diff] [blame]	1	/*
				2	* rredf.h - trigonometric range reduction function written new for RVCT 4.1
George Lander	da55ef9	2015-11-19 12:05:06 +0000	[diff] [blame]	3	*
Szabolcs Nagy	11253b0	2018-11-12 11:10:57 +0000	[diff] [blame]	4	* Copyright (c) 2009-2018, Arm Limited.
				5	* SPDX-License-Identifier: MIT
George Lander	da55ef9	2015-11-19 12:05:06 +0000	[diff] [blame]	6	*/
				7
				8	/*
				9	* This header file defines an inline function which all three of
				10	* the single-precision trig functions (sinf, cosf, tanf) should use
				11	* to perform range reduction. The inline function handles the
				12	* quickest and most common cases inline, before handing off to an
				13	* out-of-line function defined in rredf.c for everything else. Thus
				14	* a reasonable compromise is struck between speed and space. (I
				15	* hope.) In particular, this approach avoids a function call
				16	* overhead in the common case.
				17	*/
				18
				19	#ifndef _included_rredf_h
				20	#define _included_rredf_h
				21
				22	#include "math_private.h"
				23
				24	#ifdef __cplusplus
				25	extern "C" {
				26	#endif /* __cplusplus */
				27
Szabolcs Nagy	0d51c04	2018-04-25 13:26:22 +0100	[diff] [blame]	28	extern float __mathlib_rredf2(float x, int *q, unsigned k);
George Lander	da55ef9	2015-11-19 12:05:06 +0000	[diff] [blame]	29
				30	/*
				31	* Semantics of the function:
				32	* - x is the single-precision input value provided by the user
				33	* - the return value is in the range [-pi/4,pi/4], and is equal
				34	* (within reasonable accuracy bounds) to x minus n*pi/2 for some
				35	* integer n. (FIXME: perhaps some slippage on the output
				36	* interval is acceptable, requiring more range from the
				37	* following polynomial approximations but permitting more
				38	* approximate rred decisions?)
				39	* - *q is set to a positive value whose low two bits match those
				40	* of n. Alternatively, it comes back as -1 indicating that the
				41	* input value was trivial in some way (infinity, NaN, or so
				42	* small that we can safely return sin(x)=tan(x)=x,cos(x)=1).
				43	*/
Szabolcs Nagy	0d51c04	2018-04-25 13:26:22 +0100	[diff] [blame]	44	static __inline float __mathlib_rredf(float x, int *q)
George Lander	da55ef9	2015-11-19 12:05:06 +0000	[diff] [blame]	45	{
				46	/*
				47	* First, extract the bit pattern of x as an integer, so that we
				48	* can repeatedly compare things to it without multiple
				49	* overheads in retrieving comparison results from the VFP.
				50	*/
				51	unsigned k = fai(x);
				52
				53	/*
				54	* Deal immediately with the simplest possible case, in which x
				55	* is already within the interval [-pi/4,pi/4]. This also
				56	* identifies the subcase of ludicrously small x.
				57	*/
				58	if ((k << 1) < (0x3f490fdb << 1)) {
				59	if ((k << 1) < (0x39800000 << 1))
				60	*q = -1;
				61	else
				62	*q = 0;
				63	return x;
				64	}
				65
				66	/*
				67	* The next plan is to multiply x by 2/pi and convert to an
				68	* integer, which gives us n; then we subtract n*pi/2 from x to
				69	* get our output value.
				70	*
				71	* By representing pi/2 in that final step by a prec-and-a-half
				72	* approximation, we can arrange good accuracy for n strictly
				73	* less than 2^13 (so that an FP representation of n has twelve
				74	* zero bits at the bottom). So our threshold for this strategy
				75	* is 2^13 * pi/2 - pi/4, otherwise known as 8191.75 * pi/2 or
				76	* 4095.875*pi. (Or, for those perverse people interested in
				77	* actual numbers rather than multiples of pi/2, about 12867.5.)
				78	*/
				79	if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1)) {
				80	float nf = 0.636619772367581343f * x;
				81	/*
				82	* The difference between that single-precision constant and
				83	* the real 2/pi is about 2.568e-8. Hence the product nf has a
				84	* potential error of 2.568e-8\|x\| even before rounding; since
				85	* \|x\| < 4096 pi, that gives us an error bound of about
				86	* 0.0003305.
				87	*
				88	* nf is then rounded to single precision, with a max error of
				89	* 1/2 ULP, and since nf goes up to just under 8192, half a
				90	* ULP could be as big as 2^-12 ~= 0.0002441.
				91	*
				92	* So by the time we convert nf to an integer, it could be off
				93	* by that much, causing the wrong integer to be selected, and
				94	* causing us to return a value a little bit outside the
				95	* theoretical [-pi/4,+pi/4] output interval.
				96	*
				97	* How much outside? Well, we subtract nf*pi/2 from x, so the
				98	* error bounds above have be be multiplied by pi/2. And if
				99	* both of the above sources of error suffer their worst cases
				100	* at once, then the very largest value we could return is
				101	* obtained by adding that lot to the interval bound pi/4 to
				102	* get
				103	*
				104	* pi/4 + ((2/pi - 0f_3f22f983)4096pi + 2^-12) * pi/2
				105	*
				106	* which comes to 0f_3f494b02. (Compare 0f_3f490fdb = pi/4.)
				107	*
				108	* So callers of this range reducer should be prepared to
				109	* handle numbers up to that large.
				110	*/
				111	#ifdef __TARGET_FPU_SOFTVFP
				112	nf = _frnd(nf);
				113	#else
				114	if (k & 0x80000000)
				115	nf = (nf - 8388608.0f) + 8388608.0f;
				116	else
				117	nf = (nf + 8388608.0f) - 8388608.0f; /* round to _nearest_ integer. FIXME: use some sort of frnd in softfp */
				118	#endif
				119	*q = 3 & (int)nf;
				120	#if 0
				121	/*
				122	* FIXME: now I need a bunch of special cases to avoid
				123	* having to do the full four-word reduction every time.
				124	* Also, adjust the comment at the top of this section!
				125	*/
				126	if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1))
				127	return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.4442d2p-24F;
				128	else
				129	#endif
				130	return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.444p-24F - nf * 0x1.68c234p-39F;
				131	}
				132
				133	/*
				134	* That's enough to do in-line; if we're still playing, hand off
				135	* to the out-of-line main range reducer.
				136	*/
Szabolcs Nagy	0d51c04	2018-04-25 13:26:22 +0100	[diff] [blame]	137	return __mathlib_rredf2(x, q, k);
George Lander	da55ef9	2015-11-19 12:05:06 +0000	[diff] [blame]	138	}
				139
				140	#ifdef __cplusplus
				141	} /* end of extern "C" */
				142	#endif /* __cplusplus */
				143
				144	#endif /* included */
				145
				146	/* end of rredf.h */