math/single/rredf.h - platform_external_arm-optimized-routines - Gitiles

 /*
  * rredf.h - trigonometric range reduction function written new for RVCT 4.1
  *
  * Copyright (c) 2009-2018, Arm Limited.
  * SPDX-License-Identifier: MIT
  */

 /*
  * This header file defines an inline function which all three of
  * the single-precision trig functions (sinf, cosf, tanf) should use
  * to perform range reduction. The inline function handles the
  * quickest and most common cases inline, before handing off to an
  * out-of-line function defined in rredf.c for everything else. Thus
  * a reasonable compromise is struck between speed and space. (I
  * hope.) In particular, this approach avoids a function call
  * overhead in the common case.
  */

 #ifndef _included_rredf_h
 #define _included_rredf_h

 #include "math_private.h"

 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */

 extern float __mathlib_rredf2(float x, int *q, unsigned k);

 /*
  * Semantics of the function:
  *  - x is the single-precision input value provided by the user
  *  - the return value is in the range [-pi/4,pi/4], and is equal
  *    (within reasonable accuracy bounds) to x minus n*pi/2 for some
  *    integer n. (FIXME: perhaps some slippage on the output
  *    interval is acceptable, requiring more range from the
  *    following polynomial approximations but permitting more
  *    approximate rred decisions?)
  *  - *q is set to a positive value whose low two bits match those
  *    of n. Alternatively, it comes back as -1 indicating that the
  *    input value was trivial in some way (infinity, NaN, or so
  *    small that we can safely return sin(x)=tan(x)=x,cos(x)=1).
  */
 static __inline float __mathlib_rredf(float x, int *q)
 {
     /*
      * First, extract the bit pattern of x as an integer, so that we
      * can repeatedly compare things to it without multiple
      * overheads in retrieving comparison results from the VFP.
      */
     unsigned k = fai(x);

     /*
      * Deal immediately with the simplest possible case, in which x
      * is already within the interval [-pi/4,pi/4]. This also
      * identifies the subcase of ludicrously small x.
      */
     if ((k << 1) < (0x3f490fdb << 1)) {
         if ((k << 1) < (0x39800000 << 1))
             *q = -1;
         else
             *q = 0;
         return x;
     }

     /*
      * The next plan is to multiply x by 2/pi and convert to an
      * integer, which gives us n; then we subtract n*pi/2 from x to
      * get our output value.
      *
      * By representing pi/2 in that final step by a prec-and-a-half
      * approximation, we can arrange good accuracy for n strictly
      * less than 2^13 (so that an FP representation of n has twelve
      * zero bits at the bottom). So our threshold for this strategy
      * is 2^13 * pi/2 - pi/4, otherwise known as 8191.75 * pi/2 or
      * 4095.875*pi. (Or, for those perverse people interested in
      * actual numbers rather than multiples of pi/2, about 12867.5.)
      */
     if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1)) {
         float nf = 0.636619772367581343f * x;
         /*
          * The difference between that single-precision constant and
          * the real 2/pi is about 2.568e-8. Hence the product nf has a
          * potential error of 2.568e-8|x| even before rounding; since
          * |x| < 4096 pi, that gives us an error bound of about
          * 0.0003305.
          *
          * nf is then rounded to single precision, with a max error of
          * 1/2 ULP, and since nf goes up to just under 8192, half a
          * ULP could be as big as 2^-12 ~= 0.0002441.
          *
          * So by the time we convert nf to an integer, it could be off
          * by that much, causing the wrong integer to be selected, and
          * causing us to return a value a little bit outside the
          * theoretical [-pi/4,+pi/4] output interval.
          *
          * How much outside? Well, we subtract nf*pi/2 from x, so the
          * error bounds above have be be multiplied by pi/2. And if
          * both of the above sources of error suffer their worst cases
          * at once, then the very largest value we could return is
          * obtained by adding that lot to the interval bound pi/4 to
          * get
          *
          *    pi/4 + ((2/pi - 0f_3f22f983)*4096*pi + 2^-12) * pi/2
          *
          * which comes to 0f_3f494b02. (Compare 0f_3f490fdb = pi/4.)
          *
          * So callers of this range reducer should be prepared to
          * handle numbers up to that large.
          */
 #ifdef __TARGET_FPU_SOFTVFP
         nf = _frnd(nf);
 #else
         if (k & 0x80000000)
             nf = (nf - 8388608.0f) + 8388608.0f;
         else
             nf = (nf + 8388608.0f) - 8388608.0f;   /* round to _nearest_ integer. FIXME: use some sort of frnd in softfp */
 #endif
         *q = 3 & (int)nf;
 #if 0
         /*
          * FIXME: now I need a bunch of special cases to avoid
          * having to do the full four-word reduction every time.
          * Also, adjust the comment at the top of this section!
          */
         if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1))
             return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.4442d2p-24F;
         else
 #endif
             return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.444p-24F - nf * 0x1.68c234p-39F;
     }

     /*
      * That's enough to do in-line; if we're still playing, hand off
      * to the out-of-line main range reducer.
      */
     return __mathlib_rredf2(x, q, k);
 }

 #ifdef __cplusplus
 } /* end of extern "C" */
 #endif /* __cplusplus */

 #endif /* included */

 /* end of rredf.h */
	/*
	* rredf.h - trigonometric range reduction function written new for RVCT 4.1
	*
	* Copyright (c) 2009-2018, Arm Limited.
	* SPDX-License-Identifier: MIT
	*/

	/*
	* This header file defines an inline function which all three of
	* the single-precision trig functions (sinf, cosf, tanf) should use
	* to perform range reduction. The inline function handles the
	* quickest and most common cases inline, before handing off to an
	* out-of-line function defined in rredf.c for everything else. Thus
	* a reasonable compromise is struck between speed and space. (I
	* hope.) In particular, this approach avoids a function call
	* overhead in the common case.
	*/

	#ifndef _included_rredf_h
	#define _included_rredf_h

	#include "math_private.h"

	#ifdef __cplusplus
	extern "C" {
	#endif /* __cplusplus */

	extern float __mathlib_rredf2(float x, int *q, unsigned k);

	/*
	* Semantics of the function:
	* - x is the single-precision input value provided by the user
	* - the return value is in the range [-pi/4,pi/4], and is equal
	* (within reasonable accuracy bounds) to x minus n*pi/2 for some
	* integer n. (FIXME: perhaps some slippage on the output
	* interval is acceptable, requiring more range from the
	* following polynomial approximations but permitting more
	* approximate rred decisions?)
	* - *q is set to a positive value whose low two bits match those
	* of n. Alternatively, it comes back as -1 indicating that the
	* input value was trivial in some way (infinity, NaN, or so
	* small that we can safely return sin(x)=tan(x)=x,cos(x)=1).
	*/
	static __inline float __mathlib_rredf(float x, int *q)
	{
	/*
	* First, extract the bit pattern of x as an integer, so that we
	* can repeatedly compare things to it without multiple
	* overheads in retrieving comparison results from the VFP.
	*/
	unsigned k = fai(x);

	/*
	* Deal immediately with the simplest possible case, in which x
	* is already within the interval [-pi/4,pi/4]. This also
	* identifies the subcase of ludicrously small x.
	*/
	if ((k << 1) < (0x3f490fdb << 1)) {
	if ((k << 1) < (0x39800000 << 1))
	*q = -1;
	else
	*q = 0;
	return x;
	}

	/*
	* The next plan is to multiply x by 2/pi and convert to an
	* integer, which gives us n; then we subtract n*pi/2 from x to
	* get our output value.
	*
	* By representing pi/2 in that final step by a prec-and-a-half
	* approximation, we can arrange good accuracy for n strictly
	* less than 2^13 (so that an FP representation of n has twelve
	* zero bits at the bottom). So our threshold for this strategy
	* is 2^13 * pi/2 - pi/4, otherwise known as 8191.75 * pi/2 or
	* 4095.875*pi. (Or, for those perverse people interested in
	* actual numbers rather than multiples of pi/2, about 12867.5.)
	*/
	if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1)) {
	float nf = 0.636619772367581343f * x;
	/*
	* The difference between that single-precision constant and
	* the real 2/pi is about 2.568e-8. Hence the product nf has a
	* potential error of 2.568e-8\|x\| even before rounding; since
	* \|x\| < 4096 pi, that gives us an error bound of about
	* 0.0003305.
	*
	* nf is then rounded to single precision, with a max error of
	* 1/2 ULP, and since nf goes up to just under 8192, half a
	* ULP could be as big as 2^-12 ~= 0.0002441.
	*
	* So by the time we convert nf to an integer, it could be off
	* by that much, causing the wrong integer to be selected, and
	* causing us to return a value a little bit outside the
	* theoretical [-pi/4,+pi/4] output interval.
	*
	* How much outside? Well, we subtract nf*pi/2 from x, so the
	* error bounds above have be be multiplied by pi/2. And if
	* both of the above sources of error suffer their worst cases
	* at once, then the very largest value we could return is
	* obtained by adding that lot to the interval bound pi/4 to
	* get
	*
	* pi/4 + ((2/pi - 0f_3f22f983)4096pi + 2^-12) * pi/2
	*
	* which comes to 0f_3f494b02. (Compare 0f_3f490fdb = pi/4.)
	*
	* So callers of this range reducer should be prepared to
	* handle numbers up to that large.
	*/
	#ifdef __TARGET_FPU_SOFTVFP
	nf = _frnd(nf);
	#else
	if (k & 0x80000000)
	nf = (nf - 8388608.0f) + 8388608.0f;
	else
	nf = (nf + 8388608.0f) - 8388608.0f; /* round to _nearest_ integer. FIXME: use some sort of frnd in softfp */
	#endif
	*q = 3 & (int)nf;
	#if 0
	/*
	* FIXME: now I need a bunch of special cases to avoid
	* having to do the full four-word reduction every time.
	* Also, adjust the comment at the top of this section!
	*/
	if (__builtin_expect((k & 0x7fffffff) < 0x46490e49, 1))
	return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.4442d2p-24F;
	else
	#endif
	return ((x - nf * 0x1.92p+0F) - nf * 0x1.fb4p-12F) - nf * 0x1.444p-24F - nf * 0x1.68c234p-39F;
	}

	/*
	* That's enough to do in-line; if we're still playing, hand off
	* to the out-of-line main range reducer.
	*/
	return __mathlib_rredf2(x, q, k);
	}

	#ifdef __cplusplus
	} /* end of extern "C" */
	#endif /* __cplusplus */

	#endif /* included */

	/* end of rredf.h */