Blame - math/sincosf.h - platform_external_arm-optimized-routines

blob: 1e80fc9ba8e19cab265fc98ec325c9b3f17a998d [file] [log] [blame]

Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	1	/*
				2	* Header for sinf, cosf and sincosf.
				3	*
				4	* Copyright (c) 2018, Arm Limited.
Szabolcs Nagy	11253b0	2018-11-12 11:10:57 +0000	[diff] [blame]	5	* SPDX-License-Identifier: MIT
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	6	*/
				7
				8	#include <stdint.h>
				9	#include <math.h>
				10	#include "math_config.h"
				11
/* 2 * PI scaled by 2^-64.  reduce_large multiplies its signed integer
   remainder by this value to produce its double result.  */
static const double pi63 = 0x1.921FB54442D18p-62;

/* PI / 4.  */
static const double pio4 = 0x1.921FB54442D18p-1;
				16
/* Constants and polynomial coefficients used to evaluate sine and cosine.
   The field order is part of the data layout and must not change.  */
typedef struct
{
  /* Sign of sine in quadrants 0..3.  */
  double sign[4];
  /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS).  */
  double hpi_inv;
  /* PI / 2.  */
  double hpi;
  /* Cosine polynomial coefficients.  */
  double c0;
  double c1;
  double c2;
  double c3;
  double c4;
  /* Sine polynomial coefficients.  */
  double s1;
  double s2;
  double s3;
} sincos_t;
				26
Wilco Dijkstra	b2fc989	2018-08-08 15:03:29 +0100	[diff] [blame]	27	/* Polynomial data (the cosine polynomial is negated in the 2nd entry). */
Wilco Dijkstra	3262ef2	2018-07-04 17:45:15 +0100	[diff] [blame]	28	extern const sincos_t __sincosf_table[2] HIDDEN;
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	29
Wilco Dijkstra	b2fc989	2018-08-08 15:03:29 +0100	[diff] [blame]	30	/* Table with 4/PI to 192 bit precision. */
Wilco Dijkstra	3262ef2	2018-07-04 17:45:15 +0100	[diff] [blame]	31	extern const uint32_t __inv_pio4[] HIDDEN;
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	32
/* Return the 12 most significant bits of the bit pattern of X with the
   sign bit masked off.  */
static inline uint32_t
abstop12 (float x)
{
  /* Shift first so the top 12 bits land in bits 0..11, then drop bit 11
     (the sign).  */
  uint32_t top = asuint (x) >> 20;
  return top & 0x7ff;
}
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	39
				40	/* Compute the sine and cosine of inputs X and X2 (X squared), using the
				41	polynomial P and store the results in SINP and COSP. N is the quadrant,
				42	if odd the cosine and sine polynomials are swapped. */
				43	static inline void
Wilco Dijkstra	3262ef2	2018-07-04 17:45:15 +0100	[diff] [blame]	44	sincosf_poly (double x, double x2, const sincos_t p, int n, float sinp,
				45	float *cosp)
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	46	{
				47	double x3, x4, x5, x6, s, c, c1, c2, s1;
				48
				49	x4 = x2 * x2;
				50	x3 = x2 * x;
				51	c2 = p->c3 + x2 * p->c4;
				52	s1 = p->s2 + x2 * p->s3;
				53
				54	/* Swap sin/cos result based on quadrant. */
				55	float *tmp = (n & 1 ? cosp : sinp);
				56	cosp = (n & 1 ? sinp : cosp);
				57	sinp = tmp;
				58
				59	c1 = p->c0 + x2 * p->c1;
				60	x5 = x3 * x2;
				61	x6 = x4 * x2;
				62
				63	s = x + x3 * p->s1;
				64	c = c1 + x4 * p->c2;
				65
				66	sinp = s + x5 s1;
				67	cosp = c + x6 c2;
				68	}
				69
				70	/* Return the sine of inputs X and X2 (X squared) using the polynomial P.
				71	N is the quadrant, and if odd the cosine polynomial is used. */
				72	static inline float
Wilco Dijkstra	3262ef2	2018-07-04 17:45:15 +0100	[diff] [blame]	73	sinf_poly (double x, double x2, const sincos_t *p, int n)
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	74	{
				75	double x3, x4, x6, x7, s, c, c1, c2, s1;
				76
				77	if ((n & 1) == 0)
				78	{
				79	x3 = x * x2;
				80	s1 = p->s2 + x2 * p->s3;
				81
				82	x7 = x3 * x2;
				83	s = x + x3 * p->s1;
				84
				85	return s + x7 * s1;
				86	}
				87	else
				88	{
				89	x4 = x2 * x2;
				90	c2 = p->c3 + x2 * p->c4;
				91	c1 = p->c0 + x2 * p->c1;
				92
				93	x6 = x4 * x2;
				94	c = c1 + x4 * p->c2;
				95
				96	return c + x6 * c2;
				97	}
				98	}
				99
				100	/* Fast range reduction using single multiply-subtract. Return the modulo of
				101	X as a value between -PI/4 and PI/4 and store the quadrant in NP.
				102	The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double
				103	is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4,
Wilco Dijkstra	b2fc989	2018-08-08 15:03:29 +0100	[diff] [blame]	104	the result is accurate for \|X\| <= 120.0. */
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	105	static inline double
Wilco Dijkstra	3262ef2	2018-07-04 17:45:15 +0100	[diff] [blame]	106	reduce_fast (double x, const sincos_t p, int np)
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	107	{
				108	double r;
				109	#if TOINT_INTRINSICS
Wilco Dijkstra	b2fc989	2018-08-08 15:03:29 +0100	[diff] [blame]	110	/* Use fast round and lround instructions when available. */
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	111	r = x * p->hpi_inv;
				112	*np = converttoint (r);
				113	return x - roundtoint (r) * p->hpi;
				114	#else
Wilco Dijkstra	b2fc989	2018-08-08 15:03:29 +0100	[diff] [blame]	115	/* Use scaled float to int conversion with explicit rounding.
				116	hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31.
				117	This avoids inaccuracies introduced by truncating negative values. */
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	118	r = x * p->hpi_inv;
Wilco Dijkstra	f3af42d	2018-06-20 15:18:38 +0100	[diff] [blame]	119	int n = ((int32_t)r + 0x800000) >> 24;
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	120	*np = n;
				121	return x - n * p->hpi;
				122	#endif
				123	}
				124
Wilco Dijkstra	b2fc989	2018-08-08 15:03:29 +0100	[diff] [blame]	125	/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	126	XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
				127	Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
				128	Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit
				129	multiply computes the exact 2.62-bit fixed-point modulo. Since the result
				130	can have at most 29 leading zeros after the binary point, the double
				131	precision result is accurate to 33 bits. */
				132	static inline double
				133	reduce_large (uint32_t xi, int *np)
				134	{
Wilco Dijkstra	3262ef2	2018-07-04 17:45:15 +0100	[diff] [blame]	135	const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15];
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	136	int shift = (xi >> 23) & 7;
				137	uint64_t n, res0, res1, res2;
				138
				139	xi = (xi & 0xffffff) \| 0x800000;
				140	xi <<= shift;
				141
				142	res0 = xi * arr[0];
				143	res1 = (uint64_t)xi * arr[4];
				144	res2 = (uint64_t)xi * arr[8];
				145	res0 = (res2 >> 32) \| (res0 << 32);
				146	res0 += res1;
				147
				148	n = (res0 + (1ULL << 61)) >> 62;
				149	res0 -= n << 62;
				150	double x = (int64_t)res0;
				151	*np = n;
Wilco Dijkstra	b2fc989	2018-08-08 15:03:29 +0100	[diff] [blame]	152	return x * pi63;
Wilco Dijkstra	269dc16	2018-05-16 15:39:22 +0100	[diff] [blame]	153	}