Blame - libm/src/s_fma.c - platform_bionic

blob: 1fcc26c596cf24ee96acb03b0ebe0dd7696577e2 [file] [log] [blame]

The Android Open Source Project	1dc9e47	2009-03-03 19:28:35 -0800	[diff] [blame]	1	/*-
				2	* Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
				3	* All rights reserved.
				4	*
				5	* Redistribution and use in source and binary forms, with or without
				6	* modification, are permitted provided that the following conditions
				7	* are met:
				8	* 1. Redistributions of source code must retain the above copyright
				9	* notice, this list of conditions and the following disclaimer.
				10	* 2. Redistributions in binary form must reproduce the above copyright
				11	* notice, this list of conditions and the following disclaimer in the
				12	* documentation and/or other materials provided with the distribution.
				13	*
				14	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
				15	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				16	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				17	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
				18	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				19	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
				20	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
				21	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
				22	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
				23	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
				24	* SUCH DAMAGE.
				25	*/
				26
				27	#include <sys/cdefs.h>
				28	/* __FBSDID("$FreeBSD: src/lib/msun/src/s_fma.c,v 1.4 2005/03/18 02:27:59 das Exp $"); */
				29
				30	#include <fenv.h>
				31	#include <float.h>
				32	#include <math.h>
				33
				34	/*
				35	* Fused multiply-add: Compute x * y + z with a single rounding error.
				36	*
				37	* We use scaling to avoid overflow/underflow, along with the
				38	* canonical precision-doubling technique adapted from:
				39	*
				40	* Dekker, T. A Floating-Point Technique for Extending the
				41	* Available Precision. Numer. Math. 18, 224-242 (1971).
				42	*
				43	* This algorithm is sensitive to the rounding precision. FPUs such
				44	* as the i387 must be set in double-precision mode if variables are
				45	* to be stored in FP registers in order to avoid incorrect results.
				46	* This is the default on FreeBSD, but not on many other systems.
				47	*
				48	* Hardware instructions should be used on architectures that support them,
				49	* since this implementation will likely be several times slower.
				50	*/
				51	#if LDBL_MANT_DIG != 113
				52	double
				53	fma(double x, double y, double z)
				54	{
				55	static const double split = 0x1p27 + 1.0;
				56	double xs, ys, zs;
				57	double c, cc, hx, hy, p, q, tx, ty;
				58	double r, rr, s;
				59	int oround;
				60	int ex, ey, ez;
				61	int spread;
				62
				63	if (z == 0.0)
				64	return (x * y);
				65	if (x == 0.0 \|\| y == 0.0)
				66	return (x * y + z);
				67
				68	/* Results of frexp() are undefined for these cases. */
				69	if (!isfinite(x) \|\| !isfinite(y) \|\| !isfinite(z))
				70	return (x * y + z);
				71
				72	xs = frexp(x, &ex);
				73	ys = frexp(y, &ey);
				74	zs = frexp(z, &ez);
				75	oround = fegetround();
				76	spread = ex + ey - ez;
				77
				78	/*
				79	* If x * y and z are many orders of magnitude apart, the scaling
				80	* will overflow, so we handle these cases specially. Rounding
				81	* modes other than FE_TONEAREST are painful.
				82	*/
				83	if (spread > DBL_MANT_DIG * 2) {
				84	fenv_t env;
				85	feraiseexcept(FE_INEXACT);
				86	switch(oround) {
				87	case FE_TONEAREST:
				88	return (x * y);
				89	case FE_TOWARDZERO:
				90	if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				91	return (x * y);
				92	feholdexcept(&env);
				93	r = x * y;
				94	if (!fetestexcept(FE_INEXACT))
				95	r = nextafter(r, 0);
				96	feupdateenv(&env);
				97	return (r);
				98	case FE_DOWNWARD:
				99	if (z > 0.0)
				100	return (x * y);
				101	feholdexcept(&env);
				102	r = x * y;
				103	if (!fetestexcept(FE_INEXACT))
				104	r = nextafter(r, -INFINITY);
				105	feupdateenv(&env);
				106	return (r);
				107	default: /* FE_UPWARD */
				108	if (z < 0.0)
				109	return (x * y);
				110	feholdexcept(&env);
				111	r = x * y;
				112	if (!fetestexcept(FE_INEXACT))
				113	r = nextafter(r, INFINITY);
				114	feupdateenv(&env);
				115	return (r);
				116	}
				117	}
				118	if (spread < -DBL_MANT_DIG) {
				119	feraiseexcept(FE_INEXACT);
				120	if (!isnormal(z))
				121	feraiseexcept(FE_UNDERFLOW);
				122	switch (oround) {
				123	case FE_TONEAREST:
				124	return (z);
				125	case FE_TOWARDZERO:
				126	if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
				127	return (z);
				128	else
				129	return (nextafter(z, 0));
				130	case FE_DOWNWARD:
				131	if (x > 0.0 ^ y < 0.0)
				132	return (z);
				133	else
				134	return (nextafter(z, -INFINITY));
				135	default: /* FE_UPWARD */
				136	if (x > 0.0 ^ y < 0.0)
				137	return (nextafter(z, INFINITY));
				138	else
				139	return (z);
				140	}
				141	}
				142
				143	/*
				144	* Use Dekker's algorithm to perform the multiplication and
				145	* subsequent addition in twice the machine precision.
				146	* Arrange so that x * y = c + cc, and x * y + z = r + rr.
				147	*/
				148	fesetround(FE_TONEAREST);
				149
				150	p = xs * split;
				151	hx = xs - p;
				152	hx += p;
				153	tx = xs - hx;
				154
				155	p = ys * split;
				156	hy = ys - p;
				157	hy += p;
				158	ty = ys - hy;
				159
				160	p = hx * hy;
				161	q = hx * ty + tx * hy;
				162	c = p + q;
				163	cc = p - c + q + tx * ty;
				164
				165	zs = ldexp(zs, -spread);
				166	r = c + zs;
				167	s = r - c;
				168	rr = (c - (r - s)) + (zs - s) + cc;
				169
				170	spread = ex + ey;
				171	if (spread + ilogb(r) > -1023) {
				172	fesetround(oround);
				173	r = r + rr;
				174	} else {
				175	/*
				176	* The result is subnormal, so we round before scaling to
				177	* avoid double rounding.
				178	*/
				179	p = ldexp(copysign(0x1p-1022, r), -spread);
				180	c = r + p;
				181	s = c - r;
				182	cc = (r - (c - s)) + (p - s) + rr;
				183	fesetround(oround);
				184	r = (c + cc) - p;
				185	}
				186	return (ldexp(r, spread));
				187	}
				188	#else /* LDBL_MANT_DIG == 113 */
				/*
				 * With a 113-bit long double significand (more than twice the precision
				 * of a double), the product of two doubles is exactly representable, so
				 * the multiply-add is simply evaluated in long double.
				 *
				 * NOTE(review): the sum is rounded once to long double and again when
				 * it is converted to double on return -- confirm this is acceptable.
				 */
				double
				fma(double x, double y, double z)
				{
					const long double wide_x = x;

					return (double)(wide_x * y + z);
				}
				198	#endif /* LDBL_MANT_DIG != 113 */
				199
				/*
				 * Only when long double has a 53-bit significand (LDBL_MANT_DIG == 53):
				 * presumably exports fmal as a weak alias of fma so that no separate
				 * long double implementation is needed.  __weak_reference is defined
				 * outside this file -- TODO confirm its exact semantics.
				 */
				200	#if (LDBL_MANT_DIG == 53)
				201	__weak_reference(fma, fmal);
				202	#endif