Blame - math/v_math.h - platform_external_arm-optimized-routines

blob: 3db22e5b3c122e463f309168a5f659c25a18015e [file] [log] [blame]

Szabolcs Nagy	7a1f4cf	2019-07-18 12:51:41 +0100	[diff] [blame]	1	/*
				2	* Vector math abstractions.
				3	*
				4	* Copyright (c) 2019, Arm Limited.
				5	* SPDX-License-Identifier: MIT
				6	*/
				7
				8	#ifndef _V_MATH_H
				9	#define _V_MATH_H
				10
Szabolcs Nagy	1f3b163	2019-11-06 19:41:30 +0000	[diff] [blame]	11	#ifndef WANT_VMATH
				12	/* Enable the build of vector math code. */
				13	# define WANT_VMATH 1
				14	#endif
				15	#if WANT_VMATH
				16
/* The goal of this header is to allow vector and scalar builds of
   the same algorithm.  The provided intrinsic wrappers are also
   vector-length agnostic, so they can be implemented for SVE (or
   other SIMD architectures) and the same code should then work on
   those targets too.  */
				22
				23	#if SCALAR
				24	#define V_NAME(x) __s_##x
				25	#elif VPCS && __aarch64__
				26	#define V_NAME(x) __vn_##x
				27	#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
				28	#else
				29	#define V_NAME(x) __v_##x
				30	#endif
				31
				32	#ifndef VPCS_ATTR
				33	#define VPCS_ATTR
				34	#endif
				35	#ifndef VPCS_ALIAS
				36	#define VPCS_ALIAS
				37	#endif
				38
				39	#include <stdint.h>
				40	#include "math_config.h"
				41
				42	typedef float f32_t;
				43	typedef uint32_t u32_t;
				44	typedef int32_t s32_t;
				45	typedef double f64_t;
				46	typedef uint64_t u64_t;
				47	typedef int64_t s64_t;
				48
				49	/* reinterpret as type1 from type2. */
				50	static inline u32_t
				51	as_u32_f32 (f32_t x)
				52	{
				53	union { f32_t f; u32_t u; } r = {x};
				54	return r.u;
				55	}
				56	static inline f32_t
				57	as_f32_u32 (u32_t x)
				58	{
				59	union { u32_t u; f32_t f; } r = {x};
				60	return r.f;
				61	}
				62	static inline s32_t
				63	as_s32_u32 (u32_t x)
				64	{
				65	union { u32_t u; s32_t i; } r = {x};
				66	return r.i;
				67	}
				68	static inline u32_t
				69	as_u32_s32 (s32_t x)
				70	{
				71	union { s32_t i; u32_t u; } r = {x};
				72	return r.u;
				73	}
				74	static inline u64_t
				75	as_u64_f64 (f64_t x)
				76	{
				77	union { f64_t f; u64_t u; } r = {x};
				78	return r.u;
				79	}
				80	static inline f64_t
				81	as_f64_u64 (u64_t x)
				82	{
				83	union { u64_t u; f64_t f; } r = {x};
				84	return r.f;
				85	}
				86	static inline s64_t
				87	as_s64_u64 (u64_t x)
				88	{
				89	union { u64_t u; s64_t i; } r = {x};
				90	return r.i;
				91	}
				92	static inline u64_t
				93	as_u64_s64 (s64_t x)
				94	{
				95	union { s64_t i; u64_t u; } r = {x};
				96	return r.u;
				97	}
				98
				99	#if SCALAR
				100	#define V_SUPPORTED 1
				101	typedef f32_t v_f32_t;
				102	typedef u32_t v_u32_t;
				103	typedef s32_t v_s32_t;
				104	typedef f64_t v_f64_t;
				105	typedef u64_t v_u64_t;
				106	typedef s64_t v_s64_t;
				107
				108	static inline int
				109	v_lanes32 (void)
				110	{
				111	return 1;
				112	}
				113
				114	static inline v_f32_t
				115	v_f32 (f32_t x)
				116	{
				117	return x;
				118	}
				119	static inline v_u32_t
				120	v_u32 (u32_t x)
				121	{
				122	return x;
				123	}
				124	static inline v_s32_t
				125	v_s32 (s32_t x)
				126	{
				127	return x;
				128	}
				129
				130	static inline f32_t
				131	v_get_f32 (v_f32_t x, int i)
				132	{
				133	return x;
				134	}
				135	static inline u32_t
				136	v_get_u32 (v_u32_t x, int i)
				137	{
				138	return x;
				139	}
				140	static inline s32_t
				141	v_get_s32 (v_s32_t x, int i)
				142	{
				143	return x;
				144	}
				145
				146	static inline void
				147	v_set_f32 (v_f32_t *x, int i, f32_t v)
				148	{
				149	*x = v;
				150	}
				151	static inline void
				152	v_set_u32 (v_u32_t *x, int i, u32_t v)
				153	{
				154	*x = v;
				155	}
				156	static inline void
				157	v_set_s32 (v_s32_t *x, int i, s32_t v)
				158	{
				159	*x = v;
				160	}
				161
				162	/* true if any elements of a v_cond result is non-zero. */
				163	static inline int
				164	v_any_u32 (v_u32_t x)
				165	{
				166	return x != 0;
				167	}
				168	/* to wrap the result of relational operators. */
				169	static inline v_u32_t
				170	v_cond_u32 (v_u32_t x)
				171	{
				172	return x ? -1 : 0;
				173	}
				174	static inline v_f32_t
				175	v_abs_f32 (v_f32_t x)
				176	{
				177	return __builtin_fabsf (x);
				178	}
				179	static inline v_f32_t
				180	v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
				181	{
				182	return __builtin_fmaf (x, y, z);
				183	}
				184	static inline v_f32_t
				185	v_round_f32 (v_f32_t x)
				186	{
				187	return __builtin_roundf (x);
				188	}
				189	static inline v_s32_t
				190	v_round_s32 (v_f32_t x)
				191	{
				192	return __builtin_lroundf (x); /* relies on -fno-math-errno. */
				193	}
				194	/* convert to type1 from type2. */
				195	static inline v_f32_t
				196	v_to_f32_s32 (v_s32_t x)
				197	{
				198	return x;
				199	}
				200	static inline v_f32_t
				201	v_to_f32_u32 (v_u32_t x)
				202	{
				203	return x;
				204	}
				205	/* reinterpret as type1 from type2. */
				206	static inline v_u32_t
				207	v_as_u32_f32 (v_f32_t x)
				208	{
				209	union { v_f32_t f; v_u32_t u; } r = {x};
				210	return r.u;
				211	}
				212	static inline v_f32_t
				213	v_as_f32_u32 (v_u32_t x)
				214	{
				215	union { v_u32_t u; v_f32_t f; } r = {x};
				216	return r.f;
				217	}
				218	static inline v_s32_t
				219	v_as_s32_u32 (v_u32_t x)
				220	{
				221	union { v_u32_t u; v_s32_t i; } r = {x};
				222	return r.i;
				223	}
				224	static inline v_u32_t
				225	v_as_u32_s32 (v_s32_t x)
				226	{
				227	union { v_s32_t i; v_u32_t u; } r = {x};
				228	return r.u;
				229	}
				230	static inline v_f32_t
				231	v_lookup_f32 (const f32_t *tab, v_u32_t idx)
				232	{
				233	return tab[idx];
				234	}
				235	static inline v_u32_t
				236	v_lookup_u32 (const u32_t *tab, v_u32_t idx)
				237	{
				238	return tab[idx];
				239	}
				240	static inline v_f32_t
				241	v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
				242	{
				243	return f (x);
				244	}
				245	static inline v_f32_t
				246	v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
				247	v_u32_t p)
				248	{
				249	return f (x1, x2);
				250	}
				251
/* Number of 64-bit lanes per vector; the scalar build has one.  */
static inline int
v_lanes64 (void)
{
  const int lanes = 1;
  return lanes;
}
Szabolcs Nagy	7a1f4cf	2019-07-18 12:51:41 +0100	[diff] [blame]	257	static inline v_f64_t
				258	v_f64 (f64_t x)
				259	{
				260	return x;
				261	}
				262	static inline v_u64_t
				263	v_u64 (u64_t x)
				264	{
				265	return x;
				266	}
				267	static inline v_s64_t
				268	v_s64 (s64_t x)
				269	{
				270	return x;
				271	}
Szabolcs Nagy	59055d4	2020-01-10 15:10:45 +0000	[diff] [blame]	272	static inline f64_t
				273	v_get_f64 (v_f64_t x, int i)
				274	{
				275	return x;
				276	}
				277	static inline void
				278	v_set_f64 (v_f64_t *x, int i, f64_t v)
				279	{
				280	*x = v;
				281	}
Szabolcs Nagy	7a1f4cf	2019-07-18 12:51:41 +0100	[diff] [blame]	282	/* true if any elements of a v_cond result is non-zero. */
				283	static inline int
				284	v_any_u64 (v_u64_t x)
				285	{
				286	return x != 0;
				287	}
				288	/* to wrap the result of relational operators. */
				289	static inline v_u64_t
				290	v_cond_u64 (v_u64_t x)
				291	{
				292	return x ? -1 : 0;
				293	}
				294	static inline v_f64_t
				295	v_abs_f64 (v_f64_t x)
				296	{
				297	return __builtin_fabs (x);
				298	}
				299	static inline v_f64_t
				300	v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
				301	{
				302	return __builtin_fma (x, y, z);
				303	}
				304	static inline v_f64_t
				305	v_round_f64 (v_f64_t x)
				306	{
				307	return __builtin_round (x);
				308	}
				309	static inline v_s64_t
				310	v_round_s64 (v_f64_t x)
				311	{
				312	return __builtin_lround (x); /* relies on -fno-math-errno. */
				313	}
				314	/* convert to type1 from type2. */
				315	static inline v_f64_t
				316	v_to_f64_s64 (v_s64_t x)
				317	{
				318	return x;
				319	}
				320	static inline v_f64_t
				321	v_to_f64_u64 (v_u64_t x)
				322	{
				323	return x;
				324	}
				325	/* reinterpret as type1 from type2. */
				326	static inline v_u64_t
				327	v_as_u64_f64 (v_f64_t x)
				328	{
				329	union { v_f64_t f; v_u64_t u; } r = {x};
				330	return r.u;
				331	}
				332	static inline v_f64_t
				333	v_as_f64_u64 (v_u64_t x)
				334	{
				335	union { v_u64_t u; v_f64_t f; } r = {x};
				336	return r.f;
				337	}
				338	static inline v_s64_t
				339	v_as_s64_u64 (v_u64_t x)
				340	{
				341	union { v_u64_t u; v_s64_t i; } r = {x};
				342	return r.i;
				343	}
				344	static inline v_u64_t
				345	v_as_u64_s64 (v_s64_t x)
				346	{
				347	union { v_s64_t i; v_u64_t u; } r = {x};
				348	return r.u;
				349	}
				350	static inline v_f64_t
				351	v_lookup_f64 (const f64_t *tab, v_u64_t idx)
				352	{
				353	return tab[idx];
				354	}
				355	static inline v_u64_t
				356	v_lookup_u64 (const u64_t *tab, v_u64_t idx)
				357	{
				358	return tab[idx];
				359	}
				360	static inline v_f64_t
				361	v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
				362	{
				363	return f (x);
				364	}
				365
				366	#elif __aarch64__
				367	#define V_SUPPORTED 1
				368	#include <arm_neon.h>
				369	typedef float32x4_t v_f32_t;
				370	typedef uint32x4_t v_u32_t;
				371	typedef int32x4_t v_s32_t;
				372	typedef float64x2_t v_f64_t;
				373	typedef uint64x2_t v_u64_t;
				374	typedef int64x2_t v_s64_t;
				375
				376	static inline int
				377	v_lanes32 (void)
				378	{
				379	return 4;
				380	}
				381
				382	static inline v_f32_t
				383	v_f32 (f32_t x)
				384	{
				385	return (v_f32_t){x, x, x, x};
				386	}
				387	static inline v_u32_t
				388	v_u32 (u32_t x)
				389	{
				390	return (v_u32_t){x, x, x, x};
				391	}
				392	static inline v_s32_t
				393	v_s32 (s32_t x)
				394	{
				395	return (v_s32_t){x, x, x, x};
				396	}
				397
				398	static inline f32_t
				399	v_get_f32 (v_f32_t x, int i)
				400	{
				401	return x[i];
				402	}
				403	static inline u32_t
				404	v_get_u32 (v_u32_t x, int i)
				405	{
				406	return x[i];
				407	}
				408	static inline s32_t
				409	v_get_s32 (v_s32_t x, int i)
				410	{
				411	return x[i];
				412	}
				413
				414	static inline void
				415	v_set_f32 (v_f32_t *x, int i, f32_t v)
				416	{
				417	(*x)[i] = v;
				418	}
				419	static inline void
				420	v_set_u32 (v_u32_t *x, int i, u32_t v)
				421	{
				422	(*x)[i] = v;
				423	}
				424	static inline void
				425	v_set_s32 (v_s32_t *x, int i, s32_t v)
				426	{
				427	(*x)[i] = v;
				428	}
				429
				430	/* true if any elements of a v_cond result is non-zero. */
				431	static inline int
				432	v_any_u32 (v_u32_t x)
				433	{
				434	/* assume elements in x are either 0 or -1u. */
				435	return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
				436	}
				437	/* to wrap the result of relational operators. */
				438	static inline v_u32_t
				439	v_cond_u32 (v_u32_t x)
				440	{
				441	return x;
				442	}
				443	static inline v_f32_t
				444	v_abs_f32 (v_f32_t x)
				445	{
				446	return vabsq_f32 (x);
				447	}
				448	static inline v_f32_t
				449	v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
				450	{
				451	return vfmaq_f32 (z, x, y);
				452	}
				453	static inline v_f32_t
				454	v_round_f32 (v_f32_t x)
				455	{
				456	return vrndaq_f32 (x);
				457	}
				458	static inline v_s32_t
				459	v_round_s32 (v_f32_t x)
				460	{
				461	return vcvtaq_s32_f32 (x);
				462	}
				463	/* convert to type1 from type2. */
				464	static inline v_f32_t
				465	v_to_f32_s32 (v_s32_t x)
				466	{
				467	return (v_f32_t){x[0], x[1], x[2], x[3]};
				468	}
				469	static inline v_f32_t
				470	v_to_f32_u32 (v_u32_t x)
				471	{
				472	return (v_f32_t){x[0], x[1], x[2], x[3]};
				473	}
				474	/* reinterpret as type1 from type2. */
				475	static inline v_u32_t
				476	v_as_u32_f32 (v_f32_t x)
				477	{
				478	union { v_f32_t f; v_u32_t u; } r = {x};
				479	return r.u;
				480	}
				481	static inline v_f32_t
				482	v_as_f32_u32 (v_u32_t x)
				483	{
				484	union { v_u32_t u; v_f32_t f; } r = {x};
				485	return r.f;
				486	}
				487	static inline v_s32_t
				488	v_as_s32_u32 (v_u32_t x)
				489	{
				490	union { v_u32_t u; v_s32_t i; } r = {x};
				491	return r.i;
				492	}
				493	static inline v_u32_t
				494	v_as_u32_s32 (v_s32_t x)
				495	{
				496	union { v_s32_t i; v_u32_t u; } r = {x};
				497	return r.u;
				498	}
				499	static inline v_f32_t
				500	v_lookup_f32 (const f32_t *tab, v_u32_t idx)
				501	{
				502	return (v_f32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
				503	}
				504	static inline v_u32_t
				505	v_lookup_u32 (const u32_t *tab, v_u32_t idx)
				506	{
				507	return (v_u32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
				508	}
				509	static inline v_f32_t
				510	v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
				511	{
				512	return (v_f32_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
				513	p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]};
				514	}
				515	static inline v_f32_t
				516	v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
				517	v_u32_t p)
				518	{
				519	return (
				520	v_f32_t){p[0] ? f (x1[0], x2[0]) : y[0], p[1] ? f (x1[1], x2[1]) : y[1],
				521	p[2] ? f (x1[2], x2[2]) : y[2], p[3] ? f (x1[3], x2[3]) : y[3]};
				522	}
				523
/* Number of 64-bit lanes per vector; two in this build.  */
static inline int
v_lanes64 (void)
{
  const int lanes = 2;
  return lanes;
}
Szabolcs Nagy	7a1f4cf	2019-07-18 12:51:41 +0100	[diff] [blame]	529	static inline v_f64_t
				530	v_f64 (f64_t x)
				531	{
				532	return (v_f64_t){x, x};
				533	}
				534	static inline v_u64_t
				535	v_u64 (u64_t x)
				536	{
				537	return (v_u64_t){x, x};
				538	}
				539	static inline v_s64_t
				540	v_s64 (s64_t x)
				541	{
				542	return (v_s64_t){x, x};
				543	}
Szabolcs Nagy	59055d4	2020-01-10 15:10:45 +0000	[diff] [blame]	544	static inline f64_t
				545	v_get_f64 (v_f64_t x, int i)
				546	{
				547	return x[i];
				548	}
				549	static inline void
				550	v_set_f64 (v_f64_t *x, int i, f64_t v)
				551	{
				552	(*x)[i] = v;
				553	}
/* True if any element of a v_cond result is non-zero.  */
static inline int
v_any_u64 (v_u64_t x)
{
  /* Assume elements in x are either 0 or -1u.  Under that assumption
     the sum of the two lanes is zero only when both are zero.  */
  const u64_t lane_sum = vpaddd_u64 (x);
  return lane_sum != 0;
}
				561	/* to wrap the result of relational operators. */
				562	static inline v_u64_t
				563	v_cond_u64 (v_u64_t x)
				564	{
				565	return x;
				566	}
				567	static inline v_f64_t
				568	v_abs_f64 (v_f64_t x)
				569	{
				570	return vabsq_f64 (x);
				571	}
				572	static inline v_f64_t
				573	v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
				574	{
				575	return vfmaq_f64 (z, x, y);
				576	}
				577	static inline v_f64_t
				578	v_round_f64 (v_f64_t x)
				579	{
				580	return vrndaq_f64 (x);
				581	}
				582	static inline v_s64_t
				583	v_round_s64 (v_f64_t x)
				584	{
				585	return vcvtaq_s64_f64 (x);
				586	}
				587	/* convert to type1 from type2. */
				588	static inline v_f64_t
				589	v_to_f64_s64 (v_s64_t x)
				590	{
				591	return (v_f64_t){x[0], x[1]};
				592	}
				593	static inline v_f64_t
				594	v_to_f64_u64 (v_u64_t x)
				595	{
				596	return (v_f64_t){x[0], x[1]};
				597	}
				598	/* reinterpret as type1 from type2. */
				599	static inline v_u64_t
				600	v_as_u64_f64 (v_f64_t x)
				601	{
				602	union { v_f64_t f; v_u64_t u; } r = {x};
				603	return r.u;
				604	}
				605	static inline v_f64_t
				606	v_as_f64_u64 (v_u64_t x)
				607	{
				608	union { v_u64_t u; v_f64_t f; } r = {x};
				609	return r.f;
				610	}
				611	static inline v_s64_t
				612	v_as_s64_u64 (v_u64_t x)
				613	{
				614	union { v_u64_t u; v_s64_t i; } r = {x};
				615	return r.i;
				616	}
				617	static inline v_u64_t
				618	v_as_u64_s64 (v_s64_t x)
				619	{
				620	union { v_s64_t i; v_u64_t u; } r = {x};
				621	return r.u;
				622	}
				623	static inline v_f64_t
				624	v_lookup_f64 (const f64_t *tab, v_u64_t idx)
				625	{
				626	return (v_f64_t){tab[idx[0]], tab[idx[1]]};
				627	}
				628	static inline v_u64_t
				629	v_lookup_u64 (const u64_t *tab, v_u64_t idx)
				630	{
				631	return (v_u64_t){tab[idx[0]], tab[idx[1]]};
				632	}
				633	static inline v_f64_t
				634	v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
				635	{
				636	return (v_f64_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1]};
				637	}
				638	#endif
				639
				640	#endif
Szabolcs Nagy	1f3b163	2019-11-06 19:41:30 +0000	[diff] [blame]	641	#endif