blob: 3db22e5b3c122e463f309168a5f659c25a18015e [file] [log] [blame]
/*
 * Vector math abstractions.
 *
 * Copyright (c) 2019, Arm Limited.
 * SPDX-License-Identifier: MIT
 */
7
8#ifndef _V_MATH_H
9#define _V_MATH_H
10
/* The vector math code is built by default; predefine WANT_VMATH as 0 to
   compile the rest of this header out.  */
#if !defined (WANT_VMATH)
# define WANT_VMATH 1
#endif
15#if WANT_VMATH
16
/* The goal of this header is to allow vector and scalar builds of the
   same algorithm.  The provided intrinsic wrappers are also vector
   length agnostic, so they can be implemented for SVE (or other SIMD
   architectures) as well, and the code should then work on those
   targets too.  */
22
/* V_NAME (x) builds the symbol name for routine x: __s_x in the scalar
   build, __vn_x when building for the AArch64 vector PCS (which also
   defines VPCS_ATTR), and __v_x otherwise.  */
#if SCALAR
# define V_NAME(x) __s_##x
#elif VPCS && __aarch64__
# define V_NAME(x) __vn_##x
# define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
#else
# define V_NAME(x) __v_##x
#endif
31
/* Empty fallbacks so that VPCS_ATTR and VPCS_ALIAS can always be used,
   whether or not something earlier defined them.  */
#if !defined (VPCS_ATTR)
# define VPCS_ATTR
#endif
#if !defined (VPCS_ALIAS)
# define VPCS_ALIAS
#endif
38
39#include <stdint.h>
40#include "math_config.h"
41
/* Short names for the scalar element types, grouped by kind:
   floating-point, unsigned integer, signed integer.  */
typedef float f32_t;
typedef double f64_t;
typedef uint32_t u32_t;
typedef uint64_t u64_t;
typedef int32_t s32_t;
typedef int64_t s64_t;
48
/* Scalar bit-casts: as_<type1>_<type2> reinterprets the bits of a <type2>
   value as <type1>.  */
50static inline u32_t
51as_u32_f32 (f32_t x)
52{
53 union { f32_t f; u32_t u; } r = {x};
54 return r.u;
55}
56static inline f32_t
57as_f32_u32 (u32_t x)
58{
59 union { u32_t u; f32_t f; } r = {x};
60 return r.f;
61}
62static inline s32_t
63as_s32_u32 (u32_t x)
64{
65 union { u32_t u; s32_t i; } r = {x};
66 return r.i;
67}
68static inline u32_t
69as_u32_s32 (s32_t x)
70{
71 union { s32_t i; u32_t u; } r = {x};
72 return r.u;
73}
74static inline u64_t
75as_u64_f64 (f64_t x)
76{
77 union { f64_t f; u64_t u; } r = {x};
78 return r.u;
79}
80static inline f64_t
81as_f64_u64 (u64_t x)
82{
83 union { u64_t u; f64_t f; } r = {x};
84 return r.f;
85}
86static inline s64_t
87as_s64_u64 (u64_t x)
88{
89 union { u64_t u; s64_t i; } r = {x};
90 return r.i;
91}
92static inline u64_t
93as_u64_s64 (s64_t x)
94{
95 union { s64_t i; u64_t u; } r = {x};
96 return r.u;
97}
98
99#if SCALAR
100#define V_SUPPORTED 1
101typedef f32_t v_f32_t;
102typedef u32_t v_u32_t;
103typedef s32_t v_s32_t;
104typedef f64_t v_f64_t;
105typedef u64_t v_u64_t;
106typedef s64_t v_s64_t;
107
/* Number of 32-bit lanes in a vector; the scalar build has exactly one.  */
static inline int
v_lanes32 (void)
{
  const int lanes = 1;
  return lanes;
}
113
114static inline v_f32_t
115v_f32 (f32_t x)
116{
117 return x;
118}
119static inline v_u32_t
120v_u32 (u32_t x)
121{
122 return x;
123}
124static inline v_s32_t
125v_s32 (s32_t x)
126{
127 return x;
128}
129
130static inline f32_t
131v_get_f32 (v_f32_t x, int i)
132{
133 return x;
134}
135static inline u32_t
136v_get_u32 (v_u32_t x, int i)
137{
138 return x;
139}
140static inline s32_t
141v_get_s32 (v_s32_t x, int i)
142{
143 return x;
144}
145
146static inline void
147v_set_f32 (v_f32_t *x, int i, f32_t v)
148{
149 *x = v;
150}
151static inline void
152v_set_u32 (v_u32_t *x, int i, u32_t v)
153{
154 *x = v;
155}
156static inline void
157v_set_s32 (v_s32_t *x, int i, s32_t v)
158{
159 *x = v;
160}
161
162/* true if any elements of a v_cond result is non-zero. */
163static inline int
164v_any_u32 (v_u32_t x)
165{
166 return x != 0;
167}
168/* to wrap the result of relational operators. */
169static inline v_u32_t
170v_cond_u32 (v_u32_t x)
171{
172 return x ? -1 : 0;
173}
174static inline v_f32_t
175v_abs_f32 (v_f32_t x)
176{
177 return __builtin_fabsf (x);
178}
179static inline v_f32_t
180v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
181{
182 return __builtin_fmaf (x, y, z);
183}
184static inline v_f32_t
185v_round_f32 (v_f32_t x)
186{
187 return __builtin_roundf (x);
188}
189static inline v_s32_t
190v_round_s32 (v_f32_t x)
191{
192 return __builtin_lroundf (x); /* relies on -fno-math-errno. */
193}
/* Numeric conversions: v_to_<type1>_<type2> converts a <type2> value to
   <type1>.  */
195static inline v_f32_t
196v_to_f32_s32 (v_s32_t x)
197{
198 return x;
199}
200static inline v_f32_t
201v_to_f32_u32 (v_u32_t x)
202{
203 return x;
204}
/* Bit-casts: v_as_<type1>_<type2> reinterprets the bits of a <type2> value
   as <type1>.  */
206static inline v_u32_t
207v_as_u32_f32 (v_f32_t x)
208{
209 union { v_f32_t f; v_u32_t u; } r = {x};
210 return r.u;
211}
212static inline v_f32_t
213v_as_f32_u32 (v_u32_t x)
214{
215 union { v_u32_t u; v_f32_t f; } r = {x};
216 return r.f;
217}
218static inline v_s32_t
219v_as_s32_u32 (v_u32_t x)
220{
221 union { v_u32_t u; v_s32_t i; } r = {x};
222 return r.i;
223}
224static inline v_u32_t
225v_as_u32_s32 (v_s32_t x)
226{
227 union { v_s32_t i; v_u32_t u; } r = {x};
228 return r.u;
229}
230static inline v_f32_t
231v_lookup_f32 (const f32_t *tab, v_u32_t idx)
232{
233 return tab[idx];
234}
235static inline v_u32_t
236v_lookup_u32 (const u32_t *tab, v_u32_t idx)
237{
238 return tab[idx];
239}
240static inline v_f32_t
241v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
242{
243 return f (x);
244}
245static inline v_f32_t
246v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
247 v_u32_t p)
248{
249 return f (x1, x2);
250}
251
/* Number of 64-bit lanes in a vector; the scalar build has exactly one.  */
static inline int
v_lanes64 (void)
{
  const int lanes = 1;
  return lanes;
}
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100257static inline v_f64_t
258v_f64 (f64_t x)
259{
260 return x;
261}
262static inline v_u64_t
263v_u64 (u64_t x)
264{
265 return x;
266}
267static inline v_s64_t
268v_s64 (s64_t x)
269{
270 return x;
271}
Szabolcs Nagy59055d42020-01-10 15:10:45 +0000272static inline f64_t
273v_get_f64 (v_f64_t x, int i)
274{
275 return x;
276}
277static inline void
278v_set_f64 (v_f64_t *x, int i, f64_t v)
279{
280 *x = v;
281}
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100282/* true if any elements of a v_cond result is non-zero. */
283static inline int
284v_any_u64 (v_u64_t x)
285{
286 return x != 0;
287}
288/* to wrap the result of relational operators. */
289static inline v_u64_t
290v_cond_u64 (v_u64_t x)
291{
292 return x ? -1 : 0;
293}
294static inline v_f64_t
295v_abs_f64 (v_f64_t x)
296{
297 return __builtin_fabs (x);
298}
299static inline v_f64_t
300v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
301{
302 return __builtin_fma (x, y, z);
303}
304static inline v_f64_t
305v_round_f64 (v_f64_t x)
306{
307 return __builtin_round (x);
308}
309static inline v_s64_t
310v_round_s64 (v_f64_t x)
311{
312 return __builtin_lround (x); /* relies on -fno-math-errno. */
313}
/* Numeric conversions: v_to_<type1>_<type2> converts a <type2> value to
   <type1>.  */
315static inline v_f64_t
316v_to_f64_s64 (v_s64_t x)
317{
318 return x;
319}
320static inline v_f64_t
321v_to_f64_u64 (v_u64_t x)
322{
323 return x;
324}
/* Bit-casts: v_as_<type1>_<type2> reinterprets the bits of a <type2> value
   as <type1>.  */
326static inline v_u64_t
327v_as_u64_f64 (v_f64_t x)
328{
329 union { v_f64_t f; v_u64_t u; } r = {x};
330 return r.u;
331}
332static inline v_f64_t
333v_as_f64_u64 (v_u64_t x)
334{
335 union { v_u64_t u; v_f64_t f; } r = {x};
336 return r.f;
337}
338static inline v_s64_t
339v_as_s64_u64 (v_u64_t x)
340{
341 union { v_u64_t u; v_s64_t i; } r = {x};
342 return r.i;
343}
344static inline v_u64_t
345v_as_u64_s64 (v_s64_t x)
346{
347 union { v_s64_t i; v_u64_t u; } r = {x};
348 return r.u;
349}
350static inline v_f64_t
351v_lookup_f64 (const f64_t *tab, v_u64_t idx)
352{
353 return tab[idx];
354}
355static inline v_u64_t
356v_lookup_u64 (const u64_t *tab, v_u64_t idx)
357{
358 return tab[idx];
359}
360static inline v_f64_t
361v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
362{
363 return f (x);
364}
365
366#elif __aarch64__
367#define V_SUPPORTED 1
368#include <arm_neon.h>
369typedef float32x4_t v_f32_t;
370typedef uint32x4_t v_u32_t;
371typedef int32x4_t v_s32_t;
372typedef float64x2_t v_f64_t;
373typedef uint64x2_t v_u64_t;
374typedef int64x2_t v_s64_t;
375
/* Number of 32-bit lanes in a vector: four in the AArch64 build.  */
static inline int
v_lanes32 (void)
{
  const int lanes = 4;
  return lanes;
}
381
382static inline v_f32_t
383v_f32 (f32_t x)
384{
385 return (v_f32_t){x, x, x, x};
386}
387static inline v_u32_t
388v_u32 (u32_t x)
389{
390 return (v_u32_t){x, x, x, x};
391}
392static inline v_s32_t
393v_s32 (s32_t x)
394{
395 return (v_s32_t){x, x, x, x};
396}
397
398static inline f32_t
399v_get_f32 (v_f32_t x, int i)
400{
401 return x[i];
402}
403static inline u32_t
404v_get_u32 (v_u32_t x, int i)
405{
406 return x[i];
407}
408static inline s32_t
409v_get_s32 (v_s32_t x, int i)
410{
411 return x[i];
412}
413
414static inline void
415v_set_f32 (v_f32_t *x, int i, f32_t v)
416{
417 (*x)[i] = v;
418}
419static inline void
420v_set_u32 (v_u32_t *x, int i, u32_t v)
421{
422 (*x)[i] = v;
423}
424static inline void
425v_set_s32 (v_s32_t *x, int i, s32_t v)
426{
427 (*x)[i] = v;
428}
429
430/* true if any elements of a v_cond result is non-zero. */
431static inline int
432v_any_u32 (v_u32_t x)
433{
434 /* assume elements in x are either 0 or -1u. */
435 return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
436}
437/* to wrap the result of relational operators. */
438static inline v_u32_t
439v_cond_u32 (v_u32_t x)
440{
441 return x;
442}
443static inline v_f32_t
444v_abs_f32 (v_f32_t x)
445{
446 return vabsq_f32 (x);
447}
448static inline v_f32_t
449v_fma_f32 (v_f32_t x, v_f32_t y, v_f32_t z)
450{
451 return vfmaq_f32 (z, x, y);
452}
453static inline v_f32_t
454v_round_f32 (v_f32_t x)
455{
456 return vrndaq_f32 (x);
457}
458static inline v_s32_t
459v_round_s32 (v_f32_t x)
460{
461 return vcvtaq_s32_f32 (x);
462}
/* Numeric conversions: v_to_<type1>_<type2> converts each <type2> lane to
   <type1>.  */
464static inline v_f32_t
465v_to_f32_s32 (v_s32_t x)
466{
467 return (v_f32_t){x[0], x[1], x[2], x[3]};
468}
469static inline v_f32_t
470v_to_f32_u32 (v_u32_t x)
471{
472 return (v_f32_t){x[0], x[1], x[2], x[3]};
473}
/* Bit-casts: v_as_<type1>_<type2> reinterprets the bits of a <type2> vector
   as <type1>.  */
475static inline v_u32_t
476v_as_u32_f32 (v_f32_t x)
477{
478 union { v_f32_t f; v_u32_t u; } r = {x};
479 return r.u;
480}
481static inline v_f32_t
482v_as_f32_u32 (v_u32_t x)
483{
484 union { v_u32_t u; v_f32_t f; } r = {x};
485 return r.f;
486}
487static inline v_s32_t
488v_as_s32_u32 (v_u32_t x)
489{
490 union { v_u32_t u; v_s32_t i; } r = {x};
491 return r.i;
492}
493static inline v_u32_t
494v_as_u32_s32 (v_s32_t x)
495{
496 union { v_s32_t i; v_u32_t u; } r = {x};
497 return r.u;
498}
499static inline v_f32_t
500v_lookup_f32 (const f32_t *tab, v_u32_t idx)
501{
502 return (v_f32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
503}
504static inline v_u32_t
505v_lookup_u32 (const u32_t *tab, v_u32_t idx)
506{
507 return (v_u32_t){tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]};
508}
509static inline v_f32_t
510v_call_f32 (f32_t (*f) (f32_t), v_f32_t x, v_f32_t y, v_u32_t p)
511{
512 return (v_f32_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
513 p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3]};
514}
515static inline v_f32_t
516v_call2_f32 (f32_t (*f) (f32_t, f32_t), v_f32_t x1, v_f32_t x2, v_f32_t y,
517 v_u32_t p)
518{
519 return (
520 v_f32_t){p[0] ? f (x1[0], x2[0]) : y[0], p[1] ? f (x1[1], x2[1]) : y[1],
521 p[2] ? f (x1[2], x2[2]) : y[2], p[3] ? f (x1[3], x2[3]) : y[3]};
522}
523
/* Number of 64-bit lanes in a vector: two in the AArch64 build.  */
static inline int
v_lanes64 (void)
{
  const int lanes = 2;
  return lanes;
}
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100529static inline v_f64_t
530v_f64 (f64_t x)
531{
532 return (v_f64_t){x, x};
533}
534static inline v_u64_t
535v_u64 (u64_t x)
536{
537 return (v_u64_t){x, x};
538}
539static inline v_s64_t
540v_s64 (s64_t x)
541{
542 return (v_s64_t){x, x};
543}
Szabolcs Nagy59055d42020-01-10 15:10:45 +0000544static inline f64_t
545v_get_f64 (v_f64_t x, int i)
546{
547 return x[i];
548}
549static inline void
550v_set_f64 (v_f64_t *x, int i, f64_t v)
551{
552 (*x)[i] = v;
553}
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100554/* true if any elements of a v_cond result is non-zero. */
555static inline int
556v_any_u64 (v_u64_t x)
557{
558 /* assume elements in x are either 0 or -1u. */
559 return vpaddd_u64 (x) != 0;
560}
561/* to wrap the result of relational operators. */
562static inline v_u64_t
563v_cond_u64 (v_u64_t x)
564{
565 return x;
566}
567static inline v_f64_t
568v_abs_f64 (v_f64_t x)
569{
570 return vabsq_f64 (x);
571}
572static inline v_f64_t
573v_fma_f64 (v_f64_t x, v_f64_t y, v_f64_t z)
574{
575 return vfmaq_f64 (z, x, y);
576}
577static inline v_f64_t
578v_round_f64 (v_f64_t x)
579{
580 return vrndaq_f64 (x);
581}
582static inline v_s64_t
583v_round_s64 (v_f64_t x)
584{
585 return vcvtaq_s64_f64 (x);
586}
/* Numeric conversions: v_to_<type1>_<type2> converts each <type2> lane to
   <type1>.  */
588static inline v_f64_t
589v_to_f64_s64 (v_s64_t x)
590{
591 return (v_f64_t){x[0], x[1]};
592}
593static inline v_f64_t
594v_to_f64_u64 (v_u64_t x)
595{
596 return (v_f64_t){x[0], x[1]};
597}
/* Bit-casts: v_as_<type1>_<type2> reinterprets the bits of a <type2> vector
   as <type1>.  */
599static inline v_u64_t
600v_as_u64_f64 (v_f64_t x)
601{
602 union { v_f64_t f; v_u64_t u; } r = {x};
603 return r.u;
604}
605static inline v_f64_t
606v_as_f64_u64 (v_u64_t x)
607{
608 union { v_u64_t u; v_f64_t f; } r = {x};
609 return r.f;
610}
611static inline v_s64_t
612v_as_s64_u64 (v_u64_t x)
613{
614 union { v_u64_t u; v_s64_t i; } r = {x};
615 return r.i;
616}
617static inline v_u64_t
618v_as_u64_s64 (v_s64_t x)
619{
620 union { v_s64_t i; v_u64_t u; } r = {x};
621 return r.u;
622}
623static inline v_f64_t
624v_lookup_f64 (const f64_t *tab, v_u64_t idx)
625{
626 return (v_f64_t){tab[idx[0]], tab[idx[1]]};
627}
628static inline v_u64_t
629v_lookup_u64 (const u64_t *tab, v_u64_t idx)
630{
631 return (v_u64_t){tab[idx[0]], tab[idx[1]]};
632}
633static inline v_f64_t
634v_call_f64 (f64_t (*f) (f64_t), v_f64_t x, v_f64_t y, v_u64_t p)
635{
636 return (v_f64_t){p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1]};
637}
638#endif
639
640#endif
Szabolcs Nagy1f3b1632019-11-06 19:41:30 +0000641#endif