blob: 33ceda3837136607a7aaa31678b9b9d20c073683 [file] [log] [blame]
/*
 * Microbenchmark for math functions.
 *
 * Copyright (c) 2018, Arm Limited.
 * SPDX-License-Identifier: MIT
 */
7
8#undef _GNU_SOURCE
9#define _GNU_SOURCE 1
10#include <stdint.h>
11#include <stdlib.h>
12#include <stdio.h>
13#include <string.h>
14#include <time.h>
15#include <math.h>
16#include "mathlib.h"
17
#if !defined (WANT_VMATH)
/* The vector math benchmarks are built unless WANT_VMATH was already
   defined (e.g. to 0) before this point.  */
# define WANT_VMATH 1
#endif
22
/* Default number of timed measurements; the best (smallest) one is
   reported.  */
#define MEASURE 60
/* Number of elements in each input array.  */
#define N 8000
/* Default number of passes over the input array per measurement.  */
#define ITER 125

/* Input arrays handed to the benchmarked functions.  */
static double A[N];
static float Af[N];

/* Trace samples read by readtrace and the number of slots in Trace.  */
static double *Trace;
static size_t trace_size;

/* Run-time overrides of MEASURE and ITER (-m and -c options).  */
static long measurecount = MEASURE;
static long itercount = ITER;
36
#if __aarch64__ && WANT_VMATH
/* Vector types: two doubles or four floats per vector.  */
typedef __f64x2_t v_double;
typedef __f32x4_t v_float;

/* Number of lanes in each vector type.  */
#define v_double_len() 2
#define v_float_len() 4

/* Build a vector from the two consecutive doubles starting at SRC.  */
static inline v_double
v_double_load (const double *src)
{
  return (v_double){src[0], src[1]};
}

/* Build a vector with VALUE in both lanes.  */
static inline v_double
v_double_dup (double value)
{
  return (v_double){value, value};
}

/* Build a vector from the four consecutive floats starting at SRC.  */
static inline v_float
v_float_load (const float *src)
{
  return (v_float){src[0], src[1], src[2], src[3]};
}

/* Build a vector with VALUE in all four lanes.  */
static inline v_float
v_float_dup (float value)
{
  return (v_float){value, value, value, value};
}
#else
/* Scalar stand-ins so that the vector benchmark runners still compile:
   a "vector" is a single element.  */
typedef double v_double;
typedef float v_float;

#define v_double_len(x) 1
#define v_float_len(x) 1

#define v_double_load(x) (x)[0]
#define v_float_load(x) (x)[0]

#define v_double_dup(x) (x)
#define v_float_dup(x) (x)
#endif
80
/* Identity function: benchmarking it measures the cost of the call and
   of the benchmark loop itself.  */
static double
dummy (double x)
{
  const double unchanged = x;
  return unchanged;
}
86
/* Single-precision identity function, the float counterpart of dummy.  */
static float
dummyf (float x)
{
  const float unchanged = x;
  return unchanged;
}
92
#if WANT_VMATH
# if __aarch64__
/* Vector identity functions, the vector counterparts of dummy/dummyf.  */
static v_double
__v_dummy (v_double x)
{
  return x;
}

static v_float
__v_dummyf (v_float x)
{
  return x;
}

#  ifdef __vpcs
/* Identity functions declared with the __vpcs calling convention.  */
__vpcs static v_double
__vn_dummy (v_double x)
{
  return x;
}

__vpcs static v_float
__vn_dummyf (v_float x)
{
  return x;
}

/* One-argument adapters for the two-argument __vpcs pow variants: the
   benchmark passes a single input, used as both base and exponent.  The
   wrapped functions are declared outside this file.  */
__vpcs static v_float
xy__vn_powf (v_float x)
{
  return __vn_powf (x, x);
}

__vpcs static v_float
xy_Z_powf (v_float x)
{
  return _ZGVnN4vv_powf (x, x);
}

__vpcs static v_double
xy__vn_pow (v_double x)
{
  return __vn_pow (x, x);
}

__vpcs static v_double
xy_Z_pow (v_double x)
{
  return _ZGVnN2vv_pow (x, x);
}
#  endif /* __vpcs */

/* One-argument adapters for the __v_ pow variants (x to the power x).  */
static v_float
xy__v_powf (v_float x)
{
  return __v_powf (x, x);
}

static v_double
xy__v_pow (v_double x)
{
  return __v_pow (x, x);
}
# endif /* __aarch64__ */

/* One-argument adapters for the __s_ pow variants (x to the power x).  */
static float
xy__s_powf (float x)
{
  return __s_powf (x, x);
}

static double
xy__s_pow (double x)
{
  return __s_pow (x, x);
}
#endif /* WANT_VMATH */
170
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100171static double
172xypow (double x)
173{
174 return pow (x, x);
175}
176
177static float
178xypowf (float x)
179{
180 return powf (x, x);
181}
182
183static double
184xpow (double x)
185{
186 return pow (x, 23.4);
187}
188
189static float
190xpowf (float x)
191{
192 return powf (x, 23.4f);
193}
194
195static double
196ypow (double x)
197{
198 return pow (2.34, x);
199}
200
201static float
202ypowf (float x)
203{
204 return powf (2.34f, x);
205}
206
207static float
208sincosf_wrap (float x)
209{
210 float s, c;
211 sincosf (x, &s, &c);
212 return s + c;
213}
214
215static const struct fun
216{
217 const char *name;
218 int prec;
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100219 int vec;
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100220 double lo;
221 double hi;
222 union
223 {
224 double (*d) (double);
225 float (*f) (float);
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100226 v_double (*vd) (v_double);
227 v_float (*vf) (v_float);
228#ifdef __vpcs
229 __vpcs v_double (*vnd) (v_double);
230 __vpcs v_float (*vnf) (v_float);
231#endif
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100232 } fun;
233} funtab[] = {
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100234#define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
235#define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
236#define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
237#define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
238#define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
239#define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100240D (dummy, 1.0, 2.0)
241D (exp, -9.9, 9.9)
242D (exp, 0.5, 1.0)
243D (exp2, -9.9, 9.9)
244D (log, 0.01, 11.1)
245D (log, 0.999, 1.001)
246D (log2, 0.01, 11.1)
247D (log2, 0.999, 1.001)
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100248{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100249D (xpow, 0.01, 11.1)
250D (ypow, -9.9, 9.9)
251
252F (dummyf, 1.0, 2.0)
253F (expf, -9.9, 9.9)
254F (exp2f, -9.9, 9.9)
255F (logf, 0.01, 11.1)
256F (log2f, 0.01, 11.1)
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100257{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100258F (xpowf, 0.01, 11.1)
259F (ypowf, -9.9, 9.9)
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100260{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
261{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
262{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
263{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
264{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
265{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100266F (sinf, 0.1, 0.7)
267F (sinf, 0.8, 3.1)
268F (sinf, -3.1, 3.1)
269F (sinf, 3.3, 33.3)
270F (sinf, 100, 1000)
271F (sinf, 1e6, 1e32)
272F (cosf, 0.1, 0.7)
273F (cosf, 0.8, 3.1)
274F (cosf, -3.1, 3.1)
275F (cosf, 3.3, 33.3)
276F (cosf, 100, 1000)
277F (cosf, 1e6, 1e32)
Szabolcs Nagy1f3b1632019-11-06 19:41:30 +0000278#if WANT_VMATH
279D (__s_sin, -3.1, 3.1)
280D (__s_cos, -3.1, 3.1)
281D (__s_exp, -9.9, 9.9)
282D (__s_log, 0.01, 11.1)
Szabolcs Nagya807c9b2020-01-10 15:10:45 +0000283{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
Szabolcs Nagy1f3b1632019-11-06 19:41:30 +0000284F (__s_expf, -9.9, 9.9)
285F (__s_expf_1u, -9.9, 9.9)
286F (__s_exp2f, -9.9, 9.9)
287F (__s_exp2f_1u, -9.9, 9.9)
288F (__s_logf, 0.01, 11.1)
289{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
290F (__s_sinf, -3.1, 3.1)
Szabolcs Nagyc5cba852019-08-09 15:39:09 +0100291F (__s_cosf, -3.1, 3.1)
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100292#if __aarch64__
293VD (__v_dummy, 1.0, 2.0)
Szabolcs Nagya2f717e2019-08-09 16:56:54 +0100294VD (__v_sin, -3.1, 3.1)
295VD (__v_cos, -3.1, 3.1)
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100296VD (__v_exp, -9.9, 9.9)
Szabolcs Nagyd9840982019-08-29 14:46:28 +0100297VD (__v_log, 0.01, 11.1)
Szabolcs Nagya807c9b2020-01-10 15:10:45 +0000298{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100299VF (__v_dummyf, 1.0, 2.0)
300VF (__v_expf, -9.9, 9.9)
301VF (__v_expf_1u, -9.9, 9.9)
Szabolcs Nagy69170e12019-10-14 15:21:28 +0100302VF (__v_exp2f, -9.9, 9.9)
303VF (__v_exp2f_1u, -9.9, 9.9)
Szabolcs Nagyc280e492019-08-09 15:18:40 +0100304VF (__v_logf, 0.01, 11.1)
Szabolcs Nagyba75d0a2019-08-09 16:24:59 +0100305{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
Szabolcs Nagyc5cba852019-08-09 15:39:09 +0100306VF (__v_sinf, -3.1, 3.1)
307VF (__v_cosf, -3.1, 3.1)
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100308#ifdef __vpcs
309VND (__vn_dummy, 1.0, 2.0)
310VND (__vn_exp, -9.9, 9.9)
311VND (_ZGVnN2v_exp, -9.9, 9.9)
Szabolcs Nagyd9840982019-08-29 14:46:28 +0100312VND (__vn_log, 0.01, 11.1)
313VND (_ZGVnN2v_log, 0.01, 11.1)
Szabolcs Nagya807c9b2020-01-10 15:10:45 +0000314{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
315{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
Szabolcs Nagya2f717e2019-08-09 16:56:54 +0100316VND (__vn_sin, -3.1, 3.1)
317VND (_ZGVnN2v_sin, -3.1, 3.1)
318VND (__vn_cos, -3.1, 3.1)
319VND (_ZGVnN2v_cos, -3.1, 3.1)
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100320VNF (__vn_dummyf, 1.0, 2.0)
321VNF (__vn_expf, -9.9, 9.9)
322VNF (_ZGVnN4v_expf, -9.9, 9.9)
323VNF (__vn_expf_1u, -9.9, 9.9)
Szabolcs Nagy69170e12019-10-14 15:21:28 +0100324VNF (__vn_exp2f, -9.9, 9.9)
325VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
326VNF (__vn_exp2f_1u, -9.9, 9.9)
Szabolcs Nagyc280e492019-08-09 15:18:40 +0100327VNF (__vn_logf, 0.01, 11.1)
328VNF (_ZGVnN4v_logf, 0.01, 11.1)
Szabolcs Nagyba75d0a2019-08-09 16:24:59 +0100329{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
330{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
Szabolcs Nagyc5cba852019-08-09 15:39:09 +0100331VNF (__vn_sinf, -3.1, 3.1)
332VNF (_ZGVnN4v_sinf, -3.1, 3.1)
333VNF (__vn_cosf, -3.1, 3.1)
334VNF (_ZGVnN4v_cosf, -3.1, 3.1)
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100335#endif
336#endif
Szabolcs Nagy1f3b1632019-11-06 19:41:30 +0000337#endif
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100338{0},
339#undef F
340#undef D
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100341#undef VF
342#undef VD
343#undef VNF
344#undef VND
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100345};
346
347static void
348gen_linear (double lo, double hi)
349{
350 for (int i = 0; i < N; i++)
351 A[i] = (lo * (N - i) + hi * i) / N;
352}
353
354static void
355genf_linear (double lo, double hi)
356{
357 for (int i = 0; i < N; i++)
358 Af[i] = (float)(lo * (N - i) + hi * i) / N;
359}
360
/* Reinterpret the 64 bits of I as a double (no numeric conversion).  */
static inline double
asdouble (uint64_t i)
{
  double value;

  memcpy (&value, &i, sizeof value);
  return value;
}
371
/* State of the pseudo-random generator; the fixed start value makes
   every run produce the same sequence.  */
static uint64_t seed = 0x0123456789abcdef;

/* Advance the generator and return the next value scaled into the
   interval starting at LO with width HI - LO.  */
static double
frand (double lo, double hi)
{
  union
  {
    uint64_t bits;
    double value;
  } unit;

  /* Linear congruential step, wrapping modulo 2^64.  */
  seed = 6364136223846793005ULL * seed + 1;
  /* The top 52 bits of the state become the mantissa under the exponent
     field 0x3ff, which encodes a double in [1, 2).  */
  unit.bits = (seed >> 12) | (0x3ffULL << 52);
  return lo + (hi - lo) * (unit.value - 1.0);
}
380
381static void
382gen_rand (double lo, double hi)
383{
384 for (int i = 0; i < N; i++)
385 A[i] = frand (lo, hi);
386}
387
388static void
389genf_rand (double lo, double hi)
390{
391 for (int i = 0; i < N; i++)
392 Af[i] = (float)frand (lo, hi);
393}
394
395static void
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100396gen_trace (int index)
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100397{
398 for (int i = 0; i < N; i++)
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100399 A[i] = Trace[index + i];
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100400}
401
402static void
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100403genf_trace (int index)
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100404{
405 for (int i = 0; i < N; i++)
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100406 Af[i] = (float)Trace[index + i];
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100407}
408
409static void
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100410run_thruput (double f (double))
411{
412 for (int i = 0; i < N; i++)
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100413 f (A[i]);
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100414}
415
416static void
417runf_thruput (float f (float))
418{
419 for (int i = 0; i < N; i++)
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100420 f (Af[i]);
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100421}
422
423volatile double zero = 0;
424
425static void
426run_latency (double f (double))
427{
428 double z = zero;
429 double prev = z;
430 for (int i = 0; i < N; i++)
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100431 prev = f (A[i] + prev * z);
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100432}
433
434static void
435runf_latency (float f (float))
436{
437 float z = (float)zero;
438 float prev = z;
439 for (int i = 0; i < N; i++)
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100440 prev = f (Af[i] + prev * z);
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100441}
442
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100443static void
444run_v_thruput (v_double f (v_double))
445{
446 for (int i = 0; i < N; i += v_double_len ())
447 f (v_double_load (A+i));
448}
449
450static void
451runf_v_thruput (v_float f (v_float))
452{
453 for (int i = 0; i < N; i += v_float_len ())
454 f (v_float_load (Af+i));
455}
456
457static void
458run_v_latency (v_double f (v_double))
459{
460 v_double z = v_double_dup (zero);
461 v_double prev = z;
462 for (int i = 0; i < N; i += v_double_len ())
463 prev = f (v_double_load (A+i) + prev * z);
464}
465
466static void
467runf_v_latency (v_float f (v_float))
468{
469 v_float z = v_float_dup (zero);
470 v_float prev = z;
471 for (int i = 0; i < N; i += v_float_len ())
472 prev = f (v_float_load (Af+i) + prev * z);
473}
474
#ifdef __vpcs
/* Same four runners as the run_v_ and runf_v_ ones, for functions
   declared with the __vpcs calling convention.  */

/* Throughput, double: results are discarded.  */
static void
run_vn_thruput (__vpcs v_double f (v_double))
{
  int k = 0;

  while (k < N)
    {
      f (v_double_load (A + k));
      k += v_double_len ();
    }
}

/* Throughput, float: results are discarded.  */
static void
runf_vn_thruput (__vpcs v_float f (v_float))
{
  int k = 0;

  while (k < N)
    {
      f (v_float_load (Af + k));
      k += v_float_len ();
    }
}

/* Latency, double: each call consumes the previous result (times a
   run-time zero vector).  */
static void
run_vn_latency (__vpcs v_double f (v_double))
{
  v_double z = v_double_dup (zero);
  v_double chained = z;
  int k = 0;

  while (k < N)
    {
      chained = f (v_double_load (A + k) + chained * z);
      k += v_double_len ();
    }
}

/* Latency, float: each call consumes the previous result (times a
   run-time zero vector).  */
static void
runf_vn_latency (__vpcs v_float f (v_float))
{
  v_float z = v_float_dup (zero);
  v_float chained = z;
  int k = 0;

  while (k < N)
    {
      chained = f (v_float_load (Af + k) + chained * z);
      k += v_float_len ();
    }
}
#endif
508
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100509static uint64_t
510tic (void)
511{
512 struct timespec ts;
513 if (clock_gettime (CLOCK_REALTIME, &ts))
514 abort ();
515 return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
516}
517
/* Time RUN (F) and leave the best result in the caller's uint64_t
   variable dt: after one untimed warm-up call, measurecount measurements
   of itercount calls each are taken and the smallest elapsed time (in
   the units returned by tic) is kept.  */
#define TIMEIT(run, f) do { \
  dt = UINT64_MAX; \
  run (f); /* Warm up.  */ \
  for (int timeit_round = 0; timeit_round < measurecount; timeit_round++) \
    { \
      const uint64_t timeit_start = tic (); \
      for (int timeit_pass = 0; timeit_pass < itercount; timeit_pass++) \
        run (f); \
      const uint64_t timeit_elapsed = tic () - timeit_start; \
      if (timeit_elapsed < dt) \
        dt = timeit_elapsed; \
    } \
} while (0)
531
532static void
533bench1 (const struct fun *f, int type, double lo, double hi)
534{
535 uint64_t dt = 0;
536 uint64_t ns100;
537 const char *s = type == 't' ? "rthruput" : "latency";
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100538 int vlen = 1;
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100539
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100540 if (f->vec && f->prec == 'd')
541 vlen = v_double_len();
542 else if (f->vec && f->prec == 'f')
543 vlen = v_float_len();
544
545 if (f->prec == 'd' && type == 't' && f->vec == 0)
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100546 TIMEIT (run_thruput, f->fun.d);
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100547 else if (f->prec == 'd' && type == 'l' && f->vec == 0)
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100548 TIMEIT (run_latency, f->fun.d);
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100549 else if (f->prec == 'f' && type == 't' && f->vec == 0)
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100550 TIMEIT (runf_thruput, f->fun.f);
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100551 else if (f->prec == 'f' && type == 'l' && f->vec == 0)
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100552 TIMEIT (runf_latency, f->fun.f);
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100553 else if (f->prec == 'd' && type == 't' && f->vec == 'v')
554 TIMEIT (run_v_thruput, f->fun.vd);
555 else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
556 TIMEIT (run_v_latency, f->fun.vd);
557 else if (f->prec == 'f' && type == 't' && f->vec == 'v')
558 TIMEIT (runf_v_thruput, f->fun.vf);
559 else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
560 TIMEIT (runf_v_latency, f->fun.vf);
561#ifdef __vpcs
562 else if (f->prec == 'd' && type == 't' && f->vec == 'n')
563 TIMEIT (run_vn_thruput, f->fun.vnd);
564 else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
565 TIMEIT (run_vn_latency, f->fun.vnd);
566 else if (f->prec == 'f' && type == 't' && f->vec == 'n')
567 TIMEIT (runf_vn_thruput, f->fun.vnf);
568 else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
569 TIMEIT (runf_vn_latency, f->fun.vnf);
570#endif
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100571
Szabolcs Nagy7a1f4cf2019-07-18 12:51:41 +0100572 if (type == 't')
573 {
574 ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
575 printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
576 (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
577 (unsigned long long) dt, lo, hi);
578 }
579 else if (type == 'l')
580 {
581 ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
582 printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
583 (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
584 (unsigned long long) dt, lo, hi);
585 }
Szabolcs Nagy5f82bff2018-07-11 09:11:06 +0100586 fflush (stdout);
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100587}
588
589static void
590bench (const struct fun *f, double lo, double hi, int type, int gen)
591{
592 if (f->prec == 'd' && gen == 'r')
593 gen_rand (lo, hi);
594 else if (f->prec == 'd' && gen == 'l')
595 gen_linear (lo, hi);
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100596 else if (f->prec == 'd' && gen == 't')
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100597 gen_trace (0);
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100598 else if (f->prec == 'f' && gen == 'r')
599 genf_rand (lo, hi);
600 else if (f->prec == 'f' && gen == 'l')
601 genf_linear (lo, hi);
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100602 else if (f->prec == 'f' && gen == 't')
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100603 genf_trace (0);
604
605 if (gen == 't')
606 hi = trace_size / N;
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100607
608 if (type == 'b' || type == 't')
609 bench1 (f, 't', lo, hi);
610
611 if (type == 'b' || type == 'l')
612 bench1 (f, 'l', lo, hi);
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100613
614 for (int i = N; i < trace_size; i += N)
615 {
616 if (f->prec == 'd')
617 gen_trace (i);
618 else
619 genf_trace (i);
620
621 lo = i / N;
622 if (type == 'b' || type == 't')
623 bench1 (f, 't', lo, hi);
624
625 if (type == 'b' || type == 'l')
626 bench1 (f, 'l', lo, hi);
627 }
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100628}
629
630static void
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100631readtrace (const char *name)
632{
633 int n = 0;
634 FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
635 if (!f)
636 {
637 printf ("openning \"%s\" failed: %m\n", name);
638 exit (1);
639 }
640 for (;;)
641 {
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100642 if (n >= trace_size)
643 {
644 trace_size += N;
645 Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
646 if (Trace == NULL)
647 {
648 printf ("out of memory\n");
649 exit (1);
650 }
651 }
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100652 if (fscanf (f, "%lf", Trace + n) != 1)
653 break;
654 n++;
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100655 }
656 if (ferror (f) || n == 0)
657 {
658 printf ("reading \"%s\" failed: %m\n", name);
659 exit (1);
660 }
661 fclose (f);
Wilco Dijkstra2127ba72018-06-15 09:52:52 +0100662 if (n % N == 0)
663 trace_size = n;
664 for (int i = 0; n < trace_size; n++, i++)
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100665 Trace[n] = Trace[i];
666}
667
668static void
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100669usage (void)
670{
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100671 printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
672 "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
673 "[func2 ..]\n");
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100674 printf ("func:\n");
675 printf ("%7s [run all benchmarks]\n", "all");
676 for (const struct fun *f = funtab; f->name; f++)
677 printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
678 exit (1);
679}
680
681int
682main (int argc, char *argv[])
683{
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100684 int usergen = 0, gen = 'r', type = 'b', all = 0;
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100685 double lo = 0, hi = 0;
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100686 const char *tracefile = "-";
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100687
688 argv++;
689 argc--;
690 for (;;)
691 {
692 if (argc <= 0)
693 usage ();
694 if (argv[0][0] != '-')
695 break;
696 else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
697 {
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100698 usergen = 1;
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100699 lo = strtod (argv[1], 0);
700 hi = strtod (argv[2], 0);
701 argv += 3;
702 argc -= 3;
703 }
704 else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
705 {
706 measurecount = strtol (argv[1], 0, 0);
707 argv += 2;
708 argc -= 2;
709 }
710 else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
711 {
712 itercount = strtol (argv[1], 0, 0);
713 argv += 2;
714 argc -= 2;
715 }
716 else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
717 {
718 gen = argv[1][0];
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100719 if (strchr ("rlt", gen) == 0)
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100720 usage ();
721 argv += 2;
722 argc -= 2;
723 }
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100724 else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
725 {
726 gen = 't'; /* -f implies -g trace. */
727 tracefile = argv[1];
728 argv += 2;
729 argc -= 2;
730 }
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100731 else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
732 {
733 type = argv[1][0];
734 if (strchr ("ltb", type) == 0)
735 usage ();
736 argv += 2;
737 argc -= 2;
738 }
739 else
740 usage ();
741 }
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100742 if (gen == 't')
743 {
744 readtrace (tracefile);
745 lo = hi = 0;
746 usergen = 1;
747 }
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100748 while (argc > 0)
749 {
750 int found = 0;
751 all = strcmp (argv[0], "all") == 0;
752 for (const struct fun *f = funtab; f->name; f++)
753 if (all || strcmp (argv[0], f->name) == 0)
754 {
755 found = 1;
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100756 if (!usergen)
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100757 {
758 lo = f->lo;
759 hi = f->hi;
760 }
761 bench (f, lo, hi, type, gen);
Szabolcs Nagy9159cf22018-06-12 13:13:57 +0100762 if (usergen && !all)
Szabolcs Nagy764b4bf2018-06-08 19:08:48 +0100763 break;
764 }
765 if (!found)
766 printf ("unknown function: %s\n", argv[0]);
767 argv++;
768 argc--;
769 }
770 return 0;
771}