/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "bandwidth.h"

#include <ctype.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>

#include <map>
#include <vector>

// Describes one "--name value" command line option.
//   name:     option name as typed after the leading "--". A NULL name
//             marks the end of an option table.
//   int_type: true if the value is parsed as an integer, false if the
//             value is kept as a string.
typedef struct {
    const char *name;
    bool int_type;
} option_t;

35option_t bandwidth_opts[] = {
36 { "size", true },
37 { "num_warm_loops", true },
38 { "num_loops", true },
39 { "type", false },
40 { NULL, false },
41};
42
43option_t per_core_opts[] = {
44 { "size", true },
45 { "num_warm_loops", true},
46 { "num_loops", true },
47 { "type", false },
48 { NULL, false },
49};
50
51option_t multithread_opts[] = {
52 { "size", true },
53 { "num_warm_loops", true},
54 { "num_loops", true },
55 { "type", false },
56 { "num_threads", true },
57 { NULL, false },
58};
59
// Value of a parsed option: int_value is used for integer options and
// char_value for string options.
typedef union {
    int int_value;
    const char *char_value;
} arg_value_t;

// Orders option names by their characters. Without this the map would
// compare the pointers themselves, and a lookup such as values["size"]
// would only find an entry stored under a different "size" pointer if the
// compiler happened to merge the two string literals.
struct arg_name_less {
    bool operator()(const char *lhs, const char *rhs) const {
        return strcmp(lhs, rhs) < 0;
    }
};

// Maps an option name to its parsed value. Keys are not copied, so they
// must outlive the map.
typedef std::map<const char*, arg_value_t, arg_name_less> arg_t;

66bool processBandwidthOptions(int argc, char** argv, option_t options[],
67 arg_t *values) {
68 for (int i = 1; i < argc; i++) {
69 if (argv[i][0] == '-' && argv[i][1] == '-' && !isdigit(argv[i][2])) {
70 char *arg = &argv[i][2];
71
72 for (int j = 0; options[j].name != NULL; j++) {
73 if (strcmp(arg, options[j].name) == 0) {
74 const char *name = options[j].name;
75 if (i == argc - 1) {
76 printf("The option --%s requires an argument.\n", name);
77 return false;
78 }
79 if (options[j].int_type) {
80 (*values)[name].int_value = strtol(argv[++i], NULL, 0);
81 } else {
82 (*values)[name].char_value = argv[++i];
83 }
84 }
85 }
86 }
87 }
88
89 return true;
90}
91
92BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) {
93 BandwidthBenchmark *bench = NULL;
94
95 const char *name = values["type"].char_value;
96 size_t size = 0;
97 if (values.count("size") > 0) {
98 size = values["size"].int_value;
99 }
100 if (strcmp(name, "copy_ldrd_strd") == 0) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700101 bench = new CopyLdrdStrdBenchmark();
Christopher Ferris1348ce22013-04-11 17:50:18 -0700102 } else if (strcmp(name, "copy_ldmia_stmia") == 0) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700103 bench = new CopyLdmiaStmiaBenchmark();
Christopher Ferris65d2c782013-07-02 16:38:45 -0700104 } else if (strcmp(name, "copy_vld1_vst1") == 0) {
105 bench = new CopyVld1Vst1Benchmark();
106 } else if (strcmp(name, "copy_vldr_vstr") == 0) {
107 bench = new CopyVldrVstrBenchmark();
Christopher Ferris1348ce22013-04-11 17:50:18 -0700108 } else if (strcmp(name, "copy_vldmia_vstmia") == 0) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700109 bench = new CopyVldmiaVstmiaBenchmark();
Christopher Ferris1348ce22013-04-11 17:50:18 -0700110 } else if (strcmp(name, "memcpy") == 0) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700111 bench = new MemcpyBenchmark();
Christopher Ferris1348ce22013-04-11 17:50:18 -0700112 } else if (strcmp(name, "write_strd") == 0) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700113 bench = new WriteStrdBenchmark();
Christopher Ferris1348ce22013-04-11 17:50:18 -0700114 } else if (strcmp(name, "write_stmia") == 0) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700115 bench = new WriteStmiaBenchmark();
Christopher Ferris65d2c782013-07-02 16:38:45 -0700116 } else if (strcmp(name, "write_vst1") == 0) {
117 bench = new WriteVst1Benchmark();
118 } else if (strcmp(name, "write_vstr") == 0) {
119 bench = new WriteVstrBenchmark();
Christopher Ferris1348ce22013-04-11 17:50:18 -0700120 } else if (strcmp(name, "write_vstmia") == 0) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700121 bench = new WriteVstmiaBenchmark();
Christopher Ferris1348ce22013-04-11 17:50:18 -0700122 } else if (strcmp(name, "memset") == 0) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700123 bench = new MemsetBenchmark();
124 } else if (strcmp(name, "read_ldrd") == 0) {
125 bench = new ReadLdrdBenchmark();
126 } else if (strcmp(name, "read_ldmia") == 0) {
127 bench = new ReadLdmiaBenchmark();
Christopher Ferris65d2c782013-07-02 16:38:45 -0700128 } else if (strcmp(name, "read_vld1") == 0) {
129 bench = new ReadVld1Benchmark();
130 } else if (strcmp(name, "read_vldr") == 0) {
131 bench = new ReadVldrBenchmark();
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700132 } else if (strcmp(name, "read_vldmia") == 0) {
133 bench = new ReadVldmiaBenchmark();
134 } else {
135 printf("Unknown type name %s\n", name);
136 return NULL;
Christopher Ferris1348ce22013-04-11 17:50:18 -0700137 }
138
Ben Chengfb829a42015-11-10 17:33:40 +0800139 if (!bench->setSize(size)) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700140 printf("Failed to allocate buffers for benchmark.\n");
Yunlian Jiangd31537b2016-12-13 17:05:50 -0800141 delete bench;
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700142 return NULL;
143 }
144
145 if (values.count("num_warm_loops") > 0) {
146 bench->set_num_loops(values["num_warm_loops"].int_value);
147 }
148 if (values.count("num_loops") > 0) {
149 bench->set_num_loops(values["num_loops"].int_value);
Christopher Ferris1348ce22013-04-11 17:50:18 -0700150 }
151
152 return bench;
153}
154
// Appends to cpu_list the index of every cpu in the affinity mask that
// sched_getaffinity() reports for the calling thread, in increasing order.
//
// Returns false, after reporting the error with perror(), if the affinity
// mask cannot be read; cpu_list is left untouched in that case.
bool getAvailCpus(std::vector<int> *cpu_list) {
    cpu_set_t cpuset;

    CPU_ZERO(&cpuset);
    if (sched_getaffinity(0, sizeof(cpuset), &cpuset) != 0) {
        perror("sched_getaffinity failed.");
        return false;
    }

    for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) {
        if (CPU_ISSET(cpu, &cpuset)) {
            cpu_list->push_back(cpu);
        }
    }

    return true;
}

173typedef struct {
174 int core;
175 BandwidthBenchmark *bench;
176 double avg_mb;
177 volatile bool *run;
178} thread_arg_t;
179
180void *runBandwidthThread(void *data) {
181 thread_arg_t *arg = reinterpret_cast<thread_arg_t *>(data);
182
183 if (arg->core >= 0) {
184 cpu_set_t cpuset;
185 CPU_ZERO(&cpuset);
186 CPU_SET(arg->core, &cpuset);
187 if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
188 perror("sched_setaffinity failed");
189 return NULL;
190 }
191 }
192
193 // Spinloop waiting for the run variable to get set to true.
194 while (!*arg->run) {
195 }
196
197 double avg_mb = 0;
198 for (int run = 1; ; run++) {
199 arg->bench->run();
200 if (!*arg->run) {
201 // Throw away the last data point since it's possible not
202 // all of the threads are running at this point.
203 break;
204 }
205 avg_mb = (avg_mb/run) * (run-1) + arg->bench->mb_per_sec()/run;
206 }
207 arg->avg_mb = avg_mb;
208
209 return NULL;
210}
211
212bool processThreadArgs(int argc, char** argv, option_t options[],
213 arg_t *values) {
214 // Use some smaller values for the number of loops.
215 (*values)["num_warm_loops"].int_value = 1000000;
216 (*values)["num_loops"].int_value = 10000000;
217
218 if (!processBandwidthOptions(argc, argv, options, values)) {
219 return false;
220 }
221 if (values->count("size") > 0 && ((*values)["size"].int_value % 64) != 0) {
222 printf("The size values must be a multiple of 64.\n");
223 return false;
224 }
225 if (values->count("type") == 0) {
226 printf("Must specify the type value.\n");
227 return false;
228 }
229
230 BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values);
231 if (!bench) {
Christopher Ferris1348ce22013-04-11 17:50:18 -0700232 return false;
233 }
234
235 if (setpriority(PRIO_PROCESS, 0, -20)) {
236 perror("Unable to raise priority of process.");
237 return false;
238 }
239
240 printf("Calculating optimum run time...\n");
241 nsecs_t t = system_time();
242 bench->run();
243 t = system_time() - t;
244 // Since this is only going to be running single threaded, assume that
245 // if the number is set to ten times this value, we should get at least
246 // a couple of samples per thread.
247 int run_time = int((t/1000000000.0)*10 + 0.5) + 5;
248
249 (*values)["run_time"].int_value = run_time;
250 (*values)["size"].int_value = bench->size();
251 (*values)["num_warm_loops"].int_value = bench->num_warm_loops();
252 (*values)["num_loops"].int_value = bench->num_loops();
253 delete bench;
254
255 return true;
256}
257
258bool runThreadedTest(thread_arg_t args[], int num_threads, int run_time) {
259 pthread_t threads[num_threads];
260 volatile bool run = false;
261
262 int rc;
263 for (int i = 0; i < num_threads; i++) {
264 args[i].run = &run;
265 rc = pthread_create(&threads[i], NULL, runBandwidthThread,
266 (void*)&args[i]);
267 if (rc != 0) {
268 printf("Failed to launch thread %d\n", i);
269 return false;
270 }
271 }
272
273 // Kick start the threads.
274 run = true;
275
276 // Let the threads run.
277 sleep(run_time);
278
279 // Stop the threads.
280 run = false;
281
282 // Wait for the threads to complete.
283 for (int i = 0; i < num_threads; i++) {
284 rc = pthread_join(threads[i], NULL);
285 if (rc != 0) {
286 printf("Thread %d failed to join.\n", i);
287 return false;
288 }
289 printf("Thread %d: bandwidth using %s %0.2f MB/s\n", i,
290 args[i].bench->getName(), args[i].avg_mb);
291 }
292
293 return true;
294}
295
296int per_core_bandwidth(int argc, char** argv) {
297 arg_t values;
298 if (!processThreadArgs(argc, argv, per_core_opts, &values)) {
299 return -1;
300 }
301
302 std::vector<int> cpu_list;
303 if (!getAvailCpus(&cpu_list)) {
304 printf("Failed to get available cpu list.\n");
305 return -1;
306 }
307
308 thread_arg_t args[cpu_list.size()];
309
310 int i = 0;
311 for (std::vector<int>::iterator it = cpu_list.begin();
312 it != cpu_list.end(); ++it, ++i) {
313 args[i].core = *it;
314 args[i].bench = createBandwidthBenchmarkObject(values);
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700315 if (!args[i].bench) {
Yunlian Jiangf943d1b2017-02-07 19:47:08 -0800316 for (int j = 0; j < i; j++)
317 delete args[j].bench;
Christopher Ferrisf90ab5f2013-05-03 12:51:45 -0700318 return -1;
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700319 }
Christopher Ferris1348ce22013-04-11 17:50:18 -0700320 }
321
322 printf("Running on %d cores\n", cpu_list.size());
323 printf(" run_time = %ds\n", values["run_time"].int_value);
324 printf(" size = %d\n", values["size"].int_value);
325 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value);
326 printf(" num_loops = %d\n", values["num_loops"].int_value);
327 printf("\n");
328
329 if (!runThreadedTest(args, cpu_list.size(), values["run_time"].int_value)) {
330 return -1;
331 }
332
333 return 0;
334}
335
336int multithread_bandwidth(int argc, char** argv) {
337 arg_t values;
338 if (!processThreadArgs(argc, argv, multithread_opts, &values)) {
339 return -1;
340 }
341 if (values.count("num_threads") == 0) {
342 printf("Must specify the num_threads value.\n");
343 return -1;
344 }
345 int num_threads = values["num_threads"].int_value;
346
347 thread_arg_t args[num_threads];
348
Christopher Ferris1348ce22013-04-11 17:50:18 -0700349 for (int i = 0; i < num_threads; i++) {
350 args[i].core = -1;
351 args[i].bench = createBandwidthBenchmarkObject(values);
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700352 if (!args[i].bench) {
Yunlian Jiangf943d1b2017-02-07 19:47:08 -0800353 for (int j = 0; j < i; j++)
354 delete args[j].bench;
Christopher Ferrisf90ab5f2013-05-03 12:51:45 -0700355 return -1;
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700356 }
Christopher Ferris1348ce22013-04-11 17:50:18 -0700357 }
358
359 printf("Running %d threads\n", num_threads);
360 printf(" run_time = %ds\n", values["run_time"].int_value);
361 printf(" size = %d\n", values["size"].int_value);
362 printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value);
363 printf(" num_loops = %d\n", values["num_loops"].int_value);
364 printf("\n");
365
366 if (!runThreadedTest(args, num_threads, values["run_time"].int_value)) {
367 return -1;
368 }
369
370 return 0;
371}
372
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700373bool run_bandwidth_benchmark(int argc, char** argv, const char *name,
374 std::vector<BandwidthBenchmark*> bench_objs) {
Christopher Ferris1348ce22013-04-11 17:50:18 -0700375 arg_t values;
376 values["size"].int_value = 0;
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700377 values["num_warm_loops"].int_value = 0;
378 values["num_loops"].int_value = 0;
Christopher Ferris1348ce22013-04-11 17:50:18 -0700379 if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) {
Christopher Ferrisf90ab5f2013-05-03 12:51:45 -0700380 return false;
Christopher Ferris1348ce22013-04-11 17:50:18 -0700381 }
382
383 size_t size = values["size"].int_value;
384 if ((size % 64) != 0) {
385 printf("The size value must be a multiple of 64.\n");
Christopher Ferrisf90ab5f2013-05-03 12:51:45 -0700386 return false;
Christopher Ferris1348ce22013-04-11 17:50:18 -0700387 }
388
389 if (setpriority(PRIO_PROCESS, 0, -20)) {
390 perror("Unable to raise priority of process.");
Christopher Ferrisf90ab5f2013-05-03 12:51:45 -0700391 return false;
Christopher Ferris1348ce22013-04-11 17:50:18 -0700392 }
393
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700394 bool preamble_printed = false;
395 size_t num_warm_loops = values["num_warm_loops"].int_value;
396 size_t num_loops = values["num_loops"].int_value;
Christopher Ferris1348ce22013-04-11 17:50:18 -0700397 for (std::vector<BandwidthBenchmark*>::iterator it = bench_objs.begin();
398 it != bench_objs.end(); ++it) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700399 if (!(*it)->canRun()) {
400 continue;
401 }
Ben Chengfb829a42015-11-10 17:33:40 +0800402 if (!(*it)->setSize(values["size"].int_value)) {
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700403 printf("Failed creating buffer for bandwidth test.\n");
404 return false;
405 }
406 if (num_warm_loops) {
407 (*it)->set_num_warm_loops(num_warm_loops);
408 }
409 if (num_loops) {
410 (*it)->set_num_loops(num_loops);
411 }
412 if (!preamble_printed) {
413 preamble_printed = true;
414 printf("Benchmarking %s bandwidth\n", name);
415 printf(" size = %d\n", (*it)->size());
416 printf(" num_warm_loops = %d\n", (*it)->num_warm_loops());
417 printf(" num_loops = %d\n\n", (*it)->num_loops());
418 }
Christopher Ferris1348ce22013-04-11 17:50:18 -0700419 (*it)->run();
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700420 printf(" %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(),
Christopher Ferris1348ce22013-04-11 17:50:18 -0700421 (*it)->mb_per_sec());
422 }
423
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700424 return true;
425}
426
427int copy_bandwidth(int argc, char** argv) {
428 std::vector<BandwidthBenchmark*> bench_objs;
429 bench_objs.push_back(new CopyLdrdStrdBenchmark());
430 bench_objs.push_back(new CopyLdmiaStmiaBenchmark());
Christopher Ferris65d2c782013-07-02 16:38:45 -0700431 bench_objs.push_back(new CopyVld1Vst1Benchmark());
432 bench_objs.push_back(new CopyVldrVstrBenchmark());
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700433 bench_objs.push_back(new CopyVldmiaVstmiaBenchmark());
434 bench_objs.push_back(new MemcpyBenchmark());
435
436 if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) {
437 return -1;
438 }
439 return 0;
440}
441
442int write_bandwidth(int argc, char** argv) {
443 std::vector<BandwidthBenchmark*> bench_objs;
444 bench_objs.push_back(new WriteStrdBenchmark());
445 bench_objs.push_back(new WriteStmiaBenchmark());
Christopher Ferris65d2c782013-07-02 16:38:45 -0700446 bench_objs.push_back(new WriteVst1Benchmark());
447 bench_objs.push_back(new WriteVstrBenchmark());
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700448 bench_objs.push_back(new WriteVstmiaBenchmark());
449 bench_objs.push_back(new MemsetBenchmark());
450
451 if (!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) {
452 return -1;
453 }
454
455 return 0;
456}
457
458int read_bandwidth(int argc, char** argv) {
459 std::vector<BandwidthBenchmark*> bench_objs;
460 bench_objs.push_back(new ReadLdrdBenchmark());
461 bench_objs.push_back(new ReadLdmiaBenchmark());
Christopher Ferris65d2c782013-07-02 16:38:45 -0700462 bench_objs.push_back(new ReadVld1Benchmark());
463 bench_objs.push_back(new ReadVldrBenchmark());
Christopher Ferris1a3794a2013-05-02 15:12:11 -0700464 bench_objs.push_back(new ReadVldmiaBenchmark());
465
466 if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) {
467 return -1;
468 }
Christopher Ferris1348ce22013-04-11 17:50:18 -0700469 return 0;
470}