Crypto performance benchmark

This reverts commit 39087e3740095e1851b0fcf1988c1c0881a208b6. And adds
the respective fix.

Change-Id: I1f70211c77f892c6252cf5e05e5e52d6c63e2293
diff --git a/crypto-perf/crypto.cpp b/crypto-perf/crypto.cpp
new file mode 100644
index 0000000..2cd2709
--- /dev/null
+++ b/crypto-perf/crypto.cpp
@@ -0,0 +1,170 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sched.h>
+#include <sys/resource.h>
+#include <ctype.h>
+#define USEC_PER_SEC 1000000ULL
+#define MAX_COUNT 1000000000ULL
+#define NUM_INSTS_GARBAGE 18
+
+// Contains information about benchmark options.
+typedef struct {
+    int cpu_to_lock;
+    int locked_freq;
+} command_data_t;
+
+void usage() {
+    printf("--------------------------------------------------------------------------------\n");
+    printf("Usage:");
+    printf("	crypto [--cpu_to_lock CPU] [--locked_freq FREQ_IN_KHZ]\n\n");
+    printf("!!!!!!Lock the desired core to a desired frequency before invoking this benchmark.\n");
+    printf(
+          "Hint: Set scaling_max_freq=scaling_min_freq=FREQ_IN_KHZ. FREQ_IN_KHZ "
+          "can be obtained from scaling_available_freq\n");
+    printf("--------------------------------------------------------------------------------\n");
+}
+
+int processOptions(int argc, char **argv, command_data_t *cmd_data) {
+    // Initialize the command_flags.
+    cmd_data->cpu_to_lock = 0;
+    cmd_data->locked_freq = 1;
+    for (int i = 1; i < argc; i++) {
+        if (argv[i][0] == '-') {
+            int *save_value = NULL;
+            if (strcmp(argv[i], "--cpu_to_lock") == 0) {
+                save_value = &cmd_data->cpu_to_lock;
+	    } else if (strcmp(argv[i], "--locked_freq") == 0) {
+                save_value = &cmd_data->locked_freq;
+            } else {
+                printf("Unknown option %s\n", argv[i]);
+                return -1;
+            }
+            if (save_value) {
+                // Checking both characters without a strlen() call should be
+                // safe since as long as the argument exists, one character will
+                // be present (\0). And if the first character is '-', then
+                // there will always be a second character (\0 again).
+                if (i == argc - 1 ||
+                    (argv[i + 1][0] == '-' && !isdigit(argv[i + 1][1]))) {
+                    printf("The option %s requires one argument.\n", argv[i]);
+                    return -1;
+                }
+                *save_value = (int)strtol(argv[++i], NULL, 0);
+            }
+	}
+    }
+    return 0;
+}
+/* Performs encryption on garbage values. In Cortex-A57 r0p1 and later
+ * revisions, pairs of dependent AESE/AESMC and AESD/AESIMC instructions are
+ * higher performance when adjacent, and in the described order below. */
+void garbage_encrypt() {
+    __asm__ __volatile__(
+	"aese  v0.16b, v4.16b ;"
+        "aesmc	v0.16b, v0.16b ;"
+        "aese  v1.16b, v4.16b ;"
+        "aesmc	v1.16b, v1.16b ;"
+        "aese  v2.16b, v4.16b ;"
+        "aesmc	v2.16b, v2.16b ;"
+        "aese  v0.16b, v5.16b ;"
+        "aesmc	v0.16b, v0.16b ;"
+        "aese  v1.16b, v5.16b ;"
+        "aesmc	v1.16b, v1.16b ;"
+        "aese  v2.16b, v5.16b ;"
+        "aesmc	v2.16b, v2.16b ;"
+        "aese  v0.16b, v6.16b ;"
+        "aesmc	v0.16b, v0.16b ;"
+        "aese  v1.16b, v6.16b ;"
+        "aesmc	v1.16b, v1.16b ;"
+        "aese  v2.16b, v6.16b ;"
+        "aesmc	v2.16b, v2.16b ;");
+}
+
+void garbage_decrypt() {
+    __asm__ __volatile__(
+	"aesd  v0.16b, v4.16b ;"
+        "aesimc	v0.16b, v0.16b ;"
+        "aesd  v1.16b, v4.16b ;"
+        "aesimc	v1.16b, v1.16b ;"
+        "aesd  v2.16b, v4.16b ;"
+        "aesimc	v2.16b, v2.16b ;"
+        "aesd  v0.16b, v5.16b ;"
+        "aesimc	v0.16b, v0.16b ;"
+        "aesd  v1.16b, v5.16b ;"
+        "aesimc	v1.16b, v1.16b ;"
+        "aesd  v2.16b, v5.16b ;"
+        "aesimc	v2.16b, v2.16b ;"
+        "aesd  v0.16b, v6.16b ;"
+        "aesimc	v0.16b, v0.16b ;"
+        "aesd  v1.16b, v6.16b ;"
+        "aesimc	v1.16b, v1.16b ;"
+        "aesd  v2.16b, v6.16b ;"
+        "aesimc	v2.16b, v2.16b ;");
+}
+
+
+int main(int argc, char **argv) {
+    usage();
+    command_data_t cmd_data;
+
+    if(processOptions(argc, argv, &cmd_data) == -1) {
+        usage();
+        return -1;
+    }
+    unsigned long long count = 0;
+    struct timeval begin_time, end_time, elapsed_time;
+    cpu_set_t cpuset;
+    CPU_ZERO(&cpuset);
+    CPU_SET(cmd_data.cpu_to_lock, &cpuset);
+    if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+	perror("sched_setaffinity failed");
+	return false;
+    }
+    gettimeofday(&begin_time, NULL);
+    while (count < MAX_COUNT) {
+      garbage_encrypt();
+      count++;
+    }
+    gettimeofday(&end_time, NULL);
+    timersub(&end_time, &begin_time, &elapsed_time);
+    fprintf(stderr, "encrypt: %llu us\n",
+            elapsed_time.tv_sec * USEC_PER_SEC + elapsed_time.tv_usec);
+    fprintf(stderr, "encrypt instructions: %llu\n",
+            MAX_COUNT * NUM_INSTS_GARBAGE);
+    fprintf(stderr, "encrypt instructions per second: %f\n",
+            (float)(MAX_COUNT * NUM_INSTS_GARBAGE * USEC_PER_SEC) /
+                (elapsed_time.tv_sec * USEC_PER_SEC + elapsed_time.tv_usec));
+    if (cmd_data.locked_freq != 0) {
+	fprintf(stderr, "encrypt instructions per cycle: %f\n",
+		(float)(MAX_COUNT * NUM_INSTS_GARBAGE * USEC_PER_SEC) /
+		((elapsed_time.tv_sec * USEC_PER_SEC + elapsed_time.tv_usec) *
+		 1000 * cmd_data.locked_freq));
+    }
+    printf("--------------------------------------------------------------------------------\n");
+
+    count = 0;
+    gettimeofday(&begin_time, NULL);
+    while (count < MAX_COUNT) {
+      garbage_decrypt();
+      count++;
+    }
+    gettimeofday(&end_time, NULL);
+    timersub(&end_time, &begin_time, &elapsed_time);
+    fprintf(stderr, "decrypt: %llu us\n",
+            elapsed_time.tv_sec * USEC_PER_SEC + elapsed_time.tv_usec);
+    fprintf(stderr, "decrypt instructions: %llu\n",
+            MAX_COUNT * NUM_INSTS_GARBAGE);
+    fprintf(stderr, "decrypt instructions per second: %f\n",
+            (float)(MAX_COUNT * NUM_INSTS_GARBAGE * USEC_PER_SEC) /
+                (elapsed_time.tv_sec * USEC_PER_SEC + elapsed_time.tv_usec));
+    if (cmd_data.locked_freq != 0) {
+	fprintf(stderr, "decrypt instructions per cycle: %f\n",
+		(float)(MAX_COUNT * NUM_INSTS_GARBAGE * USEC_PER_SEC) /
+		((elapsed_time.tv_sec * USEC_PER_SEC + elapsed_time.tv_usec) *
+		 1000 * cmd_data.locked_freq));
+    }
+    return 0;
+}