Fix percentile calculations
Adjust max latency in running binderThroughputTest
to improve percentile accuracy. We used to use a fixed
max latency which is divided into time buckets to obtain
the round trip latency percentile. The actual latency,
however, can be much smaller than the max latency, which
results in measurements from all iterations ending up in
the same bucket and leads to inaccurate percentile
measurement. This is resolved by either performing a
training round (option -t), which sets the max latency to
two times the worst latency during training, or by having
the caller provide a max latency (option -m) in microseconds.
Use --help option to see usage.
Bug: 62660944
Change-Id: Iaa23837eb24b90012ae20c35f20caae33986b35a
Fixes: 62660944
Test: Running "binderThroughputTest -p -w 2 -i 10000 -s 0" on marlin-3.18 used to show the same percentile values for 50%, 90%, 95%, and 99%. Now with -t flag, percentile values are different.
Signed-off-by: Sherry Yang <sherryy@google.com>
diff --git a/libs/binder/tests/binderThroughputTest.cpp b/libs/binder/tests/binderThroughputTest.cpp
index 6e8f7df..455f2c4 100644
--- a/libs/binder/tests/binderThroughputTest.cpp
+++ b/libs/binder/tests/binderThroughputTest.cpp
@@ -101,18 +101,21 @@
};
static const uint32_t num_buckets = 128;
-static const uint64_t max_time_bucket = 50ull * 1000000;
-static const uint64_t time_per_bucket = max_time_bucket / num_buckets;
-static constexpr float time_per_bucket_ms = time_per_bucket / 1.0E6;
+static uint64_t max_time_bucket = 50ull * 1000000;
+static uint64_t time_per_bucket = max_time_bucket / num_buckets;
struct ProcResults {
- uint64_t m_best = max_time_bucket;
uint64_t m_worst = 0;
uint32_t m_buckets[num_buckets] = {0};
uint64_t m_transactions = 0;
+ uint64_t m_long_transactions = 0;
uint64_t m_total_time = 0;
+ uint64_t m_best = max_time_bucket;
void add_time(uint64_t time) {
+ if (time > max_time_bucket) {
+ m_long_transactions++;
+ }
m_buckets[min(time, max_time_bucket-1) / time_per_bucket] += 1;
m_best = min(time, m_best);
m_worst = max(time, m_worst);
@@ -127,16 +130,24 @@
ret.m_worst = max(a.m_worst, b.m_worst);
ret.m_best = min(a.m_best, b.m_best);
ret.m_transactions = a.m_transactions + b.m_transactions;
+ ret.m_long_transactions = a.m_long_transactions + b.m_long_transactions;
ret.m_total_time = a.m_total_time + b.m_total_time;
return ret;
}
void dump() {
+ if (m_long_transactions > 0) {
+ cout << (double)m_long_transactions / m_transactions << "% of transactions took longer "
+ "than estimated max latency. Consider setting -m to be higher than "
+ << m_worst / 1000 << " microseconds" << endl;
+ }
+
double best = (double)m_best / 1.0E6;
double worst = (double)m_worst / 1.0E6;
double average = (double)m_total_time / m_transactions / 1.0E6;
cout << "average:" << average << "ms worst:" << worst << "ms best:" << best << "ms" << endl;
uint64_t cur_total = 0;
+ float time_per_bucket_ms = time_per_bucket / 1.0E6;
for (int i = 0; i < num_buckets; i++) {
float cur_time = time_per_bucket_ms * i + 0.5f * time_per_bucket_ms;
if ((cur_total < 0.5f * m_transactions) && (cur_total + m_buckets[i] >= 0.5f * m_transactions)) {
@@ -154,7 +165,6 @@
cur_total += m_buckets[i];
}
cout << endl;
-
}
};
@@ -166,13 +176,12 @@
return serviceName;
}
-void worker_fx(
- int num,
- int worker_count,
- int iterations,
- int payload_size,
- bool cs_pair,
- Pipe p)
+void worker_fx(int num,
+ int worker_count,
+ int iterations,
+ int payload_size,
+ bool cs_pair,
+ Pipe p)
{
// Create BinderWorkerService and for go.
ProcessState::self()->startThreadPool();
@@ -204,12 +213,12 @@
for (int i = 0; (!cs_pair || num >= server_count) && i < iterations; i++) {
Parcel data, reply;
int target = cs_pair ? num % server_count : rand() % workers.size();
- int sz = payload_size;
+ int sz = payload_size;
- while (sz > sizeof(uint32_t)) {
- data.writeInt32(0);
- sz -= sizeof(uint32_t);
- }
+ while (sz > sizeof(uint32_t)) {
+ data.writeInt32(0);
+ sz -= sizeof(uint32_t);
+ }
start = chrono::high_resolution_clock::now();
status_t ret = workers[target]->transact(BINDER_NOP, data, &reply);
end = chrono::high_resolution_clock::now();
@@ -264,47 +273,19 @@
}
}
-int main(int argc, char *argv[])
+void run_main(int iterations,
+ int workers,
+ int payload_size,
+ int cs_pair,
+ bool training_round=false)
{
- int workers = 2;
- int iterations = 10000;
- int payload_size = 0;
- bool cs_pair = false;
- (void)argc;
- (void)argv;
vector<Pipe> pipes;
-
- // Parse arguments.
- for (int i = 1; i < argc; i++) {
- if (string(argv[i]) == "-w") {
- workers = atoi(argv[i+1]);
- i++;
- continue;
- }
- if (string(argv[i]) == "-i") {
- iterations = atoi(argv[i+1]);
- i++;
- continue;
- }
- if (string(argv[i]) == "-s") {
- payload_size = atoi(argv[i+1]);
- i++;
- }
- if (string(argv[i]) == "-p") {
- // client/server pairs instead of spreading
- // requests to all workers. If true, half
- // the workers become clients and half servers
- cs_pair = true;
- }
- }
-
// Create all the workers and wait for them to spawn.
for (int i = 0; i < workers; i++) {
pipes.push_back(make_worker(i, iterations, workers, payload_size, cs_pair));
}
wait_all(pipes);
-
// Run the workers and wait for completion.
chrono::time_point<chrono::high_resolution_clock> start, end;
cout << "waiting for workers to complete" << endl;
@@ -326,7 +307,6 @@
pipes[i].recv(tmp_results);
tot_results = ProcResults::combine(tot_results, tmp_results);
}
- tot_results.dump();
// Kill all the workers.
cout << "killing workers" << endl;
@@ -338,5 +318,83 @@
cout << "nonzero child status" << status << endl;
}
}
+ if (training_round) {
+ // sets max_time_bucket to 2 * m_worst from the training round.
+ // Also needs to adjust time_per_bucket accordingly.
+ max_time_bucket = 2 * tot_results.m_worst;
+ time_per_bucket = max_time_bucket / num_buckets;
+ cout << "Max latency during training: " << tot_results.m_worst / 1.0E6 << "ms" << endl;
+ } else {
+ tot_results.dump();
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int workers = 2;
+ int iterations = 10000;
+ int payload_size = 0;
+ bool cs_pair = false;
+ bool training_round = false;
+ (void)argc;
+ (void)argv;
+
+ // Parse arguments.
+ for (int i = 1; i < argc; i++) {
+ if (string(argv[i]) == "--help") {
+ cout << "Usage: binderThroughputTest [OPTIONS]" << endl;
+ cout << "\t-i N : Specify number of iterations." << endl;
+ cout << "\t-m N : Specify expected max latency in microseconds." << endl;
+ cout << "\t-p : Split workers into client/server pairs." << endl;
+ cout << "\t-s N : Specify payload size." << endl;
+ cout << "\t-t N : Run training round." << endl;
+ cout << "\t-w N : Specify total number of workers." << endl;
+ return 0;
+ }
+ if (string(argv[i]) == "-w") {
+ workers = atoi(argv[i+1]);
+ i++;
+ continue;
+ }
+ if (string(argv[i]) == "-i") {
+ iterations = atoi(argv[i+1]);
+ i++;
+ continue;
+ }
+ if (string(argv[i]) == "-s") {
+ payload_size = atoi(argv[i+1]);
+ i++;
+ }
+ if (string(argv[i]) == "-p") {
+ // client/server pairs instead of spreading
+ // requests to all workers. If true, half
+ // the workers become clients and half servers
+ cs_pair = true;
+ }
+ if (string(argv[i]) == "-t") {
+ // Run one training round before actually collecting data
+ // to get an approximation of max latency.
+ training_round = true;
+ }
+ if (string(argv[i]) == "-m") {
+ // Caller specified the max latency in microseconds.
+ // No need to run training round in this case.
+ if (atoi(argv[i+1]) > 0) {
+ max_time_bucket = strtoull(argv[i+1], (char **)NULL, 10) * 1000;
+ i++;
+ } else {
+ cout << "Max latency -m must be positive." << endl;
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+
+ if (training_round) {
+ cout << "Start training round" << endl;
+ run_main(iterations, workers, payload_size, cs_pair, training_round=true);
+ cout << "Completed training round" << endl << endl;
+ }
+
+ run_main(iterations, workers, payload_size, cs_pair);
return 0;
}