diff --git a/ANRdaemon/ANRdaemon.cpp b/ANRdaemon/ANRdaemon.cpp
index 35f2ecb..c33414c 100644
--- a/ANRdaemon/ANRdaemon.cpp
+++ b/ANRdaemon/ANRdaemon.cpp
@@ -134,7 +134,7 @@
 
     if ((fp = fopen("/proc/stat", "r")) == NULL) {
         err = true;
-        sprintf(err_msg, "can't read from /proc/stat with errno %d", errno);
+        snprintf(err_msg, sizeof(err_msg), "can't read from /proc/stat with errno %d", errno);
     } else {
         if (fscanf(fp, params, &cpu->utime, &cpu->ntime,
                 &cpu->stime, &cpu->itime, &cpu->iowtime, &cpu->irqtime,
@@ -144,6 +144,7 @@
              * is_heavy_loaded() will return false.
              */
             ALOGE("Error in getting cpu status. Skipping this check.");
+            fclose(fp);
             return;
         }
 
@@ -157,7 +158,7 @@
 /*
  * Calculate cpu usage in the past interval.
  * If tracing is on, increase the idle threshold by 1.00% so that we do not
- * turn on and off tracing frequently whe the cpu load is right close to
+ * turn on and off tracing frequently when the cpu load is right close to
  * threshold.
  */
 static bool is_heavy_load(void) {
@@ -192,7 +193,7 @@
     int fd = open(path, O_WRONLY);
     if (fd == -1) {
         err = true;
-        sprintf(err_msg, "Can't open %s. Error: %d", path, errno);
+        snprintf(err_msg, sizeof(err_msg), "Can't open %s. Error: %d", path, errno);
         return -1;
     }
     const char* control = (enable?"1":"0");
@@ -205,7 +206,7 @@
         }
 
         err = true;
-        sprintf(err_msg, "Error %d in writing to %s.", errno, path);
+        snprintf(err_msg, sizeof(err_msg), "Error %d in writing to %s.", errno, path);
     }
     close(fd);
     return (err?-1:0);
@@ -216,16 +217,16 @@
  */
 static void dfs_set_property(uint64_t mtag, const char* mapp, bool enable) {
     char buf[64];
-    snprintf(buf, 64, "%#" PRIx64, mtag);
+    snprintf(buf, sizeof(buf), "%#" PRIx64, mtag);
     if (property_set(dfs_tags_property, buf) < 0) {
         err = true;
-        sprintf(err_msg, "Failed to set debug tags system properties.");
+        snprintf(err_msg, sizeof(err_msg), "Failed to set debug tags system properties.");
     }
 
     if (strlen(mapp) > 0
             && property_set(dfs_apps_property, mapp) < 0) {
         err = true;
-        sprintf(err_msg, "Failed to set debug applications.");
+        snprintf(err_msg, sizeof(err_msg), "Failed to set debug applications.");
     }
 
     if (log_sched) {
@@ -403,13 +404,13 @@
     int fd = open(dfs_buffer_size_path, O_WRONLY);
     if (fd == -1) {
         err = true;
-        sprintf(err_msg, "Can't open atrace buffer size file under /d/tracing.");
+        snprintf(err_msg, sizeof(err_msg), "Can't open atrace buffer size file under /d/tracing.");
         return -1;
     }
     ssize_t len = strlen(buf_size_kb);
     if (write(fd, buf_size_kb, len) != len) {
         err = true;
-        sprintf(err_msg, "Error in writing to atrace buffer size file.");
+        snprintf(err_msg, sizeof(err_msg), "Error in writing to atrace buffer size file.");
     }
     close(fd);
     return (err?-1:0);
diff --git a/cppreopts/cppreopts.sh b/cppreopts/cppreopts.sh
index 8798206..9f21ac7 100644
--- a/cppreopts/cppreopts.sh
+++ b/cppreopts/cppreopts.sh
@@ -19,20 +19,20 @@
 
 # Helper function to copy files
 function do_copy() {
-  odex_file=$1
+  source_file=$1  # first argument: path of the file to copy
   dest_name=$2
   # Move to a temporary file so we can do a rename and have the preopted file
   # appear atomically in the filesystem.
   temp_dest_name=${dest_name}.tmp
-  if ! cp ${odex_file} ${temp_dest_name} ; then
-    log -p w -t cppreopts "Unable to copy odex file ${odex_file} to ${temp_dest_name}!"
+  if ! cp ${source_file} ${temp_dest_name} ; then
+    log -p w -t cppreopts "Unable to copy file ${source_file} to ${temp_dest_name}!"
   else
-    log -p i -t cppreopts "Copied odex file from ${odex_file} to ${temp_dest_name}"
+    log -p i -t cppreopts "Copied file from ${source_file} to ${temp_dest_name}"
     sync
     if ! mv ${temp_dest_name} ${dest_name} ; then
-      log -p w -t cppreopts "Unable to rename temporary odex file from ${temp_dest_name} to ${dest_name}"
+      log -p w -t cppreopts "Unable to rename temporary file from ${temp_dest_name} to ${dest_name}"
     else
-      log -p i -t cppreopts "Renamed temporary odex file from ${temp_dest_name} to ${dest_name}"
+      log -p i -t cppreopts "Renamed temporary file from ${temp_dest_name} to ${dest_name}"
     fi
   fi
 }
@@ -42,23 +42,23 @@
   mountpoint=$1
 
   if ! test -f ${mountpoint}/system-other-odex-marker ; then
-    log -p i -t cppreopts "system_other partition does not appear have been built to contain preopted files."
+    log -p i -t cppreopts "system_other partition does not appear to have been built to contain preopted files."
     exit 1
   fi
 
   log -p i -t cppreopts "cppreopts from ${mountpoint}"
-  # For each odex file do the copy task
+  # For each odex and vdex file do the copy task
   # NOTE: this implementation will break in any path with spaces to favor
   # background copy tasks
-  for odex_file in $(find ${mountpoint} -type f -name "*.odex"); do
-    real_odex_name=${odex_file/${mountpoint}/\/system}
-    dest_name=$(preopt2cachename ${real_odex_name})
+  for file in $(find ${mountpoint} -type f -name "*.odex" -o -type f -name "*.vdex"); do
+    real_name=${file/${mountpoint}/\/system}
+    dest_name=$(preopt2cachename ${real_name})
     if ! test $? -eq 0 ; then
-      log -p i -t cppreopts "Unable to figure out destination for ${odex_file}"
+      log -p i -t cppreopts "Unable to figure out destination for ${file}"
       continue
     fi
     # Copy files in background to speed things up
-    do_copy ${odex_file} ${dest_name} &
+    do_copy ${file} ${dest_name} &
   done
   # Wait for jobs to finish
   wait
diff --git a/ext4_utils/allocate.c b/ext4_utils/allocate.c
index 00f2203..28fc8e5 100644
--- a/ext4_utils/allocate.c
+++ b/ext4_utils/allocate.c
@@ -234,6 +234,18 @@
 	for (i = 0; i < num_blocks; i++, block--)
 		bg->block_bitmap[block / 8] &= ~(1 << (block % 8));
 	bg->free_blocks += num_blocks;
+	for (i = bg->chunk_count; i > 0 ;) {
+		--i;
+		if (bg->chunks[i].len >= num_blocks && bg->chunks[i].block <= block) {
+			if (bg->chunks[i].block == block) {
+				bg->chunks[i].block += num_blocks;
+				bg->chunks[i].len -= num_blocks;
+			} else if (bg->chunks[i].block + bg->chunks[i].len - 1 == block + num_blocks) {
+				bg->chunks[i].len -= num_blocks;
+			}
+			break;
+		}
+	}
 }
 
 /* Reduces an existing allocation by len blocks by return the last blocks
diff --git a/ext4_utils/ext4_crypt.cpp b/ext4_utils/ext4_crypt.cpp
index d594a48..890082e 100644
--- a/ext4_utils/ext4_crypt.cpp
+++ b/ext4_utils/ext4_crypt.cpp
@@ -39,11 +39,11 @@
 #define EXT4_KEY_DESCRIPTOR_SIZE_HEX 17
 
 struct ext4_encryption_policy {
-    char version;
-    char contents_encryption_mode;
-    char filenames_encryption_mode;
-    char flags;
-    char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
+    uint8_t version;
+    uint8_t contents_encryption_mode;
+    uint8_t filenames_encryption_mode;
+    uint8_t flags;
+    uint8_t master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
 } __attribute__((__packed__));
 
 #define EXT4_ENCRYPTION_MODE_AES_256_XTS    1
diff --git a/memcpy-perf/memcpy-perf.cpp b/memcpy-perf/memcpy-perf.cpp
index 20d060b..2dfd900 100644
--- a/memcpy-perf/memcpy-perf.cpp
+++ b/memcpy-perf/memcpy-perf.cpp
@@ -7,14 +7,20 @@
 #include <memory>
 #include <cmath>
 #include <string>
+#include <thread>
+
+#define CACHE_HIT_SIZE (1 << 17)  // 128 KiB chunk used by --dummy mode; parenthesized so it expands safely inside larger expressions
 
 using namespace std;
 
-const size_t size_start = 64;
-const size_t size_end = 16 * (1ull << 20);
-const size_t samples = 2048;
+size_t size_start = 64;
+size_t size_end = 16 * (1ull << 20);
+size_t samples = 2048;
 size_t size_per_test = 64 * (1ull << 20);
 size_t tot_sum = 0;
+size_t delay = 0;
+float speed = 0;
+bool dummy = false;
 
 void __attribute__((noinline)) memcpy_noinline(void *dst, void *src, size_t size);
 void __attribute__((noinline)) memset_noinline(void *dst, int value, size_t size);
@@ -26,21 +32,64 @@
     SumBench,
 };
 
+static void usage(char* p) {  // Prints the command-line help to stdout; p is the program name shown on the Usage line.
+    printf("Usage: %s <test> <options>\n"
+           "<test> is one of the following:\n"
+           "  --memcpy\n"
+           "  --memset\n"
+           "  --sum\n"
+           "<options> are optional and apply to all tests:\n"
+           "  --dummy\n"
+           "    Simulates cpu-only load of a test. Guaranteed to use L2\n"
+           "    instead.  Not supported on --sum test.\n"
+           "  --delay DELAY_DIVISOR\n"
+           "  --start START_SIZE_MB\n"
+           "    --end END_SIZE_MB (requires start, optional)\n"
+           "  --samples NUM_SAMPLES\n"
+           , p);
+}
+
 int main(int argc, char *argv[])
 {
-    BenchType type;
+    BenchType type = MemcpyBench;
     if (argc <= 1) {
-        cerr << "memcpy_perf [--memcpy|--memset|--sum]" << endl;
+        usage(argv[0]);
         return 0;
     }
-    if (string(argv[1]) == string("--memcpy")) {
-        type = MemcpyBench;
-    } else if (string(argv[1]) == string("--memset")) {
-        type = MemsetBench;
-    } else if (string(argv[1]) == string("--sum")) {
-        type = SumBench;
-    } else {
-        type = MemcpyBench;
+    for (int i = 1; i < argc; i++) {
+      if (string(argv[i]) == string("--memcpy")) {
+         type = MemcpyBench;
+      } else if (string(argv[i]) == string("--memset")) {
+         type = MemsetBench;
+      } else if (string(argv[i]) == string("--sum")) {
+         type = SumBench;
+      } else if (string(argv[i]) == string("--dummy")) {
+         dummy = true;
+      } else if (i + 1 < argc) {
+          if (string(argv[i]) == string("--delay")) {
+             delay = atoi(argv[++i]);
+          } else if (string(argv[i]) == string("--start")) {
+             size_start = atoi(argv[++i]) * (1ull << 20);
+             size_end = size_start;
+          } else if (string(argv[i]) == string("--end")) {
+             size_t end = atoi(argv[++i]) * (1ull << 20);
+             if (end > size_start && i > 3
+                 && string(argv[i-3]) == string("--start")) {
+                 size_end = end;
+             } else {
+                 printf("Cannot specify --end without --start.\n");
+                 return 0;
+             }
+          } else if (string(argv[i]) == string("--samples")) {
+             samples = atoi(argv[++i]);
+          } else {
+             printf("Unknown argument %s\n", argv[i]);
+             return 0;
+          }
+       } else {
+          printf("The %s option requires a single argument.\n", argv[i]);
+          return 0;
+       }
     }
 
     unique_ptr<uint8_t[]> src(new uint8_t[size_end]);
@@ -54,8 +103,10 @@
     //cout << "src: " << (uintptr_t)src.get() << endl;
     //cout << "dst: " <<  (uintptr_t)dst.get() << endl;
 
-    for (double cur_pow = start_pow; cur_pow <= end_pow; cur_pow += pow_inc) {
-        chrono::time_point<chrono::high_resolution_clock> copy_start, copy_end;
+    for (double cur_pow = start_pow; cur_pow <= end_pow && samples > 0;
+            cur_pow += pow_inc) {
+        chrono::time_point<chrono::high_resolution_clock>
+            copy_start, copy_end, pre_wait;
 
         size_t cur_size = (size_t)pow(10.0, cur_pow);
         size_t iter_per_size = size_per_test / cur_size;
@@ -65,9 +116,21 @@
             case MemsetBench: {
                 memcpy_noinline(src.get(), dst.get(), cur_size);
                 memset_noinline(dst.get(), 0xdeadbeef, cur_size);
+                size_t hit_size = CACHE_HIT_SIZE;
                 copy_start = chrono::high_resolution_clock::now();
                 for (int i = 0; i < iter_per_size; i++) {
-                    memset_noinline(dst.get(), 0xdeadbeef, cur_size);
+                    if (!dummy) {
+                        memset_noinline(dst.get(), 0xdeadbeef, cur_size);
+                    } else {
+                        while (hit_size < cur_size) {
+                            memset_noinline
+                                (dst.get(), 0xdeadbeef, CACHE_HIT_SIZE);
+                            hit_size += 1 << 17;
+                        }
+                    }
+                    if (delay != 0)
+                        this_thread::sleep_for(chrono
+                            ::nanoseconds(size_per_test / delay));
                 }
                 copy_end = chrono::high_resolution_clock::now();
                 break;
@@ -75,9 +138,21 @@
             case MemcpyBench: {
                 memcpy_noinline(dst.get(), src.get(), cur_size);
                 memcpy_noinline(src.get(), dst.get(), cur_size);
+                size_t hit_size = CACHE_HIT_SIZE;
                 copy_start = chrono::high_resolution_clock::now();
                 for (int i = 0; i < iter_per_size; i++) {
-                    memcpy_noinline(dst.get(), src.get(), cur_size);
+                    if (!dummy) {
+                        memcpy_noinline(dst.get(), src.get(), cur_size);
+                    } else {
+                        while (hit_size < cur_size) {
+                            memcpy_noinline
+                                (dst.get(), src.get(), CACHE_HIT_SIZE);
+                            hit_size += CACHE_HIT_SIZE;
+                        }
+                    }
+                    if (delay != 0)
+                        this_thread::sleep_for(chrono
+                            ::nanoseconds(size_per_test / delay));
                 }
                 copy_end = chrono::high_resolution_clock::now();
                 break;
@@ -88,6 +163,9 @@
                 copy_start = chrono::high_resolution_clock::now();
                 for (int i = 0; i < iter_per_size; i++) {
                     s += sum(src.get(), cur_size);
+                    if (delay != 0)
+                        this_thread::sleep_for(chrono
+                            ::nanoseconds(size_per_test / delay));
                 }
                 copy_end = chrono::high_resolution_clock::now();
                 tot_sum += s;
@@ -95,11 +173,18 @@
             }
         }
 
+        samples--;
         double ns_per_copy = chrono::duration_cast<chrono::nanoseconds>(copy_end - copy_start).count() / double(iter_per_size);
         double gb_per_sec = ((double)cur_size / (1ull<<30)) / (ns_per_copy / 1.0E9);
         if (type == MemcpyBench)
             gb_per_sec *= 2.0;
-        cout << "size: " << cur_size << ", perf: " << gb_per_sec << "GB/s, iter: " << iter_per_size << endl;
+        double percent_waiting = 0;
+        if (delay != 0) {
+            percent_waiting = (size_per_test / delay) / ns_per_copy * 100;
+        }
+        cout << "size: " << cur_size << ", perf: " << gb_per_sec
+             << "GB/s, iter: " << iter_per_size << ", \% time spent waiting: "
+             << percent_waiting << endl;
     }
     return 0;
 }
diff --git a/preopt2cachename/preopt2cachename.cpp b/preopt2cachename/preopt2cachename.cpp
index dfdc63f..f9a12ff 100644
--- a/preopt2cachename/preopt2cachename.cpp
+++ b/preopt2cachename/preopt2cachename.cpp
@@ -24,37 +24,38 @@
 #endif
 
 static const char* kDalvikCacheDir = "/data/dalvik-cache/";
-static const char* kCacheSuffix = "@classes.dex";
+static const char* kOdexCacheSuffix = "@classes.dex";
+static const char* kVdexCacheSuffix = "@classes.vdex";
 
-// Returns the ISA extracted from the odex_file_location.
-// odex_file_location is formatted like /system/app/<app_name>/oat/<isa>/<app_name>.odex for all
-// functions. We return an empty string "" in error cases.
-static std::string ExtractISA(const std::string& odex_file_location) {
-  std::vector<std::string> split_file_location = android::base::Split(odex_file_location, "/");
+// Returns the ISA extracted from the file_location.
+// file_location is formatted like /system/app/<app_name>/oat/<isa>/<app_name>.{odex,vdex}
+// for all functions. We return an empty string "" in error cases.
+static std::string ExtractISA(const std::string& file_location) {  // Takes the second-to-last slash-separated segment as the ISA.
+  std::vector<std::string> split_file_location = android::base::Split(file_location, "/");  // one entry per path segment
   if (split_file_location.size() <= 1) {
     return "";
   } else if (split_file_location.size() != 7) {
-    LOG(WARNING) << "Unexpected length for odex-file-location. We expected 7 segments but found "
+    LOG(WARNING) << "Unexpected length for file-location. We expected 7 segments but found "
                  << split_file_location.size();
   }
   return split_file_location[split_file_location.size() - 2];
 }
 
-// Returns the apk name extracted from the odex_file_location.
-// odex_file_location is formatted like /system/app/<app_name>/oat/<isa>/<app_name>.odex. We return
-// the final <app_name> with the .odex replaced with .apk.
-static std::string ExtractAPKName(const std::string& odex_file_location) {
+// Returns the apk name extracted from the file_location.
+// file_location is formatted like /system/app/<app_name>/oat/<isa>/<app_name>.{odex,vdex}.
+// We return the final <app_name> with the .{odex,vdex} replaced with .apk.
+static std::string ExtractAPKName(const std::string& file_location) {
   // Find and copy filename.
-  size_t file_location_start = odex_file_location.rfind('/');
+  size_t file_location_start = file_location.rfind('/');
   if (file_location_start == std::string::npos) {
     return "";
   }
-  size_t ext_start = odex_file_location.rfind('.');
+  size_t ext_start = file_location.rfind('.');
   if (ext_start == std::string::npos || ext_start < file_location_start) {
     return "";
   }
-  std::string apk_name = odex_file_location.substr(file_location_start + 1,
-                                                   ext_start - file_location_start);
+  std::string apk_name = file_location.substr(file_location_start + 1,
+                                              ext_start - file_location_start);
 
   // Replace extension with .apk.
   apk_name += "apk";
@@ -62,18 +63,18 @@
 }
 
 // The cache file name is /data/dalvik-cache/<isa>/ prior to this function
-static bool OdexFilenameToCacheFile(const std::string& odex_file_location,
-                                    /*in-out*/std::string& cache_file) {
-  // Skip the first '/' in odex_file_location.
-  size_t initial_position = odex_file_location[0] == '/' ? 1 : 0;
-  size_t apk_position = odex_file_location.find("/oat", initial_position);
+static bool SystemBFilenameToCacheFile(const std::string& file_location,
+                                       /*in-out*/std::string& cache_file) {
+  // Skip the first '/' in file_location.
+  size_t initial_position = file_location[0] == '/' ? 1 : 0;
+  size_t apk_position = file_location.find("/oat", initial_position);
   if (apk_position == std::string::npos) {
     LOG(ERROR) << "Unable to find oat directory!";
     return false;
   }
 
   size_t cache_file_position = cache_file.size();
-  cache_file += odex_file_location.substr(initial_position, apk_position);
+  cache_file += file_location.substr(initial_position, apk_position);
   // '/' -> '@' up to where the apk would be.
   cache_file_position = cache_file.find('/', cache_file_position);
   while (cache_file_position != std::string::npos) {
@@ -82,28 +83,33 @@
   }
 
   // Add <apk_name>.
-  std::string apk_name = ExtractAPKName(odex_file_location);
+  std::string apk_name = ExtractAPKName(file_location);
   if (apk_name.empty()) {
-    LOG(ERROR) << "Unable to determine apk name from odex file name '" << odex_file_location << "'";
+    LOG(ERROR) << "Unable to determine apk name from file name '" << file_location << "'";
     return false;
   }
   cache_file += apk_name;
-  cache_file += kCacheSuffix;
+  if (file_location.size() >= 5 &&
+      file_location.substr(file_location.size() - 5) == std::string(".vdex")) {
+    cache_file += kVdexCacheSuffix;
+  } else {
+    cache_file += kOdexCacheSuffix;
+  }
   return true;
 }
 
-// Do the overall transformation from odex_file_location to output_file_location. Prior to this the
+// Do the overall transformation from file_location to output_file_location. Prior to this the
 // output_file_location is empty.
-static bool OdexToCacheFile(std::string& odex_file_location,
-                            /*out*/std::string& output_file_location) {
-  std::string isa = ExtractISA(odex_file_location);
+static bool SystemBFileToCacheFile(const std::string& file_location,
+                                   /*out*/std::string& output_file_location) {
+  std::string isa = ExtractISA(file_location);  // empty result means the ISA could not be extracted
   if (isa.empty()) {
-    LOG(ERROR) << "Unable to determine isa for odex file '" << odex_file_location << "', skipping";
+    LOG(ERROR) << "Unable to determine isa for file '" << file_location << "', skipping";
     return false;
   }
   output_file_location += isa;
   output_file_location += '/';
-  return OdexFilenameToCacheFile(odex_file_location, output_file_location);
+  return SystemBFilenameToCacheFile(file_location, output_file_location);  // appends the rest of the cache file name after <isa>/
 }
 
 // This program is used to determine where in the /data directory the runtime will search for an
@@ -115,9 +121,9 @@
     LOG(ERROR) << "usage: preopt2cachename preopt-location";
     return 2;
   }
-  std::string odex_file_location(argv[1]);
+  std::string file_location(argv[1]);
   std::string output_file_location(kDalvikCacheDir);
-  if (!OdexToCacheFile(odex_file_location, output_file_location)) {
+  if (!SystemBFileToCacheFile(file_location, output_file_location)) {
     return 1;
   } else {
     std::cout << output_file_location;
diff --git a/puncture_fs/puncture_fs.c b/puncture_fs/puncture_fs.c
index e9d08dc..dbb4efc 100644
--- a/puncture_fs/puncture_fs.c
+++ b/puncture_fs/puncture_fs.c
@@ -163,6 +163,10 @@
                 (int) (100.0 * starting_max / total_size));
         hole_max = get_random_num(starting_max, ending_max);
 
+	do {
+		hole_max = get_random_num(starting_max, ending_max);
+	} while (hole_max == starting_max);
+
         create_unique_file(stay_dir,
                            hole_max - starting_max,
                            file_id++,
diff --git a/simpleperf/cpu_hotplug_test.cpp b/simpleperf/cpu_hotplug_test.cpp
index 51ec677..23a6bec 100644
--- a/simpleperf/cpu_hotplug_test.cpp
+++ b/simpleperf/cpu_hotplug_test.cpp
@@ -199,13 +199,24 @@
 struct CpuToggleThreadArg {
   int toggle_cpu;
   std::atomic<bool> end_flag;
+  std::atomic<bool> cpu_hotplug_failed;  // set by CpuToggleThread when SetCpuOnline() fails; the test loops poll it
+
+  CpuToggleThreadArg(int cpu)  // both flags start out false
+      : toggle_cpu(cpu), end_flag(false), cpu_hotplug_failed(false) {
+  }
 };
 
 static void CpuToggleThread(CpuToggleThreadArg* arg) {
   while (!arg->end_flag) {
-    CHECK(SetCpuOnline(arg->toggle_cpu, true));
+    if (!SetCpuOnline(arg->toggle_cpu, true)) {
+      arg->cpu_hotplug_failed = true;  // record the failure so the polling test loop can stop early
+      break;  // stop toggling after the first failure
+    }
     std::this_thread::sleep_for(cpu_hotplug_interval);
-    CHECK(SetCpuOnline(arg->toggle_cpu, false));
+    if (!SetCpuOnline(arg->toggle_cpu, false)) {
+      arg->cpu_hotplug_failed = true;  // same reporting path for a failed offline request
+      break;
+    }
     std::this_thread::sleep_for(cpu_hotplug_interval);
   }
 }
@@ -223,9 +234,7 @@
   if (!FindAHotpluggableCpu(&test_cpu)) {
     return;
   }
-  CpuToggleThreadArg cpu_toggle_arg;
-  cpu_toggle_arg.toggle_cpu = test_cpu;
-  cpu_toggle_arg.end_flag = false;
+  CpuToggleThreadArg cpu_toggle_arg(test_cpu);
   std::thread cpu_toggle_thread(CpuToggleThread, &cpu_toggle_arg);
 
   std::unique_ptr<EventTypeAndModifier> event_type_modifier = ParseEventType("cpu-cycles");
@@ -240,7 +249,7 @@
   auto report_step = std::chrono::seconds(15);
   size_t iterations = 0;
 
-  while (cur_time < end_time) {
+  while (cur_time < end_time && !cpu_toggle_arg.cpu_hotplug_failed) {
     if (cur_time + report_step < std::chrono::steady_clock::now()) {
       // Report test time.
       auto diff = std::chrono::duration_cast<std::chrono::seconds>(
@@ -261,6 +270,9 @@
       GTEST_LOG_(INFO) << "Test offline while recording for " << iterations << " times.";
     }
   }
+  if (cpu_toggle_arg.cpu_hotplug_failed) {
+    GTEST_LOG_(INFO) << "Test ends because of cpu hotplug failure.";
+  }
   cpu_toggle_arg.end_flag = true;
   cpu_toggle_thread.join();
 }
@@ -278,9 +290,7 @@
   if (!FindAHotpluggableCpu(&test_cpu)) {
     return;
   }
-  CpuToggleThreadArg cpu_toggle_arg;
-  cpu_toggle_arg.toggle_cpu = test_cpu;
-  cpu_toggle_arg.end_flag = false;
+  CpuToggleThreadArg cpu_toggle_arg(test_cpu);
   std::thread cpu_toggle_thread(CpuToggleThread, &cpu_toggle_arg);
 
   std::unique_ptr<EventTypeAndModifier> event_type_modifier = ParseEventType("cpu-cycles");
@@ -295,7 +305,7 @@
   auto report_step = std::chrono::seconds(15);
   size_t iterations = 0;
 
-  while (cur_time < end_time) {
+  while (cur_time < end_time && !cpu_toggle_arg.cpu_hotplug_failed) {
     if (cur_time + report_step < std::chrono::steady_clock::now()) {
       // Report test time.
       auto diff = std::chrono::duration_cast<std::chrono::seconds>(
@@ -319,6 +329,9 @@
       GTEST_LOG_(INFO) << "Test offline while ioctl(PERF_EVENT_IOC_ENABLE) for " << iterations << " times.";
     }
   }
+  if (cpu_toggle_arg.cpu_hotplug_failed) {
+    GTEST_LOG_(INFO) << "Test ends because of cpu hotplug failure.";
+  }
   cpu_toggle_arg.end_flag = true;
   cpu_toggle_thread.join();
 }
@@ -350,9 +363,7 @@
   if (!FindAHotpluggableCpu(&test_cpu)) {
     return;
   }
-  CpuToggleThreadArg cpu_toggle_arg;
-  cpu_toggle_arg.toggle_cpu = test_cpu;
-  cpu_toggle_arg.end_flag = false;
+  CpuToggleThreadArg cpu_toggle_arg(test_cpu);
   std::thread cpu_toggle_thread(CpuToggleThread, &cpu_toggle_arg);
 
   // Start cpu spinner.
@@ -378,7 +389,7 @@
   auto report_step = std::chrono::seconds(15);
   size_t iterations = 0;
 
-  while (cur_time < end_time) {
+  while (cur_time < end_time && !cpu_toggle_arg.cpu_hotplug_failed) {
     if (cur_time + report_step < std::chrono::steady_clock::now()) {
       auto diff = std::chrono::duration_cast<std::chrono::seconds>(
           std::chrono::steady_clock::now() - start_time);
@@ -403,13 +414,17 @@
       GTEST_LOG_(INFO) << "Test offline while user process profiling for " << iterations << " times.";
     }
   }
+  if (cpu_toggle_arg.cpu_hotplug_failed) {
+    GTEST_LOG_(INFO) << "Test ends because of cpu hotplug failure.";
+  }
   cpu_toggle_arg.end_flag = true;
   cpu_toggle_thread.join();
   cpu_spin_arg.end_flag = true;
   cpu_spin_thread.join();
   // Check if the cpu-cycle event is still available on test_cpu.
-  ASSERT_TRUE(SetCpuOnline(test_cpu, true));
-  ASSERT_TRUE(EventFd::OpenEventFile(attr, -1, test_cpu, nullptr, true) != nullptr);
+  if (SetCpuOnline(test_cpu, true)) {
+    ASSERT_TRUE(EventFd::OpenEventFile(attr, -1, test_cpu, nullptr, true) != nullptr);
+  }
 }
 
 // http://b/19863147.
@@ -433,10 +448,14 @@
   const size_t TEST_ITERATION_COUNT = 10u;
   for (size_t i = 0; i < TEST_ITERATION_COUNT; ++i) {
     int record_cpu = 0;
-    ASSERT_TRUE(SetCpuOnline(test_cpu, true));
+    if (!SetCpuOnline(test_cpu, true)) {
+      break;
+    }
     std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, getpid(), record_cpu, nullptr);
     ASSERT_TRUE(event_fd != nullptr);
-    ASSERT_TRUE(SetCpuOnline(test_cpu, false));
+    if (!SetCpuOnline(test_cpu, false)) {
+      break;
+    }
     event_fd = nullptr;
     event_fd = EventFd::OpenEventFile(attr, getpid(), record_cpu, nullptr);
     ASSERT_TRUE(event_fd != nullptr);
diff --git a/squashfs_utils/mksquashfsimage.sh b/squashfs_utils/mksquashfsimage.sh
index 6a2ec1c..8357415 100755
--- a/squashfs_utils/mksquashfsimage.sh
+++ b/squashfs_utils/mksquashfsimage.sh
@@ -5,7 +5,7 @@
 function usage() {
 cat<<EOT
 Usage:
-${0##*/} SRC_DIR OUTPUT_FILE [-s] [-m MOUNT_POINT] [-d PRODUCT_OUT] [-C FS_CONFIG ] [-c FILE_CONTEXTS] [-B BLOCK_MAP_FILE] [-b BLOCK_SIZE] [-z COMPRESSOR] [-zo COMPRESSOR_OPT] [-t COMPRESS_THRESHOLD] [-a]
+${0##*/} SRC_DIR OUTPUT_FILE [-s] [-m MOUNT_POINT] [-d PRODUCT_OUT] [-C FS_CONFIG ] [-c FILE_CONTEXTS] [-B BLOCK_MAP_FILE] [-b BLOCK_SIZE] [-z COMPRESSOR] [-zo COMPRESSOR_OPT] [-t COMPRESS_THRESHOLD] [-w WHITELIST_FILE] [-a]
 EOT
 }
 
@@ -85,6 +85,11 @@
     shift; shift
 fi
 
+WHITELIST_FILE=
+if [[ "$1" == "-w" ]]; then
+    WHITELIST_FILE=$2
+    shift; shift
+fi
 
 DISABLE_4K_ALIGN=false
 if [[ "$1" == "-a" ]]; then
@@ -117,6 +122,9 @@
 if [ "$DISABLE_4K_ALIGN" = true ]; then
   OPT="$OPT -disable-4k-align"
 fi
+if [ -n "$WHITELIST_FILE" ]; then
+    OPT="$OPT -whitelist $WHITELIST_FILE"
+fi
 
 MAKE_SQUASHFS_CMD="mksquashfs $SRC_DIR/ $OUTPUT_FILE -no-progress -comp $COMPRESSOR $COMPRESSOR_OPT -no-exports -noappend -no-recovery -no-fragments -no-duplicates -android-fs-config $OPT"
 echo $MAKE_SQUASHFS_CMD
diff --git a/tests/icachetest/Android.mk b/tests/icachetest/Android.mk
index 132efd3..9874ffd 100644
--- a/tests/icachetest/Android.mk
+++ b/tests/icachetest/Android.mk
@@ -2,7 +2,7 @@
 LOCAL_PATH:= $(call my-dir)
 include $(CLEAR_VARS)
 
-LOCAL_SRC_FILES:= icache_main.c icache.S icache2.S
+LOCAL_SRC_FILES:= icache_main.cpp Profiler.cpp icache.S
 
 LOCAL_SHARED_LIBRARIES := libc
 
@@ -12,4 +12,6 @@
 
 LOCAL_MODULE_TARGET_ARCH := arm
 
+LOCAL_CFLAGS += -Wall -Werror
+
 include $(BUILD_EXECUTABLE)
diff --git a/tests/icachetest/Profiler.cpp b/tests/icachetest/Profiler.cpp
new file mode 100644
index 0000000..792cf43
--- /dev/null
+++ b/tests/icachetest/Profiler.cpp
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Profiler.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <iostream>
+
+#if defined(__linux__)
+
+#include <sys/syscall.h>
+
+#ifdef __ARM_ARCH
+    enum ARMv8PmuPerfTypes{
+        // Common micro-architecture events; used as perf_event_attr.config with PERF_TYPE_RAW
+        ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL    = 0x01,  // opened for EV_L1I_MISSES
+        ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS    = 0x14,  // opened for EV_L1I_REFS
+        ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS     = 0x16,  // not referenced in this file
+        ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL     = 0x17,  // not referenced in this file
+        ARMV8_PMUV3_PERFCTR_L2_CACHE_WB         = 0x18,  // not referenced in this file
+    };
+#endif
+
+static int perf_event_open(struct perf_event_attr* hw_event, pid_t pid,
+        int cpu, int group_fd, unsigned long flags) {
+    // Thin wrapper: forwards all arguments to the raw syscall and returns its result as int.
+    return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
+}
+
+#endif // __linux__
+
+namespace utils {
+
+Profiler& Profiler::get() noexcept {
+    static Profiler sProfiler;  // function-local static: the one shared instance, built on first call
+    return sProfiler;
+}
+
+Profiler::Profiler() noexcept {
+    std::fill(mCountersFd.begin(), mCountersFd.end(), -1);  // -1 marks "counter not open"
+    Profiler::resetEvents(EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES);  // default event set
+}
+
+Profiler::~Profiler() noexcept {
+    for (int fd : mCountersFd) {
+        if (fd >= 0) {  // negative entries mark counters that are not open
+            close(fd);
+        }
+    }
+}
+
+uint32_t Profiler::resetEvents(uint32_t eventMask) noexcept {
+    // Close every open counter, then reopen the instruction counter plus the events in eventMask.
+    for (int& fd : mCountersFd) {
+        if (fd >= 0) {
+            close(fd);
+            fd = -1;
+        }
+    }
+    mEnabledEvents = 0;  // rebuilt below and returned: the subset of eventMask that actually opened
+
+#if defined(__linux__)
+
+    struct perf_event_attr pe;
+    memset(&pe, 0, sizeof(struct perf_event_attr));
+    pe.type = PERF_TYPE_HARDWARE;
+    pe.size = sizeof(struct perf_event_attr);
+    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
+    pe.disabled = 1;  // created disabled; Profiler::start() enables the group
+    pe.exclude_kernel = 1;
+    pe.exclude_hv = 1;
+    pe.read_format = PERF_FORMAT_GROUP |
+                     PERF_FORMAT_ID |
+                     PERF_FORMAT_TOTAL_TIME_ENABLED |
+                     PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+    uint8_t count = 0;  // next free slot in the group read() result; stored per event in mIds
+    int fd = perf_event_open(&pe, 0, -1, -1, 0);  // group_fd == -1: this fd is the group leader
+    if (fd >= 0) {
+        const int groupFd = fd;
+        mIds[INSTRUCTIONS] = count++;
+        mCountersFd[INSTRUCTIONS] = fd;
+
+        pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
+
+        // Each event below is attached to groupFd. A result >= 0 is a valid descriptor (0 included),
+        if (eventMask & EV_CPU_CYCLES) {  // matching the >= 0 tests used by every reader of mCountersFd.
+            pe.type = PERF_TYPE_HARDWARE;
+            pe.config = PERF_COUNT_HW_CPU_CYCLES;
+            mCountersFd[CPU_CYCLES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+            if (mCountersFd[CPU_CYCLES] >= 0) {
+                mIds[CPU_CYCLES] = count++;
+                mEnabledEvents |= EV_CPU_CYCLES;
+            }
+        }
+
+        if (eventMask & EV_L1D_REFS) {
+            pe.type = PERF_TYPE_HARDWARE;
+            pe.config = PERF_COUNT_HW_CACHE_REFERENCES;
+            mCountersFd[DCACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
+            if (mCountersFd[DCACHE_REFS] >= 0) {
+                mIds[DCACHE_REFS] = count++;
+                mEnabledEvents |= EV_L1D_REFS;
+            }
+        }
+
+        if (eventMask & EV_L1D_MISSES) {
+            pe.type = PERF_TYPE_HARDWARE;
+            pe.config = PERF_COUNT_HW_CACHE_MISSES;
+            mCountersFd[DCACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+            if (mCountersFd[DCACHE_MISSES] >= 0) {
+                mIds[DCACHE_MISSES] = count++;
+                mEnabledEvents |= EV_L1D_MISSES;
+            }
+        }
+
+        if (eventMask & EV_BPU_REFS) {
+            pe.type = PERF_TYPE_HARDWARE;
+            pe.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+            mCountersFd[BRANCHES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+            if (mCountersFd[BRANCHES] >= 0) {
+                mIds[BRANCHES] = count++;
+                mEnabledEvents |= EV_BPU_REFS;
+            }
+        }
+
+        if (eventMask & EV_BPU_MISSES) {
+            pe.type = PERF_TYPE_HARDWARE;
+            pe.config = PERF_COUNT_HW_BRANCH_MISSES;
+            mCountersFd[BRANCH_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+            if (mCountersFd[BRANCH_MISSES] >= 0) {
+                mIds[BRANCH_MISSES] = count++;
+                mEnabledEvents |= EV_BPU_MISSES;
+            }
+        }
+
+#ifdef __ARM_ARCH
+        if (eventMask & EV_L1I_REFS) {
+            pe.type = PERF_TYPE_RAW;
+            pe.config = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS;
+            mCountersFd[ICACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
+            if (mCountersFd[ICACHE_REFS] >= 0) {
+                mIds[ICACHE_REFS] = count++;
+                mEnabledEvents |= EV_L1I_REFS;
+            }
+        }
+
+        if (eventMask & EV_L1I_MISSES) {
+            pe.type = PERF_TYPE_RAW;
+            pe.config = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL;
+            mCountersFd[ICACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+            if (mCountersFd[ICACHE_MISSES] >= 0) {
+                mIds[ICACHE_MISSES] = count++;
+                mEnabledEvents |= EV_L1I_MISSES;
+            }
+        }
+#else
+        if (eventMask & EV_L1I_REFS) {
+            pe.type = PERF_TYPE_HW_CACHE;
+            pe.config = PERF_COUNT_HW_CACHE_L1I |
+                (PERF_COUNT_HW_CACHE_OP_READ<<8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS<<16);
+            mCountersFd[ICACHE_REFS] = perf_event_open(&pe, 0, -1, groupFd, 0);
+            if (mCountersFd[ICACHE_REFS] >= 0) {
+                mIds[ICACHE_REFS] = count++;
+                mEnabledEvents |= EV_L1I_REFS;
+            }
+        }
+
+        if (eventMask & EV_L1I_MISSES) {
+            pe.type = PERF_TYPE_HW_CACHE;
+            pe.config = PERF_COUNT_HW_CACHE_L1I |
+                (PERF_COUNT_HW_CACHE_OP_READ<<8) | (PERF_COUNT_HW_CACHE_RESULT_MISS<<16);
+            mCountersFd[ICACHE_MISSES] = perf_event_open(&pe, 0, -1, groupFd, 0);
+            if (mCountersFd[ICACHE_MISSES] >= 0) {
+                mIds[ICACHE_MISSES] = count++;
+                mEnabledEvents |= EV_L1I_MISSES;
+            }
+        }
+#endif
+    }
+#endif // __linux__
+    return mEnabledEvents;
+}
+
+} // namespace utils
diff --git a/tests/icachetest/Profiler.h b/tests/icachetest/Profiler.h
new file mode 100644
index 0000000..a36cab3
--- /dev/null
+++ b/tests/icachetest/Profiler.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TNT_UTILS_PROFILER_H
+#define TNT_UTILS_PROFILER_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <array>
+#include <chrono>
+
+#if defined(__linux__)
+#   include <unistd.h>
+#   include <sys/ioctl.h>
+#   include <linux/perf_event.h>
+#endif
+
+namespace utils {
+
+class Profiler {  // process-wide wrapper around one group of Linux perf_event hardware counters
+    enum {  // index of each event in mCountersFd / mIds and in Counters::counters
+        INSTRUCTIONS    = 0,   // must be zero
+        CPU_CYCLES      = 1,
+        DCACHE_REFS     = 2,
+        DCACHE_MISSES   = 3,
+        BRANCHES        = 4,
+        BRANCH_MISSES   = 5,
+        ICACHE_REFS     = 6,
+        ICACHE_MISSES   = 7,
+
+        // Must be last one
+        EVENT_COUNT
+    };
+
+public:
+
+    enum {  // bit masks accepted by resetEvents(); bit position == event index above
+        EV_CPU_CYCLES = 1 << CPU_CYCLES,
+        EV_L1D_REFS   = 1 << DCACHE_REFS,
+        EV_L1D_MISSES = 1 << DCACHE_MISSES,
+        EV_BPU_REFS   = 1 << BRANCHES,
+        EV_BPU_MISSES = 1 << BRANCH_MISSES,
+        EV_L1I_REFS   = 1 << ICACHE_REFS,
+        EV_L1I_MISSES = 1 << ICACHE_MISSES,
+        // helpers
+        EV_L1D_RATES = EV_L1D_REFS | EV_L1D_MISSES,
+        EV_L1I_RATES = EV_L1I_REFS | EV_L1I_MISSES,
+        EV_BPU_RATES = EV_BPU_REFS | EV_BPU_MISSES,
+    };
+
+    static Profiler& get() noexcept;  // returns the single shared instance
+
+    // Not copyable or movable: the instance owns the counter file descriptors.
+    Profiler(const Profiler& rhs) = delete;
+    Profiler(Profiler&& rhs) = delete;
+    Profiler& operator=(const Profiler& rhs) = delete;
+    Profiler& operator=(Profiler&& rhs) = delete;
+
+    // selects which events are enabled; returns the mask of events that could be opened.
+    // By Default: EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES
+    uint32_t resetEvents(uint32_t eventMask) noexcept;
+
+    uint32_t getEnabledEvents() const noexcept { return mEnabledEvents; }
+
+    // could return false if performance counters are not supported/enabled
+    bool isValid() const { return mCountersFd[0] >= 0; }
+
+    class Counters {  // one snapshot; field order mirrors what readCounters() read()s from the group fd
+        friend class Profiler;
+        uint64_t nr;
+        uint64_t time_enabled;
+        uint64_t time_running;
+        struct {
+            uint64_t value;
+            uint64_t id;
+        } counters[Profiler::EVENT_COUNT];
+
+        friend Counters operator-(Counters lhs, const Counters& rhs) noexcept {  // field-wise delta
+            lhs.nr -= rhs.nr;
+            lhs.time_enabled -= rhs.time_enabled;
+            lhs.time_running -= rhs.time_running;
+            for (size_t i=0 ; i<EVENT_COUNT ; ++i) {
+                lhs.counters[i].value -= rhs.counters[i].value;  // ids are left as in lhs
+            }
+            return lhs;
+        }
+
+    public:
+        uint64_t getInstructions() const        { return counters[INSTRUCTIONS].value; }
+        uint64_t getCpuCycles() const           { return counters[CPU_CYCLES].value; }
+        uint64_t getL1DReferences() const       { return counters[DCACHE_REFS].value; }
+        uint64_t getL1DMisses() const           { return counters[DCACHE_MISSES].value; }
+        uint64_t getL1IReferences() const       { return counters[ICACHE_REFS].value; }
+        uint64_t getL1IMisses() const           { return counters[ICACHE_MISSES].value; }
+        uint64_t getBranchInstructions() const  { return counters[BRANCHES].value; }
+        uint64_t getBranchMisses() const        { return counters[BRANCH_MISSES].value; }
+
+        std::chrono::duration<uint64_t, std::nano> getWallTime() const {
+            return std::chrono::duration<uint64_t, std::nano>(time_enabled);
+        }
+
+        std::chrono::duration<uint64_t, std::nano> getRunningTime() const {
+            return std::chrono::duration<uint64_t, std::nano>(time_running);
+        }
+
+        double getIPC() const noexcept {  // instructions per cycle
+            uint64_t cpuCycles = getCpuCycles();
+            uint64_t instructions = getInstructions();
+            return double(instructions) / double(cpuCycles);
+        }
+
+        double getCPI() const noexcept {  // cycles per instruction
+            uint64_t cpuCycles = getCpuCycles();
+            uint64_t instructions = getInstructions();
+            return double(cpuCycles) / double(instructions);
+        }
+
+        double getL1DMissRate() const noexcept {  // misses / references, not guarded against 0 references
+            uint64_t cacheReferences = getL1DReferences();
+            uint64_t cacheMisses = getL1DMisses();
+            return double(cacheMisses) / double(cacheReferences);
+        }
+
+        double getL1DHitRate() const noexcept {
+            return 1.0 - getL1DMissRate();
+        }
+
+        double getL1IMissRate() const noexcept {  // misses / references, not guarded against 0 references
+            uint64_t cacheReferences = getL1IReferences();
+            uint64_t cacheMisses = getL1IMisses();
+            return double(cacheMisses) / double(cacheReferences);
+        }
+
+        double getL1IHitRate() const noexcept {
+            return 1.0 - getL1IMissRate();
+        }
+
+        double getBranchMissRate() const noexcept {  // misses / branches, not guarded against 0 branches
+            uint64_t branchReferences = getBranchInstructions();
+            uint64_t branchMisses = getBranchMisses();
+            return double(branchMisses) / double(branchReferences);
+        }
+
+        double getBranchHitRate() const noexcept {
+            return 1.0 - getBranchMissRate();
+        }
+
+        double getMPKI(uint64_t misses) const noexcept {  // misses per 1000 counted instructions
+            return (misses * 1000.0) / getInstructions();
+        }
+
+    };
+
+#if defined(__linux__)
+
+    void reset() noexcept {  // resets the whole group through the leader fd; ioctl result is ignored
+        int fd = mCountersFd[0];
+        ioctl(fd, PERF_EVENT_IOC_RESET,  PERF_IOC_FLAG_GROUP);
+    }
+
+    void start() noexcept {  // enables the whole group through the leader fd; ioctl result is ignored
+        int fd = mCountersFd[0];
+        ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
+    }
+
+    void stop() noexcept {  // disables the whole group through the leader fd; ioctl result is ignored
+        int fd = mCountersFd[0];
+        ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
+    }
+
+    void readCounters(Counters* outCounters) noexcept {  // *outCounters is all zeros if the read fails
+        Counters counters;
+        ssize_t n = read(mCountersFd[0], &counters, sizeof(Counters));
+        *outCounters = Counters();  // value-initialization zeroes every field; no memset/<string.h> needed
+        if (n > 0) {
+            outCounters->nr = counters.nr;
+            outCounters->time_enabled = counters.time_enabled;
+            outCounters->time_running = counters.time_running;
+            for (size_t i=0 ; i<size_t(EVENT_COUNT) ; i++) {
+                if (mCountersFd[i] >= 0) {  // only open events have a meaningful mIds[i]
+                    outCounters->counters[i] = counters.counters[mIds[i]];  // group slot -> event index
+                }
+            }
+        }
+    }
+
+#else // !__linux__
+
+    void reset() noexcept { }
+    void start() noexcept { }
+    void stop() noexcept { }
+    void readCounters(Counters* counters) noexcept { }
+
+#endif // __linux__
+
+    bool hasBranchRates() const noexcept {
+        return (mCountersFd[BRANCHES] >= 0) && (mCountersFd[BRANCH_MISSES] >= 0);
+    }
+
+    bool hasICacheRates() const noexcept {
+        return (mCountersFd[ICACHE_REFS] >= 0) && (mCountersFd[ICACHE_MISSES] >= 0);
+    }
+
+private:
+    Profiler() noexcept;
+    ~Profiler() noexcept;
+
+    std::array<uint8_t, EVENT_COUNT> mIds;      // per event: its slot in the group read() result
+    std::array<int, EVENT_COUNT> mCountersFd;   // per event: perf fd, or -1 when not open
+    uint32_t mEnabledEvents = 0;                // EV_* mask of the optional events currently open
+};
+
+} // namespace utils
+
+#endif // TNT_UTILS_PROFILER_H
diff --git a/tests/icachetest/icache.S b/tests/icachetest/icache.S
index fbe8fa7..e82895d 100644
--- a/tests/icachetest/icache.S
+++ b/tests/icachetest/icache.S
@@ -19,6 +19,14 @@
         mov     r0, r0                 ; \
         mov     r0, r0                 ; \
         mov     r0, r0                 ; \
+        mov     r0, r0                 ; \
+        mov     r0, r0                 ; \
+        mov     r0, r0                 ; \
+        mov     r0, r0                 ; \
+        mov     r0, r0                 ; \
+        mov     r0, r0                 ; \
+        mov     r0, r0                 ; \
+        mov     r0, r0                 ; \
         beq     end_loop               ; \
         mov     r0, r0                 ; \
 
@@ -37,6 +45,14 @@
         mov     r0, r0
         mov     r0, r0
         mov     r0, r0
+        mov     r0, r0
+        mov     r0, r0
+        mov     r0, r0
+        mov     r0, r0
+        mov     r0, r0
+        mov     r0, r0
+        mov     r0, r0
+        mov     r0, r0
 
 end_loop:
 		subs      r0, r0, r1
diff --git a/tests/icachetest/icache2.S b/tests/icachetest/icache2.S
deleted file mode 100644
index 2a204ce..0000000
--- a/tests/icachetest/icache2.S
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- *  icache.s
- *  
- *
- *  Copyright 2005 The Android Open Source Project
- *
- */
-
-    .text
-    .align
-    
-    .global icache_test2
-    .type icache_test2, %function
-
-#define LOOP                             \
-        mov     r0, r0                 ; \
-        mov     r0, r0                 ; \
-        mov     r0, r0                 ; \
-        mov     r0, r0                 ; \
-        mov     r0, r0                 ; \
-        mov     r0, r0                 ; \
-        mov     r0, r0                 ; \
-        mov     r0, r0                 ;
-
-
-    /*
-     * r0 = loop_count
-     * r1 = step
-     * r2 = mask
-     */
-
-icache_test2:
-end_loop:
-        
-        /* each loop iteration is one cache line 
-           repeat this block 2048 times... */
-
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-        LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP LOOP 
-    
-        subs    r0, r0, #1
-        bgt     end_loop
-        bx      lr
-
-        
diff --git a/tests/icachetest/icache_main.c b/tests/icachetest/icache_main.c
deleted file mode 100644
index 93f36d4..0000000
--- a/tests/icachetest/icache_main.c
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <stdio.h>
-#include <sys/time.h>
-
-extern void icache_test(long count, long step);
-extern void icache_test2(long count);
-
-int main() 
-{
-    printf("[bytes]\t[us]\n");
-
-    struct timeval now, tm;
-    long long t;
-    long MBs;
-    long i;
-    long step = 32;
-    for (i=0 ; step<=2048 ; i++, step+=32) 
-    {
-        long value;
-        gettimeofday(&now, 0);
-        icache_test(0x800000L, step);
-        gettimeofday(&tm, 0);
-        t = (tm.tv_sec*1000000LL+tm.tv_usec) - (now.tv_sec*1000000LL+now.tv_usec);
-        printf("%6ld\t%lld\n", step*32, t);
-    }
-
-    gettimeofday(&now, 0);
-    icache_test2(0x800000L / 2048);
-    gettimeofday(&tm, 0);
-    t = (tm.tv_sec*1000000LL+tm.tv_usec) - (now.tv_sec*1000000LL+now.tv_usec);
-    MBs = (8388608LL*32*1000000) / (t * (1024*1024));
-    printf("\n%6lld us\t%ld MB/s\n", t, MBs);
-    
-    return 0;
-}
diff --git a/tests/icachetest/icache_main.cpp b/tests/icachetest/icache_main.cpp
new file mode 100644
index 0000000..d5aeda0
--- /dev/null
+++ b/tests/icachetest/icache_main.cpp
@@ -0,0 +1,124 @@
+#include <stdio.h>
+#include <sys/time.h>
+#include <getopt.h>
+
+#include <thread>
+#include <iostream>
+#include <iomanip>
+
+#include <sched.h>
+
+#include "Profiler.h"
+
+extern "C" void icache_test(long count, long step);
+
+static constexpr size_t MAX_CODE_SIZE = 128*1024;
+static constexpr size_t CACHE_LINE_SIZE = 64;
+static constexpr size_t MAX_ITERATIONS_COUNT = MAX_CODE_SIZE / CACHE_LINE_SIZE;
+static constexpr size_t REPETITIONS = 0x800000L;
+
+
+using namespace utils;
+
+static cpu_set_t g_cpu_set;
+
+static void printUsage(char* name) {
+    // Prints the help text with every "ICACHE" placeholder replaced by the program name.
+    std::string usage(
+            "ICACHE is a command-line tool for testing the L1 instruction cache performance.\n"
+            "(Make sure security.perf_harden is set to 0)\n\n"
+            "Usages:\n"
+            "    ICACHE [options]\n\n"
+            "Options:\n"
+            "   --help, -h\n"
+            "       print this message\n\n"
+            "   --affinity=N, -a N\n"
+            "       Specify which CPU the test should run on.\n\n"
+    );
+    const std::string from("ICACHE"), to(name);
+    for (size_t pos = usage.find(from); pos != std::string::npos;
+            pos = usage.find(from, pos + to.length())) {  // resume after the inserted text so a name containing "ICACHE" cannot loop forever
+         usage.replace(pos, from.length(), to);
+    }
+    printf("%s", usage.c_str());
+}
+
+// Parses --help/-h and --affinity/-a N into g_cpu_set; returns optind (index of the first non-option).
+static int handleCommandLineArgments(int argc, char* argv[]) {
+    static constexpr const char* OPTSTR = "ha:";
+    static const struct option OPTIONS[] = {
+            { "help",                 no_argument, 0, 'h' },
+            { "affinity",       required_argument, 0, 'a' },
+            { 0, 0, 0, 0 }  // termination of the option list
+    };
+    int opt;
+    while ((opt = getopt_long(argc, argv, OPTSTR, OPTIONS, nullptr)) >= 0) {
+        switch (opt) {
+            default:  // unknown option or missing argument: show the help text as well
+            case 'h':
+                printUsage(argv[0]);
+                exit(0);  // does not return
+            case 'a': {
+                char* end = nullptr;
+                const long cpu = strtol(optarg, &end, 10);  // unlike std::stoi, never throws on bad input
+                const long ncpus = std::thread::hardware_concurrency();
+                if (end == optarg || cpu < 0 || cpu >= ncpus) {  // no digits, negative, or out of range
+                    std::cerr << "N must be < " << ncpus << std::endl;
+                    exit(0);
+                }
+                CPU_SET(cpu, &g_cpu_set);
+                break;
+            }
+        }
+    }
+    return optind;
+}
+
+// Calls icache_test() with step sizes from 1 KiB to MAX_CODE_SIZE and prints per-repetition counters.
+int main(int argc, char* argv[]) {
+    CPU_ZERO(&g_cpu_set);
+
+    handleCommandLineArgments(argc, argv);  // returns optind; no positional arguments are consumed
+
+    if (CPU_COUNT(&g_cpu_set) && sched_setaffinity(gettid(), sizeof(g_cpu_set), &g_cpu_set) != 0) {
+        perror("sched_setaffinity");  // report the failure but still run, unpinned
+    }
+
+    Profiler& profiler = Profiler::get();
+    profiler.resetEvents(Profiler::EV_CPU_CYCLES | Profiler::EV_L1I_RATES);
+
+    if (!profiler.isValid()) {
+        fprintf(stderr, "performance counters not enabled. try \"setprop security.perf_harden 0\"\n");
+        exit(0);
+    }
+
+    size_t const stepInBytes = 1024;    // 1 KiB steps
+    size_t const step = stepInBytes / CACHE_LINE_SIZE;
+
+    std::cout << std::fixed << std::setprecision(2);
+
+    printf("[KiB]\t[cyc]\t[refs]\t[MPKI]\t[ns]\n");
+
+    Profiler::Counters counters;
+
+    for (size_t i=step ; i <= MAX_ITERATIONS_COUNT ; i += step) {
+        profiler.reset();
+
+        auto now = std::chrono::steady_clock::now();
+        profiler.start();
+        icache_test(REPETITIONS, i);
+        profiler.stop();
+        auto duration = std::chrono::steady_clock::now() - now;
+
+        profiler.readCounters(&counters);
+
+        std::cout << ((i*CACHE_LINE_SIZE)/1024) << "\t"
+            << counters.getCpuCycles()/double(REPETITIONS) << "\t"
+            << counters.getL1IReferences()/double(REPETITIONS) << "\t"
+            << counters.getMPKI(counters.getL1IMisses()) << "\t"
+            << std::chrono::duration<double, std::nano>(duration).count()/REPETITIONS << "\t"
+            << std::endl;
+    }
+
+    return 0;
+}
diff --git a/tests/sdcard/sdcard_perf_test.cpp b/tests/sdcard/sdcard_perf_test.cpp
index c93c52b..7efa650 100644
--- a/tests/sdcard/sdcard_perf_test.cpp
+++ b/tests/sdcard/sdcard_perf_test.cpp
@@ -132,7 +132,7 @@
            "  -s --size:        Size in kbytes of the data.\n"
            "  -S --chunk-size:  Size of a chunk. Default to size ie 1 chunk.\n"
            "                    Data will be written/read using that chunk size.\n"
-           "  -D --depth:       Depth of directory tree to create for traversal.\n",
+           "  -D --depth:       Depth of directory tree to create for traversal.\n"
            "  -i --iterations:  Number of time a process should carry its task.\n"
            "  -p --procnb:      Number of processes to use.\n"
            "  -d --dump:        Print the raw timing on stdout.\n"
diff --git a/tests/workloads/pwrsummary.sh b/tests/workloads/pwrsummary.sh
index 3d3aeb8..c527b54 100755
--- a/tests/workloads/pwrsummary.sh
+++ b/tests/workloads/pwrsummary.sh
@@ -99,7 +99,7 @@
 	# Number Slow bitmap uploads: 12
 	# Number Slow draw: 89
 	# use with "stdbuf -o0 " to disable pipe buffering
-	# stdbuf -o0 adb shell /data/hwuitest shadowgrid2 400 | stdbuf -o0 ./hwuitestfilter.sh  | tee t.csv
+	# stdbuf -o0 adb shell /data/local/tmp/hwuimacro shadowgrid2 400 | stdbuf -o0 ./hwuitestfilter.sh  | tee t.csv
 	sed -e 's/ns//' -e 's/[\(\)%]/ /g' | awk '
 	BEGIN { startTime=0; lastTime=0; }
 	/^Stats since:/ {
diff --git a/tests/workloads/pwrtest.sh b/tests/workloads/pwrtest.sh
index 39f7b11..fd5d825 100755
--- a/tests/workloads/pwrtest.sh
+++ b/tests/workloads/pwrtest.sh
@@ -94,16 +94,16 @@
 
 case $DEVICE in
 (shamu|hammerhead)
-	HWUITEST=hwuitest
+	HWUIMACRO=hwuimacro
 	onSwipe="700 1847 700 400 50"
 	;;
 (*)
-	HWUITEST=hwuitest64
+	HWUIMACRO=hwuimacro64
 	onSwipe="500 1200 500 550 150"
 	;;
 esac
 
-scripts="defs.sh systemapps.sh recentfling.sh youtube.sh chromefling.sh $HWUITEST"
+scripts="defs.sh systemapps.sh recentfling.sh youtube.sh chromefling.sh"
 
 if ! $MONSOON >/dev/null 2>&1; then
 	echo $MONSOON must be in your PATH >&2
@@ -253,6 +253,7 @@
 
 echo Copying $scripts to device $devdir...
 copy_files
+adb shell ln -s /data/benchmarktest/hwuimacro/$HWUIMACRO $devdir/$HWUIMACRO
 tests=""
 
 # measure background power
@@ -332,9 +333,9 @@
 if [ $shadowgrid2Time -gt 0 ]; then
 	airplane_mode on
 	echo $(date) Test 4 : shadowgrid2 for $shadowgrid2Time minutes
-	start_job "./$HWUITEST shadowgrid2 100000"
+	start_job "./$HWUIMACRO --onscreen shadowgrid2 100000"
 	run_test shadowgrid2 $shadowgrid2Time
-	cleanup_job shadowgrid2 $HWUITEST
+	cleanup_job shadowgrid2 $HWUIMACRO
 	airplane_mode off
 	date
 	tests="$tests shadowgrid2"
