Merge "simpleperf: Use cross-arch unwinding."
diff --git a/simpleperf/OfflineUnwinder.cpp b/simpleperf/OfflineUnwinder.cpp
index cd53744..5b147de 100644
--- a/simpleperf/OfflineUnwinder.cpp
+++ b/simpleperf/OfflineUnwinder.cpp
@@ -16,94 +16,102 @@
 
 #include "OfflineUnwinder.h"
 
-#include <ucontext.h>
-
 #include <android-base/logging.h>
 #include <backtrace/Backtrace.h>
+#include <unwindstack/MachineArm.h>
+#include <unwindstack/MachineArm64.h>
+#include <unwindstack/MachineX86.h>
+#include <unwindstack/MachineX86_64.h>
+#include <unwindstack/Regs.h>
+#include <unwindstack/RegsArm.h>
+#include <unwindstack/RegsArm64.h>
+#include <unwindstack/RegsX86.h>
+#include <unwindstack/RegsX86_64.h>
+#include <unwindstack/UserArm.h>
+#include <unwindstack/UserArm64.h>
+#include <unwindstack/UserX86.h>
+#include <unwindstack/UserX86_64.h>
 
 #include "environment.h"
+#include "perf_regs.h"
 #include "read_apk.h"
 #include "thread_tree.h"
 
 namespace simpleperf {
 
-#define SetUContextReg(dst, perf_regno)          \
-  do {                                           \
-    uint64_t value;                              \
-    if (GetRegValue(regs, perf_regno, &value)) { \
-      (dst) = value;                             \
-    }                                            \
-  } while (0)
-
-static ucontext_t BuildUContextFromRegs(const RegSet& regs __attribute__((unused))) {
-  ucontext_t ucontext;
-  memset(&ucontext, 0, sizeof(ucontext));
-#if defined(__i386__)
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_GS], PERF_REG_X86_GS);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_FS], PERF_REG_X86_FS);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_ES], PERF_REG_X86_ES);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_DS], PERF_REG_X86_DS);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EAX], PERF_REG_X86_AX);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EBX], PERF_REG_X86_BX);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_ECX], PERF_REG_X86_CX);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EDX], PERF_REG_X86_DX);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_ESI], PERF_REG_X86_SI);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EDI], PERF_REG_X86_DI);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EBP], PERF_REG_X86_BP);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EIP], PERF_REG_X86_IP);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_ESP], PERF_REG_X86_SP);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_CS], PERF_REG_X86_CS);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_EFL], PERF_REG_X86_FLAGS);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_SS], PERF_REG_X86_SS);
-#elif defined(__x86_64__)
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R8], PERF_REG_X86_R8);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R9], PERF_REG_X86_R9);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R10], PERF_REG_X86_R10);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R11], PERF_REG_X86_R11);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R12], PERF_REG_X86_R12);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R13], PERF_REG_X86_R13);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R14], PERF_REG_X86_R14);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_R15], PERF_REG_X86_R15);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RDI], PERF_REG_X86_DI);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RSI], PERF_REG_X86_SI);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RBP], PERF_REG_X86_BP);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RBX], PERF_REG_X86_BX);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RDX], PERF_REG_X86_DX);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RAX], PERF_REG_X86_AX);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RCX], PERF_REG_X86_CX);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RSP], PERF_REG_X86_SP);
-  SetUContextReg(ucontext.uc_mcontext.gregs[REG_RIP], PERF_REG_X86_IP);
-#elif defined(__aarch64__)
-  for (size_t i = PERF_REG_ARM64_X0; i < PERF_REG_ARM64_MAX; ++i) {
-    SetUContextReg(ucontext.uc_mcontext.regs[i], i);
+static unwindstack::Regs* GetBacktraceRegs(const RegSet& regs) {
+  switch (regs.arch) {
+    case ARCH_ARM: {
+      unwindstack::arm_user_regs arm_user_regs;
+      memset(&arm_user_regs, 0, sizeof(arm_user_regs));
+      static_assert(
+          static_cast<int>(unwindstack::ARM_REG_R0) == static_cast<int>(PERF_REG_ARM_R0), "");
+      static_assert(
+          static_cast<int>(unwindstack::ARM_REG_LAST) == static_cast<int>(PERF_REG_ARM_MAX), "");
+      for (size_t i = unwindstack::ARM_REG_R0; i < unwindstack::ARM_REG_LAST; ++i) {
+        arm_user_regs.regs[i] = static_cast<uint32_t>(regs.data[i]);
+      }
+      return unwindstack::RegsArm::Read(&arm_user_regs);
+    }
+    case ARCH_ARM64: {
+      unwindstack::arm64_user_regs arm64_user_regs;
+      memset(&arm64_user_regs, 0, sizeof(arm64_user_regs));
+      static_assert(
+          static_cast<int>(unwindstack::ARM64_REG_R0) == static_cast<int>(PERF_REG_ARM64_X0), "");
+      static_assert(
+          static_cast<int>(unwindstack::ARM64_REG_R30) == static_cast<int>(PERF_REG_ARM64_LR), "");
+      memcpy(&arm64_user_regs.regs[unwindstack::ARM64_REG_R0], &regs.data[PERF_REG_ARM64_X0],
+             sizeof(uint64_t) * (PERF_REG_ARM64_LR - PERF_REG_ARM64_X0 + 1));
+      arm64_user_regs.sp = regs.data[PERF_REG_ARM64_SP];
+      arm64_user_regs.pc = regs.data[PERF_REG_ARM64_PC];
+      return unwindstack::RegsArm64::Read(&arm64_user_regs);
+    }
+    case ARCH_X86_32: {
+      unwindstack::x86_user_regs x86_user_regs;
+      memset(&x86_user_regs, 0, sizeof(x86_user_regs));
+      x86_user_regs.eax = static_cast<uint32_t>(regs.data[PERF_REG_X86_AX]);
+      x86_user_regs.ebx = static_cast<uint32_t>(regs.data[PERF_REG_X86_BX]);
+      x86_user_regs.ecx = static_cast<uint32_t>(regs.data[PERF_REG_X86_CX]);
+      x86_user_regs.edx = static_cast<uint32_t>(regs.data[PERF_REG_X86_DX]);
+      x86_user_regs.ebp = static_cast<uint32_t>(regs.data[PERF_REG_X86_BP]);
+      x86_user_regs.edi = static_cast<uint32_t>(regs.data[PERF_REG_X86_DI]);
+      x86_user_regs.esi = static_cast<uint32_t>(regs.data[PERF_REG_X86_SI]);
+      x86_user_regs.esp = static_cast<uint32_t>(regs.data[PERF_REG_X86_SP]);
+      x86_user_regs.eip = static_cast<uint32_t>(regs.data[PERF_REG_X86_IP]);
+      return unwindstack::RegsX86::Read(&x86_user_regs);
+    }
+    case ARCH_X86_64: {
+      unwindstack::x86_64_user_regs x86_64_user_regs;
+      memset(&x86_64_user_regs, 0, sizeof(x86_64_user_regs));
+      x86_64_user_regs.rax = regs.data[PERF_REG_X86_AX];
+      x86_64_user_regs.rbx = regs.data[PERF_REG_X86_BX];
+      x86_64_user_regs.rcx = regs.data[PERF_REG_X86_CX];
+      x86_64_user_regs.rdx = regs.data[PERF_REG_X86_DX];
+      x86_64_user_regs.r8 = regs.data[PERF_REG_X86_R8];
+      x86_64_user_regs.r9 = regs.data[PERF_REG_X86_R9];
+      x86_64_user_regs.r10 = regs.data[PERF_REG_X86_R10];
+      x86_64_user_regs.r11 = regs.data[PERF_REG_X86_R11];
+      x86_64_user_regs.r12 = regs.data[PERF_REG_X86_R12];
+      x86_64_user_regs.r13 = regs.data[PERF_REG_X86_R13];
+      x86_64_user_regs.r14 = regs.data[PERF_REG_X86_R14];
+      x86_64_user_regs.r15 = regs.data[PERF_REG_X86_R15];
+      x86_64_user_regs.rdi = regs.data[PERF_REG_X86_DI];
+      x86_64_user_regs.rsi = regs.data[PERF_REG_X86_SI];
+      x86_64_user_regs.rbp = regs.data[PERF_REG_X86_BP];
+      x86_64_user_regs.rsp = regs.data[PERF_REG_X86_SP];
+      x86_64_user_regs.rip = regs.data[PERF_REG_X86_IP];
+      return unwindstack::RegsX86_64::Read(&x86_64_user_regs);
+    }
+    default:
+      return nullptr;
   }
-#elif defined(__arm__)
-  SetUContextReg(ucontext.uc_mcontext.arm_r0, PERF_REG_ARM_R0);
-  SetUContextReg(ucontext.uc_mcontext.arm_r1, PERF_REG_ARM_R1);
-  SetUContextReg(ucontext.uc_mcontext.arm_r2, PERF_REG_ARM_R2);
-  SetUContextReg(ucontext.uc_mcontext.arm_r3, PERF_REG_ARM_R3);
-  SetUContextReg(ucontext.uc_mcontext.arm_r4, PERF_REG_ARM_R4);
-  SetUContextReg(ucontext.uc_mcontext.arm_r5, PERF_REG_ARM_R5);
-  SetUContextReg(ucontext.uc_mcontext.arm_r6, PERF_REG_ARM_R6);
-  SetUContextReg(ucontext.uc_mcontext.arm_r7, PERF_REG_ARM_R7);
-  SetUContextReg(ucontext.uc_mcontext.arm_r8, PERF_REG_ARM_R8);
-  SetUContextReg(ucontext.uc_mcontext.arm_r9, PERF_REG_ARM_R9);
-  SetUContextReg(ucontext.uc_mcontext.arm_r10, PERF_REG_ARM_R10);
-  SetUContextReg(ucontext.uc_mcontext.arm_fp, PERF_REG_ARM_FP);
-  SetUContextReg(ucontext.uc_mcontext.arm_ip, PERF_REG_ARM_IP);
-  SetUContextReg(ucontext.uc_mcontext.arm_sp, PERF_REG_ARM_SP);
-  SetUContextReg(ucontext.uc_mcontext.arm_lr, PERF_REG_ARM_LR);
-  SetUContextReg(ucontext.uc_mcontext.arm_pc, PERF_REG_ARM_PC);
-#endif
-  return ucontext;
 }
 
-OfflineUnwinder::OfflineUnwinder(bool strict_arch_check, bool collect_stat)
-    : strict_arch_check_(strict_arch_check), collect_stat_(collect_stat) {
+OfflineUnwinder::OfflineUnwinder(bool collect_stat) : collect_stat_(collect_stat) {
   Backtrace::SetGlobalElfCache(true);
 }
 
-bool OfflineUnwinder::UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs,
+bool OfflineUnwinder::UnwindCallChain(const ThreadEntry& thread, const RegSet& regs,
                                       const char* stack, size_t stack_size,
                                       std::vector<uint64_t>* ips, std::vector<uint64_t>* sps) {
   uint64_t start_time;
@@ -111,33 +119,11 @@
     start_time = GetSystemClock();
   }
   std::vector<uint64_t> result;
-  ArchType arch = (abi != PERF_SAMPLE_REGS_ABI_32) ?
-                      ScopedCurrentArch::GetCurrentArch() :
-                      ScopedCurrentArch::GetCurrentArch32();
-  if (!IsArchTheSame(arch, GetBuildArch(), strict_arch_check_)) {
-    LOG(ERROR) << "simpleperf is built in arch " << GetArchString(GetBuildArch())
-                << ", and can't do stack unwinding for arch " << GetArchString(arch);
-    return false;
-  }
   uint64_t sp_reg_value;
-  if (!GetSpRegValue(regs, arch, &sp_reg_value)) {
+  if (!regs.GetSpRegValue(&sp_reg_value)) {
     LOG(ERROR) << "can't get sp reg value";
     return false;
   }
-  if (arch != GetBuildArch()) {
-    uint64_t ip_reg_value;
-    if (!GetIpRegValue(regs, arch, &ip_reg_value)) {
-      LOG(ERROR) << "can't get ip reg value";
-      return false;
-    }
-    ips->push_back(ip_reg_value);
-    sps->push_back(sp_reg_value);
-    if (collect_stat_) {
-      unwinding_result_.used_time = GetSystemClock() - start_time;
-      unwinding_result_.stop_reason = UnwindingResult::DIFFERENT_ARCH;
-    }
-    return true;
-  }
   uint64_t stack_addr = sp_reg_value;
 
   std::vector<backtrace_map_t> bt_maps(thread.maps->size());
@@ -170,43 +156,38 @@
   stack_info.end = stack_addr + stack_size;
   stack_info.data = reinterpret_cast<const uint8_t*>(stack);
 
-  Backtrace::ArchEnum backtrace_arch;
-  if (arch == ARCH_ARM) {
-    backtrace_arch = Backtrace::ARCH_ARM;
-  } else if (arch == ARCH_ARM64) {
-    backtrace_arch = Backtrace::ARCH_ARM64;
-  } else if (arch == ARCH_X86_32) {
-    backtrace_arch = Backtrace::ARCH_X86;
-  } else if (arch == ARCH_X86_64) {
-    backtrace_arch = Backtrace::ARCH_X86_64;
-  } else {
-    abort();
+  std::unique_ptr<BacktraceMap> map(BacktraceMap::CreateOffline(thread.pid, bt_maps, stack_info));
+  std::unique_ptr<unwindstack::Regs> unwind_regs(GetBacktraceRegs(regs));
+  if (!map || !unwind_regs) {
+    return false;
   }
-  std::unique_ptr<Backtrace> backtrace(
-      Backtrace::CreateOffline(backtrace_arch, thread.pid, thread.tid, bt_maps, stack_info));
-  ucontext_t ucontext = BuildUContextFromRegs(regs);
-  if (backtrace->Unwind(0, &ucontext)) {
-    for (auto it = backtrace->begin(); it != backtrace->end(); ++it) {
+  std::vector<backtrace_frame_data_t> frames;
+  BacktraceUnwindError error;
+  if (Backtrace::Unwind(unwind_regs.get(), map.get(), &frames, 0u, nullptr, &error)) {
+    for (auto& frame : frames) {
       // Unwinding in arm architecture can return 0 pc address.
-      if (it->pc == 0) {
+      if (frame.pc == 0) {
         break;
       }
-      ips->push_back(it->pc);
-      sps->push_back(it->sp);
+      ips->push_back(frame.pc);
+      sps->push_back(frame.sp);
     }
   }
+
+  uint64_t ip_reg_value;
+  if (!regs.GetIpRegValue(&ip_reg_value)) {
+    LOG(ERROR) << "can't get ip reg value";
+    return false;
+  }
   if (ips->empty()) {
-    uint64_t ip_reg_value;
-    if (!GetIpRegValue(regs, arch, &ip_reg_value)) {
-      LOG(ERROR) << "can't get ip reg value";
-      return false;
-    }
     ips->push_back(ip_reg_value);
     sps->push_back(sp_reg_value);
+  } else {
+    // Check if the unwinder returns ip reg value as the first ip address in callstack.
+    CHECK_EQ((*ips)[0], ip_reg_value);
   }
   if (collect_stat_) {
     unwinding_result_.used_time = GetSystemClock() - start_time;
-    BacktraceUnwindError error = backtrace->GetError();
     switch (error.error_code) {
       case BACKTRACE_UNWIND_ERROR_EXCEED_MAX_FRAMES_LIMIT:
         unwinding_result_.stop_reason = UnwindingResult::EXCEED_MAX_FRAMES_LIMIT;
diff --git a/simpleperf/OfflineUnwinder.h b/simpleperf/OfflineUnwinder.h
index defe60a..dd57d7d 100644
--- a/simpleperf/OfflineUnwinder.h
+++ b/simpleperf/OfflineUnwinder.h
@@ -50,11 +50,10 @@
 
 class OfflineUnwinder {
  public:
-  OfflineUnwinder(bool strict_arch_check, bool collect_stat);
+  OfflineUnwinder(bool collect_stat);
 
-  bool UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs,
-                       const char* stack, size_t stack_size,
-                       std::vector<uint64_t>* ips, std::vector<uint64_t>* sps);
+  bool UnwindCallChain(const ThreadEntry& thread, const RegSet& regs, const char* stack,
+                       size_t stack_size, std::vector<uint64_t>* ips, std::vector<uint64_t>* sps);
 
   bool HasStat() const {
     return collect_stat_;
@@ -65,7 +64,6 @@
   }
 
  private:
-  bool strict_arch_check_ __attribute__((unused));
   bool collect_stat_;
   UnwindingResult unwinding_result_;
 };
diff --git a/simpleperf/cmd_debug_unwind.cpp b/simpleperf/cmd_debug_unwind.cpp
index cd96a75..ccb0b15 100644
--- a/simpleperf/cmd_debug_unwind.cpp
+++ b/simpleperf/cmd_debug_unwind.cpp
@@ -94,7 +94,7 @@
                ),
           input_filename_("perf.data"),
           output_filename_("perf.data.debug"),
-          offline_unwinder_(false, true),
+          offline_unwinder_(true),
           callchain_joiner_(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE, 1, true),
           selected_time_(0) {
   }
@@ -238,13 +238,11 @@
     if ((r.sample_type & need_type) == need_type && r.regs_user_data.reg_mask != 0 &&
         r.GetValidStackSize() > 0) {
       ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
-      RegSet regs = CreateRegSet(r.regs_user_data.abi, r.regs_user_data.reg_mask,
-                                 r.regs_user_data.regs);
+      RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
       std::vector<uint64_t> ips;
       std::vector<uint64_t> sps;
-      if (!offline_unwinder_.UnwindCallChain(r.regs_user_data.abi, *thread, regs,
-                                             r.stack_user_data.data, r.GetValidStackSize(),
-                                             &ips, &sps)) {
+      if (!offline_unwinder_.UnwindCallChain(*thread, regs, r.stack_user_data.data,
+                                             r.GetValidStackSize(), &ips, &sps)) {
         return false;
       }
 
diff --git a/simpleperf/cmd_debug_unwind_test.cpp b/simpleperf/cmd_debug_unwind_test.cpp
index e6d4431..8bf93d3 100644
--- a/simpleperf/cmd_debug_unwind_test.cpp
+++ b/simpleperf/cmd_debug_unwind_test.cpp
@@ -30,7 +30,6 @@
 #include "record_file.h"
 #include "test_util.h"
 
-#if defined(__aarch64__)
 static std::unique_ptr<Command> DebugUnwindCmd() {
   return CreateCommandInstance("debug-unwind");
 }
@@ -74,11 +73,8 @@
   int old_stdout_;
   std::unique_ptr<TemporaryFile> tmpfile_;
 };
-#endif  // defined(__aarch64__)
 
 TEST(cmd_debug_unwind, smoke) {
-  // TODO: Remove the arch limitation once using cross-platform unwinding in the new unwinder.
-#if defined(__aarch64__)
   std::string input_data = GetTestData(PERF_DATA_NO_UNWIND);
   CaptureStdout capture;
   TemporaryFile tmp_file;
@@ -90,14 +86,9 @@
   ASSERT_TRUE(DebugUnwindCmd()->Run({"-i", input_data, "-o", tmp_file.path, "--time",
                                      "1516379654300997"}));
   ASSERT_NE(capture.Finish().find("Unwinding sample count: 1"), std::string::npos);
-#else
-  GTEST_LOG_(INFO) << "This test does nothing on non-ARM64 devices.";
-#endif
 }
 
 TEST(cmd_debug_unwind, symfs_option) {
-  // TODO: Remove the arch limitation once using cross-platform unwinding in the new unwinder.
-#if defined(__aarch64__)
   std::string input_data = GetTestData(NATIVELIB_IN_APK_PERF_DATA);
   CaptureStdout capture;
   TemporaryFile tmp_file;
@@ -113,7 +104,4 @@
   std::unordered_map<std::string, std::string> info_map;
   ASSERT_TRUE(reader->ReadMetaInfoFeature(&info_map));
   ASSERT_EQ(info_map["debug_unwind"], "true");
-#else
-  GTEST_LOG_(INFO) << "This test does nothing on non-ARM64 devices.";
-#endif
 }
diff --git a/simpleperf/cmd_kmem.cpp b/simpleperf/cmd_kmem.cpp
index 65ad323..6ae908d 100644
--- a/simpleperf/cmd_kmem.cpp
+++ b/simpleperf/cmd_kmem.cpp
@@ -545,8 +545,7 @@
       slab_sample_tree_builder_.reset(
           new SlabSampleTreeBuilder(comparator, &thread_tree_));
       slab_sample_tree_builder_->SetCallChainSampleOptions(
-          accumulate_callchain_, print_callgraph_, !callgraph_show_callee_,
-          false);
+          accumulate_callchain_, print_callgraph_, !callgraph_show_callee_);
       sort_comparator.AddComparator(comparator);
       slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator));
       slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer));
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index 880badf..13313e0 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -366,10 +366,7 @@
     return false;
   }
   if (unwind_dwarf_callchain_) {
-    // Normally do strict arch check when unwinding stack. But allow unwinding
-    // 32-bit processes on 64-bit devices for system wide profiling.
-    bool strict_arch_check = !system_wide_collection_;
-    offline_unwinder_.reset(new OfflineUnwinder(strict_arch_check, false));
+    offline_unwinder_.reset(new OfflineUnwinder(false));
   }
   if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
     callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
@@ -1122,14 +1119,11 @@
       (r.GetValidStackSize() > 0)) {
     ThreadEntry* thread =
         thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
-    RegSet regs = CreateRegSet(r.regs_user_data.abi,
-                               r.regs_user_data.reg_mask,
-                               r.regs_user_data.regs);
+    RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
     std::vector<uint64_t> ips;
     std::vector<uint64_t> sps;
-    if (!offline_unwinder_->UnwindCallChain(r.regs_user_data.abi, *thread, regs,
-                                            r.stack_user_data.data, r.GetValidStackSize(),
-                                            &ips, &sps)) {
+    if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
+                                            r.GetValidStackSize(), &ips, &sps)) {
       return false;
     }
     r.ReplaceRegAndStackWithCallChain(ips);
diff --git a/simpleperf/cmd_report.cpp b/simpleperf/cmd_report.cpp
index 395ae59..9aee30a 100644
--- a/simpleperf/cmd_report.cpp
+++ b/simpleperf/cmd_report.cpp
@@ -320,7 +320,6 @@
   bool accumulate_callchain;
   bool build_callchain;
   bool use_caller_as_callchain_root;
-  bool strict_unwind_arch_check;
   bool trace_offcpu;
 
   std::unique_ptr<ReportCmdSampleTreeBuilder> CreateSampleTreeBuilder() {
@@ -333,7 +332,7 @@
     builder->SetFilters(pid_filter, tid_filter, comm_filter, dso_filter, symbol_filter);
     builder->SetBranchSampleOption(use_branch_address);
     builder->SetCallChainSampleOptions(accumulate_callchain, build_callchain,
-                                       use_caller_as_callchain_root, strict_unwind_arch_check);
+                                       use_caller_as_callchain_root);
     return builder;
   }
 };
@@ -851,9 +850,6 @@
 
 bool ReportCommand::ReadSampleTreeFromRecordFile() {
   sample_tree_builder_options_.use_branch_address = use_branch_address_;
-  // Normally do strict arch check when unwinding stack. But allow unwinding
-  // 32-bit processes on 64-bit devices for system wide profiling.
-  sample_tree_builder_options_.strict_unwind_arch_check = !system_wide_collection_;
   sample_tree_builder_options_.accumulate_callchain = accumulate_callchain_;
   sample_tree_builder_options_.build_callchain = print_callgraph_;
   sample_tree_builder_options_.use_caller_as_callchain_root = !callgraph_show_callee_;
diff --git a/simpleperf/cmd_report_test.cpp b/simpleperf/cmd_report_test.cpp
index f6ab6f8..28f226d 100644
--- a/simpleperf/cmd_report_test.cpp
+++ b/simpleperf/cmd_report_test.cpp
@@ -507,19 +507,12 @@
 }
 
 TEST_F(ReportCommandTest, report_dwarf_callgraph_of_nativelib_in_apk) {
-  // NATIVELIB_IN_APK_PERF_DATA is recorded on arm64, so can only report
-  // callgraph on arm64.
-  if (GetBuildArch() == ARCH_ARM64) {
-    Report(NATIVELIB_IN_APK_PERF_DATA, {"-g"});
-    ASSERT_NE(content.find(GetUrlInApk(APK_FILE, NATIVELIB_IN_APK)),
-              std::string::npos);
-    ASSERT_NE(content.find("Func2"), std::string::npos);
-    ASSERT_NE(content.find("Func1"), std::string::npos);
-    ASSERT_NE(content.find("GlobalFunc"), std::string::npos);
-  } else {
-    GTEST_LOG_(INFO)
-        << "This test does nothing as it is only run on arm64 devices";
-  }
+  Report(NATIVELIB_IN_APK_PERF_DATA, {"-g"});
+  ASSERT_NE(content.find(GetUrlInApk(APK_FILE, NATIVELIB_IN_APK)),
+            std::string::npos);
+  ASSERT_NE(content.find("Func2"), std::string::npos);
+  ASSERT_NE(content.find("Func1"), std::string::npos);
+  ASSERT_NE(content.find("GlobalFunc"), std::string::npos);
 }
 
 TEST_F(ReportCommandTest, exclude_kernel_callchain) {
diff --git a/simpleperf/nonlinux_support/nonlinux_support.cpp b/simpleperf/nonlinux_support/nonlinux_support.cpp
index 4f53c0e..db5bed2 100644
--- a/simpleperf/nonlinux_support/nonlinux_support.cpp
+++ b/simpleperf/nonlinux_support/nonlinux_support.cpp
@@ -21,11 +21,10 @@
 #include "OfflineUnwinder.h"
 
 namespace simpleperf {
-OfflineUnwinder::OfflineUnwinder(bool strict_arch_check, bool collect_stat)
-    : strict_arch_check_(strict_arch_check), collect_stat_(collect_stat) {
+OfflineUnwinder::OfflineUnwinder(bool collect_stat) : collect_stat_(collect_stat) {
 }
 
-bool OfflineUnwinder::UnwindCallChain(int, const ThreadEntry&, const RegSet&, const char*, size_t,
+bool OfflineUnwinder::UnwindCallChain(const ThreadEntry&, const RegSet&, const char*, size_t,
                      std::vector<uint64_t>*, std::vector<uint64_t>*) {
   return false;
 }
diff --git a/simpleperf/perf_regs.cpp b/simpleperf/perf_regs.cpp
index 33c6491..e654b6b 100644
--- a/simpleperf/perf_regs.cpp
+++ b/simpleperf/perf_regs.cpp
@@ -16,6 +16,8 @@
 
 #include "perf_regs.h"
 
+#include <string.h>
+
 #include <unordered_map>
 #include <android-base/logging.h>
 #include <android-base/stringprintf.h>
@@ -77,25 +79,6 @@
   return "unknown";
 }
 
-// If strict_check, must have arch1 == arch2.
-// Otherwise, allow X86_32 with X86_64, ARM with ARM64.
-bool IsArchTheSame(ArchType arch1, ArchType arch2, bool strict_check) {
-  if (strict_check) {
-    return arch1 == arch2;
-  }
-  switch (arch1) {
-    case ARCH_X86_32:
-    case ARCH_X86_64:
-      return arch2 == ARCH_X86_32 || arch2 == ARCH_X86_64;
-    case ARCH_ARM64:
-    case ARCH_ARM:
-      return arch2 == ARCH_ARM64 || arch2 == ARCH_ARM;
-    default:
-      break;
-  }
-  return arch1 == arch2;
-}
-
 uint64_t GetSupportedRegMask(ArchType arch) {
   switch (arch) {
     case ARCH_X86_32:
@@ -167,53 +150,32 @@
   }
 }
 
-RegSet CreateRegSet(int abi, uint64_t valid_mask, const uint64_t* valid_regs) {
-  RegSet regs;
-  regs.valid_mask = valid_mask;
+RegSet::RegSet(int abi, uint64_t valid_mask, const uint64_t* valid_regs)
+    : valid_mask(valid_mask) {
+  arch = (abi == PERF_SAMPLE_REGS_ABI_32) ? ScopedCurrentArch::GetCurrentArch32()
+                                          : ScopedCurrentArch::GetCurrentArch();
+  memset(data, 0, sizeof(data));
   for (int i = 0, j = 0; i < 64; ++i) {
     if ((valid_mask >> i) & 1) {
-      regs.data[i] = valid_regs[j++];
+      data[i] = valid_regs[j++];
     }
   }
-  if (ScopedCurrentArch::GetCurrentArch() == ARCH_ARM64 &&
-      abi == PERF_SAMPLE_REGS_ABI_32) {
-    // The kernel dumps arm64 regs, but we need arm regs. So map arm64
-    // regs into arm regs.
-    regs.data[PERF_REG_ARM_PC] = regs.data[PERF_REG_ARM64_PC];
+  if (ScopedCurrentArch::GetCurrentArch() == ARCH_ARM64 && abi == PERF_SAMPLE_REGS_ABI_32) {
+    // The kernel dumps arm64 regs, but we need arm regs. So map arm64 regs into arm regs.
+    data[PERF_REG_ARM_PC] = data[PERF_REG_ARM64_PC];
   }
-  return regs;
 }
 
-void SetIpReg(ArchType arch, uint64_t ip, RegSet* regs) {
-  int regno;
-  switch (arch) {
-    case ARCH_X86_64:
-    case ARCH_X86_32:
-      regno = PERF_REG_X86_IP;
-      break;
-    case ARCH_ARM:
-      regno = PERF_REG_ARM_PC;
-      break;
-    case ARCH_ARM64:
-      regno = PERF_REG_ARM64_PC;
-      break;
-    default:
-      return;
-  }
-  regs->valid_mask |= (1ULL << regno);
-  regs->data[regno] = ip;
-}
-
-bool GetRegValue(const RegSet& regs, size_t regno, uint64_t* value) {
+bool RegSet::GetRegValue(size_t regno, uint64_t* value) const {
   CHECK_LT(regno, 64U);
-  if ((regs.valid_mask >> regno) & 1) {
-    *value = regs.data[regno];
+  if ((valid_mask >> regno) & 1) {
+    *value = data[regno];
     return true;
   }
   return false;
 }
 
-bool GetSpRegValue(const RegSet& regs, ArchType arch, uint64_t* value) {
+bool RegSet::GetSpRegValue(uint64_t* value) const {
   size_t regno;
   switch (arch) {
     case ARCH_X86_32:
@@ -231,10 +193,10 @@
     default:
       return false;
   }
-  return GetRegValue(regs, regno, value);
+  return GetRegValue(regno, value);
 }
 
-bool GetIpRegValue(const RegSet& regs, ArchType arch, uint64_t* value) {
+bool RegSet::GetIpRegValue(uint64_t* value) const {
   size_t regno;
   switch (arch) {
     case ARCH_X86_64:
@@ -250,5 +212,5 @@
     default:
       return false;
   }
-  return GetRegValue(regs, regno, value);
+  return GetRegValue(regno, value);
 }
diff --git a/simpleperf/perf_regs.h b/simpleperf/perf_regs.h
index 7fbae73..86a12d0 100644
--- a/simpleperf/perf_regs.h
+++ b/simpleperf/perf_regs.h
@@ -60,7 +60,6 @@
 ArchType GetArchType(const std::string& arch);
 ArchType GetArchForAbi(ArchType machine_arch, int abi);
 std::string GetArchString(ArchType arch);
-bool IsArchTheSame(ArchType arch1, ArchType arch2, bool strict_check);
 uint64_t GetSupportedRegMask(ArchType arch);
 std::string GetRegName(size_t regno, ArchType arch);
 
@@ -88,14 +87,17 @@
 };
 
 struct RegSet {
+  ArchType arch;
+  // For each bit set in valid_mask, there is a valid reg value in data[].
   uint64_t valid_mask;
+  // Stores reg values. Values for invalid regs are 0.
   uint64_t data[64];
+
+  RegSet(int abi, uint64_t valid_mask, const uint64_t* valid_regs);
+
+  bool GetRegValue(size_t regno, uint64_t* value) const;
+  bool GetSpRegValue(uint64_t* value) const;
+  bool GetIpRegValue(uint64_t* value) const;
 };
 
-RegSet CreateRegSet(int abi, uint64_t valid_mask, const uint64_t* valid_regs);
-
-bool GetRegValue(const RegSet& regs, size_t regno, uint64_t* value);
-bool GetSpRegValue(const RegSet& regs, ArchType arch, uint64_t* value);
-bool GetIpRegValue(const RegSet& regs, ArchType arch, uint64_t* value);
-
 #endif  // SIMPLE_PERF_PERF_REGS_H_
diff --git a/simpleperf/sample_tree.h b/simpleperf/sample_tree.h
index 36c6adc..a2ce19f 100644
--- a/simpleperf/sample_tree.h
+++ b/simpleperf/sample_tree.h
@@ -72,13 +72,12 @@
 
   void SetCallChainSampleOptions(bool accumulate_callchain,
                                  bool build_callchain,
-                                 bool use_caller_as_callchain_root,
-                                 bool strict_unwind_arch_check) {
+                                 bool use_caller_as_callchain_root) {
     accumulate_callchain_ = accumulate_callchain;
     build_callchain_ = build_callchain;
     use_caller_as_callchain_root_ = use_caller_as_callchain_root;
     if (accumulate_callchain_) {
-      offline_unwinder_.reset(new OfflineUnwinder(strict_unwind_arch_check, false));
+      offline_unwinder_.reset(new OfflineUnwinder(false));
     }
   }
 
@@ -111,14 +110,11 @@
           (r.regs_user_data.reg_mask != 0) &&
           (r.sample_type & PERF_SAMPLE_STACK_USER) &&
           (r.GetValidStackSize() > 0)) {
-        RegSet regs = CreateRegSet(r.regs_user_data.abi,
-                                   r.regs_user_data.reg_mask,
-                                   r.regs_user_data.regs);
+        RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
         std::vector<uint64_t> user_ips;
         std::vector<uint64_t> sps;
-        if (offline_unwinder_->UnwindCallChain(r.regs_user_data.abi, *thread, regs,
-                                               r.stack_user_data.data, r.GetValidStackSize(),
-                                               &user_ips, &sps)) {
+        if (offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
+                                               r.GetValidStackSize(), &user_ips, &sps)) {
           ips.push_back(PERF_CONTEXT_USER);
           ips.insert(ips.end(), user_ips.begin(), user_ips.end());
         }