simpleperf: fix reading dex files for unwinding while recording.

When simpleperf does unwinding while recording, it processes mmap
records before reading the dex file linked list (via JITDebugReader).
To process mmap records, it creates Dso objects of type ELF_FILE.
Then, after reading the dex file linked list, it realizes that some
ELF_FILE Dso objects should actually be DEX_FILE.

So this patch adds support for converting a Dso object of type ELF_FILE
into DEX_FILE when a dex file offset is added to it.

Bug: http://b/73126888
Test: run simpleperf_unit_test.
Test: run `simpleperf record -g --no-post-unwind` on an app.

Change-Id: I580a382724b17c1396a7f52d7b3f5df45bcbcfb7
diff --git a/simpleperf/cmd_report_sample_test.cpp b/simpleperf/cmd_report_sample_test.cpp
index 4dd3892..3346146 100644
--- a/simpleperf/cmd_report_sample_test.cpp
+++ b/simpleperf/cmd_report_sample_test.cpp
@@ -127,7 +127,7 @@
   ASSERT_NE(data.find("path: [kernel.kallsyms]"), std::string::npos);
   ASSERT_NE(data.find("symbol: binder_ioctl_write_read"), std::string::npos);
   ASSERT_NE(data.find("path: /system/lib64/libc.so"), std::string::npos);
-  GetProtobufReport(PERF_DATA_WITH_KERNEL_SYMBOLS_AVAILABLE_FALSE, &data,
+  GetProtobufReport(PERF_DATA_WITH_KERNEL_SYMBOLS_AVAILABLE_TRUE, &data,
                     {"--show-callchain", "--remove-unknown-kernel-symbols"});
   ASSERT_NE(data.find("time: 1368297633794862"), std::string::npos);
   ASSERT_NE(data.find("path: [kernel.kallsyms]"), std::string::npos);
diff --git a/simpleperf/dso.cpp b/simpleperf/dso.cpp
index 396bb35..29cfcc5 100644
--- a/simpleperf/dso.cpp
+++ b/simpleperf/dso.cpp
@@ -21,6 +21,7 @@
 
 #include <algorithm>
 #include <limits>
+#include <memory>
 #include <vector>
 
 #include <android-base/file.h>
@@ -326,6 +327,40 @@
   }
 }
 
+class DexFileDso : public Dso {
+ public:
+  DexFileDso(const std::string& path, const std::string& debug_file_path)
+      : Dso(DSO_DEX_FILE, path, debug_file_path) {}
+
+  void AddDexFileOffset(uint64_t dex_file_offset) override {
+    dex_file_offsets_.push_back(dex_file_offset);
+  }
+
+  const std::vector<uint64_t>* DexFileOffsets() override {
+    return &dex_file_offsets_;
+  }
+
+  std::vector<Symbol> LoadSymbols() override {
+    std::vector<Symbol> symbols;
+    std::vector<DexFileSymbol> dex_file_symbols;
+    if (!ReadSymbolsFromDexFile(debug_file_path_, dex_file_offsets_, &dex_file_symbols)) {
+      android::base::LogSeverity level = symbols_.empty() ? android::base::WARNING
+                                                          : android::base::DEBUG;
+      LOG(level) << "Failed to read symbols from " << debug_file_path_;
+      return symbols;
+    }
+    LOG(VERBOSE) << "Read symbols from " << debug_file_path_ << " successfully";
+    for (auto& symbol : dex_file_symbols) {
+      symbols.emplace_back(symbol.name, symbol.offset, symbol.len);
+    }
+    SortAndFixSymbols(symbols);
+    return symbols;
+  }
+
+ private:
+  std::vector<uint64_t> dex_file_offsets_;
+};
+
 class ElfDso : public Dso {
  public:
   ElfDso(const std::string& path, const std::string& debug_file_path)
@@ -356,8 +391,28 @@
     min_vaddr_ = min_vaddr;
   }
 
+  void AddDexFileOffset(uint64_t dex_file_offset) override {
+    if (type_ == DSO_ELF_FILE) {
+      // When simpleperf does unwinding while recording, it processes mmap records before reading
+      // the dex file linked list (via JITDebugReader). To process mmap records, it creates Dso
+      // objects of type ELF_FILE. After reading the dex file linked list, it realizes that some
+      // ELF_FILE Dso objects should actually be DEX_FILE, because they have dex file offsets.
+      // So convert this ELF_FILE Dso into a DEX_FILE Dso here.
+      type_ = DSO_DEX_FILE;
+      dex_file_dso_.reset(new DexFileDso(path_, path_));
+    }
+    dex_file_dso_->AddDexFileOffset(dex_file_offset);
+  }
+
+  const std::vector<uint64_t>* DexFileOffsets() override {
+    return dex_file_dso_ ? dex_file_dso_->DexFileOffsets() : nullptr;
+  }
+
  protected:
   std::vector<Symbol> LoadSymbols() override {
+    if (dex_file_dso_) {
+      return dex_file_dso_->LoadSymbols();
+    }
     std::vector<Symbol> symbols;
     BuildId build_id = GetExpectedBuildId();
     auto symbol_callback = [&](const ElfFileSymbol& symbol) {
@@ -381,6 +436,7 @@
 
  private:
   uint64_t min_vaddr_;
+  std::unique_ptr<DexFileDso> dex_file_dso_;
 };
 
 class KernelDso : public Dso {
@@ -489,23 +545,6 @@
   return nullptr;
 }
 
-std::vector<Symbol> DexFileDso::LoadSymbols() {
-  std::vector<Symbol> symbols;
-  std::vector<DexFileSymbol> dex_file_symbols;
-  if (!ReadSymbolsFromDexFile(debug_file_path_, dex_file_offsets_, &dex_file_symbols)) {
-    android::base::LogSeverity level = symbols_.empty() ? android::base::WARNING
-                                                        : android::base::DEBUG;
-    LOG(level) << "Failed to read symbols from " << debug_file_path_;
-    return symbols;
-  }
-  LOG(VERBOSE) << "Read symbols from " << debug_file_path_ << " successfully";
-  for (auto& symbol : dex_file_symbols) {
-    symbols.emplace_back(symbol.name, symbol.offset, symbol.len);
-  }
-  SortAndFixSymbols(symbols);
-  return symbols;
-}
-
 const char* DsoTypeToString(DsoType dso_type) {
   switch (dso_type) {
     case DSO_KERNEL:
diff --git a/simpleperf/dso.h b/simpleperf/dso.h
index 3b79faa..c532f3c 100644
--- a/simpleperf/dso.h
+++ b/simpleperf/dso.h
@@ -156,6 +156,8 @@
   // Return the minimum virtual address in program header.
   virtual uint64_t MinVirtualAddress() { return 0; }
   virtual void SetMinVirtualAddress(uint64_t) {}
+  virtual void AddDexFileOffset(uint64_t) {}
+  virtual const std::vector<uint64_t>* DexFileOffsets() { return nullptr; }
 
   const Symbol* FindSymbol(uint64_t vaddr_in_dso);
 
@@ -182,7 +184,7 @@
   void Load();
   virtual std::vector<Symbol> LoadSymbols() = 0;
 
-  const DsoType type_;
+  DsoType type_;
   // path of the shared library used by the profiled program
   const std::string path_;
   // path of the shared library having symbol table and debug information
@@ -201,27 +203,6 @@
   android::base::LogSeverity symbol_warning_loglevel_;
 };
 
-class DexFileDso : public Dso {
- public:
-  void AddDexFileOffset(uint64_t dex_file_offset) {
-    dex_file_offsets_.push_back(dex_file_offset);
-  }
-
-  const std::vector<uint64_t>& DexFileOffsets() {
-    return dex_file_offsets_;
-  }
-
- protected:
-  DexFileDso(const std::string& path, const std::string& debug_file_path)
-      : Dso(DSO_DEX_FILE, path, debug_file_path) {}
-
-  std::vector<Symbol> LoadSymbols() override;
-
- private:
-  std::vector<uint64_t> dex_file_offsets_;
-  friend std::unique_ptr<Dso> Dso::CreateDso(DsoType, const std::string&, bool);
-};
-
 const char* DsoTypeToString(DsoType dso_type);
 bool GetBuildIdFromDsoPath(const std::string& dso_path, BuildId* build_id);
 
diff --git a/simpleperf/dso_test.cpp b/simpleperf/dso_test.cpp
index f42b276..cc44193 100644
--- a/simpleperf/dso_test.cpp
+++ b/simpleperf/dso_test.cpp
@@ -65,3 +65,23 @@
   ASSERT_EQ(finder.FindDebugFile("[vdso]", false, build_id), fake_vdso32);
   ASSERT_EQ(finder.FindDebugFile("[vdso]", true, build_id), fake_vdso64);
 }
+
+TEST(dso, dex_file_dso) {
+#if defined(__linux__)
+  for (DsoType dso_type : {DSO_DEX_FILE, DSO_ELF_FILE}) {
+    std::unique_ptr<Dso> dso = Dso::CreateDso(dso_type, GetTestData("base.vdex"));
+    ASSERT_TRUE(dso);
+    dso->AddDexFileOffset(0x28);
+    ASSERT_EQ(DSO_DEX_FILE, dso->type());
+    const Symbol* symbol = dso->FindSymbol(0x6c77e);
+    ASSERT_NE(symbol, nullptr);
+    ASSERT_EQ(symbol->addr, static_cast<uint64_t>(0x6c77e));
+    ASSERT_EQ(symbol->len, static_cast<uint64_t>(0x16));
+    ASSERT_STREQ(symbol->DemangledName(),
+                 "com.example.simpleperf.simpleperfexamplewithnative.MixActivity$1.run");
+    ASSERT_EQ(0u, dso->MinVirtualAddress());
+  }
+#else
+  GTEST_LOG_(INFO) << "This test only runs on linux because of libdexfile";
+#endif  // defined(__linux__)
+}
diff --git a/simpleperf/record_file_writer.cpp b/simpleperf/record_file_writer.cpp
index c91bd3e..136feea 100644
--- a/simpleperf/record_file_writer.cpp
+++ b/simpleperf/record_file_writer.cpp
@@ -317,10 +317,7 @@
     }
     std::sort(dump_symbols.begin(), dump_symbols.end(), Symbol::CompareByAddr);
 
-    const std::vector<uint64_t>* dex_file_offsets = nullptr;
-    if (dso->type() == DSO_DEX_FILE) {
-      dex_file_offsets = &static_cast<DexFileDso*>(dso)->DexFileOffsets();
-    }
+    const std::vector<uint64_t>* dex_file_offsets = dso->DexFileOffsets();
     if (!WriteFileFeature(dso->Path(), dso_type, min_vaddr, dump_symbols, dex_file_offsets)) {
       return false;
     }
diff --git a/simpleperf/testdata/base.vdex b/simpleperf/testdata/base.vdex
new file mode 100644
index 0000000..b0ea018
--- /dev/null
+++ b/simpleperf/testdata/base.vdex
Binary files differ
diff --git a/simpleperf/thread_tree.cpp b/simpleperf/thread_tree.cpp
index 580338c..3f157f0 100644
--- a/simpleperf/thread_tree.cpp
+++ b/simpleperf/thread_tree.cpp
@@ -299,18 +299,14 @@
   }
   dso->SetMinVirtualAddress(min_vaddr);
   dso->SetSymbols(symbols);
-  if (!dex_file_offsets.empty()) {
-    CHECK_EQ(static_cast<int>(dso_type), static_cast<int>(DSO_DEX_FILE));
-    for (uint64_t offset : dex_file_offsets) {
-      static_cast<DexFileDso*>(dso)->AddDexFileOffset(offset);
-    }
+  for (uint64_t offset : dex_file_offsets) {
+    dso->AddDexFileOffset(offset);
   }
 }
 
 void ThreadTree::AddDexFileOffset(const std::string& file_path, uint64_t dex_file_offset) {
   Dso* dso = FindUserDsoOrNew(file_path, 0, DSO_DEX_FILE);
-  CHECK_EQ(static_cast<int>(dso->type()), static_cast<int>(DSO_DEX_FILE));
-  static_cast<DexFileDso*>(dso)->AddDexFileOffset(dex_file_offset);
+  dso->AddDexFileOffset(dex_file_offset);
 }
 
 void ThreadTree::Update(const Record& record) {