blob: 381561e4efac63ab7a3242988c1394bb31e832b6 [file] [log] [blame]
Yabin Cuic573eaa2019-08-21 16:05:07 -07001/*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Yabin Cui65b8fab2023-01-31 09:50:53 -080017#include <stdint.h>
Yabin Cuic573eaa2019-08-21 16:05:07 -070018#include <stdio.h>
Yabin Cui0b60f9c2021-02-09 10:54:17 -080019#include <unistd.h>
Yabin Cuic573eaa2019-08-21 16:05:07 -070020
21#include <memory>
Yabin Cui561bf1b2020-11-03 12:11:07 -080022#include <optional>
Yi Kong61e78cd2020-01-13 15:43:01 -080023#include <string>
Yabin Cuic573eaa2019-08-21 16:05:07 -070024
Yabin Cui561bf1b2020-11-03 12:11:07 -080025#include <android-base/parseint.h>
Yabin Cuida89bf62021-12-08 14:11:24 -080026#include <android-base/strings.h>
Yabin Cui561bf1b2020-11-03 12:11:07 -080027
Yabin Cui82d48052023-11-22 15:51:32 -080028#include "BranchListFile.h"
ThiƩbaud Weksteen4848ee02020-10-23 16:06:59 +020029#include "ETMDecoder.h"
Yabin Cuif00f4fc2022-11-23 15:15:30 -080030#include "RegEx.h"
Yabin Cuic573eaa2019-08-21 16:05:07 -070031#include "command.h"
32#include "record_file.h"
Yabin Cui82d48052023-11-22 15:51:32 -080033#include "system/extras/simpleperf/branch_list.pb.h"
Yabin Cuic573eaa2019-08-21 16:05:07 -070034#include "thread_tree.h"
Yabin Cuifad7bbe2019-09-18 16:05:51 -070035#include "utils.h"
Yabin Cuic573eaa2019-08-21 16:05:07 -070036
Yabin Cui193f2382020-04-01 14:30:03 -070037namespace simpleperf {
38
Yabin Cuic573eaa2019-08-21 16:05:07 -070039namespace {
40
Yabin Cuifad7bbe2019-09-18 16:05:51 -070041using AddrPair = std::pair<uint64_t, uint64_t>;
42
43struct AddrPairHash {
44 size_t operator()(const AddrPair& ap) const noexcept {
45 size_t seed = 0;
46 HashCombine(seed, ap.first);
47 HashCombine(seed, ap.second);
48 return seed;
49 }
50};
51
// Supported output formats.
enum class OutputFormat {
  AutoFDO,
  BOLT,
  BranchList,
};
57
58struct AutoFDOBinaryInfo {
Yabin Cuiab9cb232024-09-05 14:45:54 -070059 std::vector<ElfSegment> executable_segments;
Yabin Cui7cba4982023-12-04 16:50:37 -080060 std::unordered_map<uint64_t, uint64_t> address_count_map;
Yabin Cuifad7bbe2019-09-18 16:05:51 -070061 std::unordered_map<AddrPair, uint64_t, AddrPairHash> range_count_map;
62 std::unordered_map<AddrPair, uint64_t, AddrPairHash> branch_count_map;
Yabin Cuia0208222021-12-10 10:24:29 -080063
Yabin Cui7cba4982023-12-04 16:50:37 -080064 void AddAddress(uint64_t addr) { OverflowSafeAdd(address_count_map[addr], 1); }
65
66 void AddRange(uint64_t begin, uint64_t end) {
67 OverflowSafeAdd(range_count_map[std::make_pair(begin, end)], 1);
68 }
69
70 void AddBranch(uint64_t from, uint64_t to) {
71 OverflowSafeAdd(branch_count_map[std::make_pair(from, to)], 1);
72 }
73
Yabin Cuife4af172021-12-14 15:57:51 -080074 void AddInstrRange(const ETMInstrRange& instr_range) {
75 uint64_t total_count = instr_range.branch_taken_count;
76 OverflowSafeAdd(total_count, instr_range.branch_not_taken_count);
77 OverflowSafeAdd(range_count_map[AddrPair(instr_range.start_addr, instr_range.end_addr)],
78 total_count);
79 if (instr_range.branch_taken_count > 0) {
80 OverflowSafeAdd(branch_count_map[AddrPair(instr_range.end_addr, instr_range.branch_to_addr)],
81 instr_range.branch_taken_count);
82 }
83 }
84
Yabin Cuia0208222021-12-10 10:24:29 -080085 void Merge(const AutoFDOBinaryInfo& other) {
Yabin Cui7cba4982023-12-04 16:50:37 -080086 for (const auto& p : other.address_count_map) {
87 auto res = address_count_map.emplace(p.first, p.second);
88 if (!res.second) {
89 OverflowSafeAdd(res.first->second, p.second);
90 }
91 }
Yabin Cuia0208222021-12-10 10:24:29 -080092 for (const auto& p : other.range_count_map) {
93 auto res = range_count_map.emplace(p.first, p.second);
94 if (!res.second) {
Yabin Cui1befd522021-12-10 12:24:22 -080095 OverflowSafeAdd(res.first->second, p.second);
Yabin Cuia0208222021-12-10 10:24:29 -080096 }
97 }
98 for (const auto& p : other.branch_count_map) {
99 auto res = branch_count_map.emplace(p.first, p.second);
100 if (!res.second) {
Yabin Cui1befd522021-12-10 12:24:22 -0800101 OverflowSafeAdd(res.first->second, p.second);
Yabin Cuia0208222021-12-10 10:24:29 -0800102 }
103 }
104 }
Yabin Cuiab9cb232024-09-05 14:45:54 -0700105
106 std::optional<uint64_t> VaddrToOffset(uint64_t vaddr) const {
107 for (const auto& segment : executable_segments) {
108 if (segment.vaddr <= vaddr && vaddr < segment.vaddr + segment.file_size) {
109 return vaddr - segment.vaddr + segment.file_offset;
110 }
111 }
112 return std::nullopt;
113 }
Yabin Cuifad7bbe2019-09-18 16:05:51 -0700114};
115
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800116using AutoFDOBinaryCallback = std::function<void(const BinaryKey&, AutoFDOBinaryInfo&)>;
Yabin Cui82d48052023-11-22 15:51:32 -0800117using ETMBinaryCallback = std::function<void(const BinaryKey&, ETMBinary&)>;
Yabin Cui544fa562023-12-01 15:32:00 -0800118using LBRDataCallback = std::function<void(LBRData&)>;
Yabin Cui561bf1b2020-11-03 12:11:07 -0800119
Yabin Cuiab9cb232024-09-05 14:45:54 -0700120static std::vector<ElfSegment> GetExecutableSegments(const Dso* dso) {
121 std::vector<ElfSegment> segments;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800122 ElfStatus status;
123 if (auto elf = ElfFile::Open(dso->GetDebugFilePath(), &status); elf) {
Yabin Cuiab9cb232024-09-05 14:45:54 -0700124 segments = elf->GetProgramHeader();
125 auto not_executable = [](const ElfSegment& s) { return !s.is_executable; };
126 segments.erase(std::remove_if(segments.begin(), segments.end(), not_executable),
127 segments.end());
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800128 }
Yabin Cuiab9cb232024-09-05 14:45:54 -0700129 return segments;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800130}
131
Yabin Cui544fa562023-12-01 15:32:00 -0800132// Base class for reading perf.data and generating AutoFDO or branch list data.
133class PerfDataReader {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800134 public:
Yabin Cui544fa562023-12-01 15:32:00 -0800135 static std::string GetDataType(RecordFileReader& reader) {
136 const EventAttrIds& attrs = reader.AttrSection();
137 if (attrs.size() != 1) {
138 return "unknown";
139 }
140 const perf_event_attr& attr = attrs[0].attr;
141 if (IsEtmEventType(attr.type)) {
142 return "etm";
143 }
144 if (attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
145 return "lbr";
146 }
147 return "unknown";
148 }
149
150 PerfDataReader(std::unique_ptr<RecordFileReader> reader, bool exclude_perf,
151 const RegEx* binary_name_regex)
152 : reader_(std::move(reader)),
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800153 exclude_perf_(exclude_perf),
Yabin Cuif5ff15d2023-04-28 17:42:02 -0700154 binary_filter_(binary_name_regex) {}
Yabin Cui544fa562023-12-01 15:32:00 -0800155 virtual ~PerfDataReader() {}
156
157 std::string GetDataType() const { return GetDataType(*reader_); }
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800158
Yabin Cui3024a602023-12-07 15:59:03 -0800159 void AddCallback(const AutoFDOBinaryCallback& callback) { autofdo_callback_ = callback; }
160 void AddCallback(const ETMBinaryCallback& callback) { etm_binary_callback_ = callback; }
161 void AddCallback(const LBRDataCallback& callback) { lbr_data_callback_ = callback; }
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800162
Yabin Cui544fa562023-12-01 15:32:00 -0800163 virtual bool Read() {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800164 if (exclude_perf_) {
Yabin Cui544fa562023-12-01 15:32:00 -0800165 const auto& info_map = reader_->GetMetaInfoFeature();
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800166 if (auto it = info_map.find("recording_process"); it == info_map.end()) {
Yabin Cui544fa562023-12-01 15:32:00 -0800167 LOG(ERROR) << reader_->FileName() << " doesn't support --exclude-perf";
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800168 return false;
169 } else {
170 int pid;
171 if (!android::base::ParseInt(it->second, &pid, 0)) {
Yabin Cui544fa562023-12-01 15:32:00 -0800172 LOG(ERROR) << "invalid recording_process " << it->second << " in " << reader_->FileName();
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800173 return false;
174 }
Yabin Cui544fa562023-12-01 15:32:00 -0800175 exclude_pid_ = pid;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800176 }
177 }
Yabin Cui544fa562023-12-01 15:32:00 -0800178
179 if (!reader_->LoadBuildIdAndFileFeatures(thread_tree_)) {
Yabin Cui90a547e2022-12-07 16:29:13 -0800180 return false;
181 }
Yabin Cuia0e438f2024-07-02 16:41:08 -0700182 if (reader_->HasFeature(PerfFileFormat::FEAT_INIT_MAP)) {
183 if (!reader_->ReadInitMapFeature([this](auto r) { return ProcessRecord(*r); })) {
184 return false;
185 }
186 }
Yabin Cui544fa562023-12-01 15:32:00 -0800187 if (!reader_->ReadDataSection([this](auto r) { return ProcessRecord(*r); })) {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800188 return false;
189 }
Yabin Cui544fa562023-12-01 15:32:00 -0800190 return PostProcess();
191 }
192
193 protected:
194 virtual bool ProcessRecord(Record& r) = 0;
195 virtual bool PostProcess() = 0;
196
Yabin Cui7cba4982023-12-04 16:50:37 -0800197 void ProcessAutoFDOBinaryInfo() {
198 for (auto& p : autofdo_binary_map_) {
199 const Dso* dso = p.first;
200 AutoFDOBinaryInfo& binary = p.second;
Yabin Cuiab9cb232024-09-05 14:45:54 -0700201 binary.executable_segments = GetExecutableSegments(dso);
Yabin Cui7cba4982023-12-04 16:50:37 -0800202 autofdo_callback_(BinaryKey(dso, 0), binary);
203 }
204 }
205
Yabin Cui544fa562023-12-01 15:32:00 -0800206 const std::string data_type_;
207 std::unique_ptr<RecordFileReader> reader_;
208 bool exclude_perf_;
209 BinaryFilter binary_filter_;
210
211 std::optional<int> exclude_pid_;
212 ThreadTree thread_tree_;
213 AutoFDOBinaryCallback autofdo_callback_;
Yabin Cui3024a602023-12-07 15:59:03 -0800214 ETMBinaryCallback etm_binary_callback_;
215 LBRDataCallback lbr_data_callback_;
Yabin Cui544fa562023-12-01 15:32:00 -0800216 // Store results for AutoFDO.
Yabin Cui7cba4982023-12-04 16:50:37 -0800217 std::unordered_map<const Dso*, AutoFDOBinaryInfo> autofdo_binary_map_;
Yabin Cui544fa562023-12-01 15:32:00 -0800218};
219
220class ETMThreadTreeWithFilter : public ETMThreadTree {
221 public:
222 ETMThreadTreeWithFilter(ThreadTree& thread_tree, std::optional<int>& exclude_pid)
223 : thread_tree_(thread_tree), exclude_pid_(exclude_pid) {}
224
225 void DisableThreadExitRecords() override { thread_tree_.DisableThreadExitRecords(); }
226
227 const ThreadEntry* FindThread(int tid) override {
228 const ThreadEntry* thread = thread_tree_.FindThread(tid);
229 if (thread != nullptr && exclude_pid_ && thread->pid == exclude_pid_) {
230 return nullptr;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800231 }
Yabin Cui544fa562023-12-01 15:32:00 -0800232 return thread;
233 }
234
235 const MapSet& GetKernelMaps() override { return thread_tree_.GetKernelMaps(); }
236
237 private:
238 ThreadTree& thread_tree_;
239 std::optional<int>& exclude_pid_;
240};
241
242// Read perf.data with ETM data and generate AutoFDO or branch list data.
243class ETMPerfDataReader : public PerfDataReader {
244 public:
245 ETMPerfDataReader(std::unique_ptr<RecordFileReader> reader, bool exclude_perf,
246 const RegEx* binary_name_regex, ETMDumpOption etm_dump_option)
247 : PerfDataReader(std::move(reader), exclude_perf, binary_name_regex),
248 etm_dump_option_(etm_dump_option),
249 etm_thread_tree_(thread_tree_, exclude_pid_) {}
250
Yabin Cui544fa562023-12-01 15:32:00 -0800251 bool Read() override {
252 if (reader_->HasFeature(PerfFileFormat::FEAT_ETM_BRANCH_LIST)) {
253 return ProcessETMBranchListFeature();
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800254 }
Yabin Cui544fa562023-12-01 15:32:00 -0800255 return PerfDataReader::Read();
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800256 }
257
258 private:
Yabin Cui544fa562023-12-01 15:32:00 -0800259 bool ProcessRecord(Record& r) override {
260 thread_tree_.Update(r);
261 if (r.type() == PERF_RECORD_AUXTRACE_INFO) {
262 etm_decoder_ = ETMDecoder::Create(static_cast<AuxTraceInfoRecord&>(r), etm_thread_tree_);
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800263 if (!etm_decoder_) {
264 return false;
265 }
266 etm_decoder_->EnableDump(etm_dump_option_);
267 if (autofdo_callback_) {
268 etm_decoder_->RegisterCallback(
269 [this](const ETMInstrRange& range) { ProcessInstrRange(range); });
Yabin Cui82d48052023-11-22 15:51:32 -0800270 } else if (etm_binary_callback_) {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800271 etm_decoder_->RegisterCallback(
Yabin Cui82d48052023-11-22 15:51:32 -0800272 [this](const ETMBranchList& branch) { ProcessETMBranchList(branch); });
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800273 }
Yabin Cui544fa562023-12-01 15:32:00 -0800274 } else if (r.type() == PERF_RECORD_AUX) {
275 AuxRecord& aux = static_cast<AuxRecord&>(r);
276 if (aux.data->aux_size > SIZE_MAX) {
Yabin Cui65b8fab2023-01-31 09:50:53 -0800277 LOG(ERROR) << "invalid aux size";
278 return false;
279 }
Yabin Cui544fa562023-12-01 15:32:00 -0800280 size_t aux_size = aux.data->aux_size;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800281 if (aux_size > 0) {
Yabin Cui216e7d42023-04-21 14:48:22 -0700282 bool error = false;
Yabin Cui544fa562023-12-01 15:32:00 -0800283 if (!reader_->ReadAuxData(aux.Cpu(), aux.data->aux_offset, aux_size, aux_data_buffer_,
284 error)) {
Yabin Cui216e7d42023-04-21 14:48:22 -0700285 return !error;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800286 }
Yabin Cuidf7215c2023-01-13 12:02:40 -0800287 if (!etm_decoder_) {
288 LOG(ERROR) << "ETMDecoder isn't created";
289 return false;
290 }
Yabin Cui544fa562023-12-01 15:32:00 -0800291 return etm_decoder_->ProcessData(aux_data_buffer_.data(), aux_size, !aux.Unformatted(),
292 aux.Cpu());
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800293 }
Yabin Cui544fa562023-12-01 15:32:00 -0800294 } else if (r.type() == PERF_RECORD_MMAP && r.InKernel()) {
295 auto& mmap_r = static_cast<MmapRecord&>(r);
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800296 if (android::base::StartsWith(mmap_r.filename, DEFAULT_KERNEL_MMAP_NAME)) {
297 kernel_map_start_addr_ = mmap_r.data->addr;
298 }
299 }
300 return true;
301 }
302
Yabin Cui544fa562023-12-01 15:32:00 -0800303 bool PostProcess() override {
304 if (etm_decoder_ && !etm_decoder_->FinishData()) {
305 return false;
306 }
307 if (autofdo_callback_) {
308 ProcessAutoFDOBinaryInfo();
309 } else if (etm_binary_callback_) {
310 ProcessETMBinary();
311 }
312 return true;
313 }
314
315 bool ProcessETMBranchListFeature() {
316 if (exclude_perf_) {
317 LOG(WARNING) << "--exclude-perf has no effect on perf.data with etm branch list";
318 }
319 if (autofdo_callback_) {
320 LOG(ERROR) << "convert to autofdo format isn't support on perf.data with etm branch list";
321 return false;
322 }
323 CHECK(etm_binary_callback_);
324 std::string s;
325 if (!reader_->ReadFeatureSection(PerfFileFormat::FEAT_ETM_BRANCH_LIST, &s)) {
326 return false;
327 }
328 ETMBinaryMap binary_map;
329 if (!StringToETMBinaryMap(s, binary_map)) {
330 return false;
331 }
332 for (auto& [key, binary] : binary_map) {
333 if (!binary_filter_.Filter(key.path)) {
334 continue;
335 }
336 etm_binary_callback_(key, binary);
337 }
338 return true;
339 }
340
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800341 void ProcessInstrRange(const ETMInstrRange& instr_range) {
Yabin Cuif5ff15d2023-04-28 17:42:02 -0700342 if (!binary_filter_.Filter(instr_range.dso)) {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800343 return;
344 }
345
Yabin Cuife4af172021-12-14 15:57:51 -0800346 autofdo_binary_map_[instr_range.dso].AddInstrRange(instr_range);
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800347 }
348
Yabin Cui82d48052023-11-22 15:51:32 -0800349 void ProcessETMBranchList(const ETMBranchList& branch_list) {
Yabin Cuif5ff15d2023-04-28 17:42:02 -0700350 if (!binary_filter_.Filter(branch_list.dso)) {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800351 return;
352 }
353
Yabin Cui82d48052023-11-22 15:51:32 -0800354 auto& branch_map = etm_binary_map_[branch_list.dso].branch_map;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800355 ++branch_map[branch_list.addr][branch_list.branch];
356 }
357
Yabin Cui82d48052023-11-22 15:51:32 -0800358 void ProcessETMBinary() {
359 for (auto& p : etm_binary_map_) {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800360 Dso* dso = p.first;
Yabin Cui82d48052023-11-22 15:51:32 -0800361 ETMBinary& binary = p.second;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800362 binary.dso_type = dso->type();
363 BinaryKey key(dso, 0);
364 if (binary.dso_type == DSO_KERNEL) {
365 if (kernel_map_start_addr_ == 0) {
366 LOG(WARNING) << "Can't convert kernel ip addresses without kernel start addr. So remove "
367 "branches for the kernel.";
368 continue;
369 }
370 if (dso->GetDebugFilePath() == dso->Path()) {
371 // vmlinux isn't available. We still use kernel ip addr. Put kernel start addr in proto
372 // for address conversion later.
373 key.kernel_start_addr = kernel_map_start_addr_;
374 }
375 }
Yabin Cui82d48052023-11-22 15:51:32 -0800376 etm_binary_callback_(key, binary);
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800377 }
378 }
379
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800380 ETMDumpOption etm_dump_option_;
Yabin Cui544fa562023-12-01 15:32:00 -0800381 ETMThreadTreeWithFilter etm_thread_tree_;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800382 std::vector<uint8_t> aux_data_buffer_;
383 std::unique_ptr<ETMDecoder> etm_decoder_;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800384 uint64_t kernel_map_start_addr_ = 0;
Yabin Cui544fa562023-12-01 15:32:00 -0800385 // Store etm branch list data.
Yabin Cui82d48052023-11-22 15:51:32 -0800386 std::unordered_map<Dso*, ETMBinary> etm_binary_map_;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800387};
388
Yabin Cui3024a602023-12-07 15:59:03 -0800389static std::optional<std::vector<AutoFDOBinaryInfo>> ConvertLBRDataToAutoFDO(
390 const LBRData& lbr_data) {
391 std::vector<AutoFDOBinaryInfo> binaries(lbr_data.binaries.size());
392 for (const LBRSample& sample : lbr_data.samples) {
393 if (sample.binary_id != 0) {
394 if (sample.binary_id > binaries.size()) {
395 LOG(ERROR) << "binary_id out of range";
396 return std::nullopt;
397 }
398 binaries[sample.binary_id - 1].AddAddress(sample.vaddr_in_file);
399 }
400 for (size_t i = 0; i < sample.branches.size(); ++i) {
401 const LBRBranch& branch = sample.branches[i];
402 if (branch.from_binary_id == 0) {
403 continue;
404 }
405 if (branch.from_binary_id > binaries.size()) {
406 LOG(ERROR) << "binary_id out of range";
407 return std::nullopt;
408 }
409 if (branch.from_binary_id == branch.to_binary_id) {
410 binaries[branch.from_binary_id - 1].AddBranch(branch.from_vaddr_in_file,
411 branch.to_vaddr_in_file);
412 }
413 if (i > 0 && branch.from_binary_id == sample.branches[i - 1].to_binary_id) {
414 uint64_t begin = sample.branches[i - 1].to_vaddr_in_file;
415 uint64_t end = branch.from_vaddr_in_file;
416 // Use the same logic to skip bogus LBR data as AutoFDO.
417 if (end < begin || end - begin > (1 << 20)) {
418 continue;
419 }
420 binaries[branch.from_binary_id - 1].AddRange(begin, end);
421 }
422 }
423 }
424 return binaries;
425}
426
Yabin Cui544fa562023-12-01 15:32:00 -0800427class LBRPerfDataReader : public PerfDataReader {
428 public:
429 LBRPerfDataReader(std::unique_ptr<RecordFileReader> reader, bool exclude_perf,
430 const RegEx* binary_name_regex)
431 : PerfDataReader(std::move(reader), exclude_perf, binary_name_regex) {}
Yabin Cui544fa562023-12-01 15:32:00 -0800432
433 private:
434 bool ProcessRecord(Record& r) override {
435 thread_tree_.Update(r);
436 if (r.type() == PERF_RECORD_SAMPLE) {
437 auto& sr = static_cast<SampleRecord&>(r);
438 ThreadEntry* thread = thread_tree_.FindThread(sr.tid_data.tid);
439 if (thread == nullptr) {
440 return true;
441 }
442 auto& stack = sr.branch_stack_data;
443 lbr_data_.samples.resize(lbr_data_.samples.size() + 1);
444 LBRSample& sample = lbr_data_.samples.back();
445 std::pair<uint32_t, uint64_t> binary_addr = IpToBinaryAddr(*thread, sr.ip_data.ip);
446 sample.binary_id = binary_addr.first;
Yabin Cui7cba4982023-12-04 16:50:37 -0800447 bool has_valid_binary_id = sample.binary_id != 0;
Yabin Cui544fa562023-12-01 15:32:00 -0800448 sample.vaddr_in_file = binary_addr.second;
449 sample.branches.resize(stack.stack_nr);
450 for (size_t i = 0; i < stack.stack_nr; ++i) {
451 uint64_t from_ip = stack.stack[i].from;
452 uint64_t to_ip = stack.stack[i].to;
453 LBRBranch& branch = sample.branches[i];
454 binary_addr = IpToBinaryAddr(*thread, from_ip);
455 branch.from_binary_id = binary_addr.first;
456 branch.from_vaddr_in_file = binary_addr.second;
457 binary_addr = IpToBinaryAddr(*thread, to_ip);
458 branch.to_binary_id = binary_addr.first;
459 branch.to_vaddr_in_file = binary_addr.second;
Yabin Cui7cba4982023-12-04 16:50:37 -0800460 if (branch.from_binary_id != 0 || branch.to_binary_id != 0) {
461 has_valid_binary_id = true;
462 }
463 }
464 if (!has_valid_binary_id) {
465 lbr_data_.samples.pop_back();
Yabin Cui544fa562023-12-01 15:32:00 -0800466 }
467 }
468 return true;
469 }
470
Yabin Cui7cba4982023-12-04 16:50:37 -0800471 bool PostProcess() override {
472 if (autofdo_callback_) {
Yabin Cui3024a602023-12-07 15:59:03 -0800473 std::optional<std::vector<AutoFDOBinaryInfo>> binaries = ConvertLBRDataToAutoFDO(lbr_data_);
474 if (!binaries) {
475 return false;
476 }
477 for (const auto& [dso, binary_id] : dso_map_) {
478 autofdo_binary_map_[dso] = std::move(binaries.value()[binary_id - 1]);
479 }
Yabin Cui7cba4982023-12-04 16:50:37 -0800480 ProcessAutoFDOBinaryInfo();
Yabin Cui3024a602023-12-07 15:59:03 -0800481 } else if (lbr_data_callback_) {
482 lbr_data_callback_(lbr_data_);
Yabin Cui7cba4982023-12-04 16:50:37 -0800483 }
484 return true;
485 }
Yabin Cui544fa562023-12-01 15:32:00 -0800486
487 std::pair<uint32_t, uint64_t> IpToBinaryAddr(ThreadEntry& thread, uint64_t ip) {
488 const MapEntry* map = thread_tree_.FindMap(&thread, ip);
489 Dso* dso = map->dso;
490 if (thread_tree_.IsUnknownDso(dso) || !binary_filter_.Filter(dso)) {
491 return std::make_pair(0, 0);
492 }
493 uint32_t binary_id = GetBinaryId(dso);
494 uint64_t vaddr_in_file = dso->IpToVaddrInFile(ip, map->start_addr, map->pgoff);
495 return std::make_pair(binary_id, vaddr_in_file);
496 }
497
498 uint32_t GetBinaryId(const Dso* dso) {
499 if (auto it = dso_map_.find(dso); it != dso_map_.end()) {
500 return it->second;
501 }
Yabin Cui3024a602023-12-07 15:59:03 -0800502 lbr_data_.binaries.emplace_back(dso, 0);
503 uint32_t binary_id = static_cast<uint32_t>(lbr_data_.binaries.size());
Yabin Cui544fa562023-12-01 15:32:00 -0800504 dso_map_[dso] = binary_id;
505 return binary_id;
506 }
507
Yabin Cui544fa562023-12-01 15:32:00 -0800508 LBRData lbr_data_;
509 // Map from dso to binary_id in lbr_data_.
510 std::unordered_map<const Dso*, uint32_t> dso_map_;
511};
512
Yabin Cui3024a602023-12-07 15:59:03 -0800513// Read a protobuf file specified by branch_list.proto.
514class BranchListReader {
Yabin Cui89465ad2021-12-14 14:24:55 -0800515 public:
Yabin Cui3024a602023-12-07 15:59:03 -0800516 BranchListReader(const std::string& filename, const RegEx* binary_name_regex)
Yabin Cuif5ff15d2023-04-28 17:42:02 -0700517 : filename_(filename), binary_filter_(binary_name_regex) {}
Yabin Cui89465ad2021-12-14 14:24:55 -0800518
Yabin Cui3024a602023-12-07 15:59:03 -0800519 void AddCallback(const ETMBinaryCallback& callback) { etm_binary_callback_ = callback; }
520 void AddCallback(const LBRDataCallback& callback) { lbr_data_callback_ = callback; }
Yabin Cui89465ad2021-12-14 14:24:55 -0800521
522 bool Read() {
Yabin Cuif5ff15d2023-04-28 17:42:02 -0700523 std::string s;
524 if (!android::base::ReadFileToString(filename_, &s)) {
525 PLOG(ERROR) << "failed to read " << filename_;
Yabin Cui89465ad2021-12-14 14:24:55 -0800526 return false;
527 }
Yabin Cui3024a602023-12-07 15:59:03 -0800528 ETMBinaryMap etm_data;
529 LBRData lbr_data;
530 if (!ParseBranchListData(s, etm_data, lbr_data)) {
Yabin Cuif5ff15d2023-04-28 17:42:02 -0700531 PLOG(ERROR) << "file is in wrong format: " << filename_;
Yabin Cui89465ad2021-12-14 14:24:55 -0800532 return false;
533 }
Yabin Cui3024a602023-12-07 15:59:03 -0800534 if (etm_binary_callback_ && !etm_data.empty()) {
535 ProcessETMData(etm_data);
536 }
537 if (lbr_data_callback_ && !lbr_data.samples.empty()) {
538 ProcessLBRData(lbr_data);
Yabin Cui89465ad2021-12-14 14:24:55 -0800539 }
540 return true;
541 }
542
543 private:
Yabin Cui3024a602023-12-07 15:59:03 -0800544 void ProcessETMData(ETMBinaryMap& etm_data) {
545 for (auto& [key, binary] : etm_data) {
546 if (!binary_filter_.Filter(key.path)) {
547 continue;
548 }
549 etm_binary_callback_(key, binary);
550 }
551 }
552
553 void ProcessLBRData(LBRData& lbr_data) {
554 // 1. Check if we need to remove binaries.
555 std::vector<uint32_t> new_ids(lbr_data.binaries.size());
556 uint32_t next_id = 1;
557
558 for (size_t i = 0; i < lbr_data.binaries.size(); ++i) {
559 if (!binary_filter_.Filter(lbr_data.binaries[i].path)) {
560 new_ids[i] = 0;
561 } else {
562 new_ids[i] = next_id++;
563 }
564 }
565
566 if (next_id <= lbr_data.binaries.size()) {
567 // 2. Modify lbr_data.binaries.
568 for (size_t i = 0; i < lbr_data.binaries.size(); ++i) {
569 if (new_ids[i] != 0) {
570 size_t new_pos = new_ids[i] - 1;
571 lbr_data.binaries[new_pos] = lbr_data.binaries[i];
572 }
573 }
574 lbr_data.binaries.resize(next_id - 1);
575
576 // 3. Modify lbr_data.samples.
577 auto convert_id = [&](uint32_t& binary_id) {
578 if (binary_id != 0) {
579 binary_id = (binary_id <= new_ids.size()) ? new_ids[binary_id - 1] : 0;
580 }
581 };
582 std::vector<LBRSample> new_samples;
583 for (LBRSample& sample : lbr_data.samples) {
584 convert_id(sample.binary_id);
585 bool has_valid_binary_id = sample.binary_id != 0;
586 for (LBRBranch& branch : sample.branches) {
587 convert_id(branch.from_binary_id);
588 convert_id(branch.to_binary_id);
589 if (branch.from_binary_id != 0 || branch.to_binary_id != 0) {
590 has_valid_binary_id = true;
591 }
592 }
593 if (has_valid_binary_id) {
594 new_samples.emplace_back(std::move(sample));
595 }
596 }
597 lbr_data.samples = std::move(new_samples);
598 }
599 lbr_data_callback_(lbr_data);
600 }
601
Yabin Cui89465ad2021-12-14 14:24:55 -0800602 const std::string filename_;
Yabin Cuif5ff15d2023-04-28 17:42:02 -0700603 BinaryFilter binary_filter_;
Yabin Cui3024a602023-12-07 15:59:03 -0800604 ETMBinaryCallback etm_binary_callback_;
605 LBRDataCallback lbr_data_callback_;
Yabin Cui89465ad2021-12-14 14:24:55 -0800606};
607
Yabin Cui82d48052023-11-22 15:51:32 -0800608// Convert ETMBinary into AutoFDOBinaryInfo.
609class ETMBranchListToAutoFDOConverter {
Yabin Cuife4af172021-12-14 15:57:51 -0800610 public:
Yabin Cui82d48052023-11-22 15:51:32 -0800611 std::unique_ptr<AutoFDOBinaryInfo> Convert(const BinaryKey& key, ETMBinary& binary) {
Yabin Cuife4af172021-12-14 15:57:51 -0800612 BuildId build_id = key.build_id;
613 std::unique_ptr<Dso> dso = Dso::CreateDsoWithBuildId(binary.dso_type, key.path, build_id);
614 if (!dso || !CheckBuildId(dso.get(), key.build_id)) {
615 return nullptr;
616 }
617 std::unique_ptr<AutoFDOBinaryInfo> autofdo_binary(new AutoFDOBinaryInfo);
Yabin Cuiab9cb232024-09-05 14:45:54 -0700618 autofdo_binary->executable_segments = GetExecutableSegments(dso.get());
Yabin Cuife4af172021-12-14 15:57:51 -0800619
620 if (dso->type() == DSO_KERNEL) {
621 ModifyBranchMapForKernel(dso.get(), key.kernel_start_addr, binary);
622 }
623
624 auto process_instr_range = [&](const ETMInstrRange& range) {
625 CHECK_EQ(range.dso, dso.get());
626 autofdo_binary->AddInstrRange(range);
627 };
628
Yabin Cui82d48052023-11-22 15:51:32 -0800629 auto result = ConvertETMBranchMapToInstrRanges(dso.get(), binary.GetOrderedBranchMap(),
630 process_instr_range);
Yabin Cuife4af172021-12-14 15:57:51 -0800631 if (!result.ok()) {
632 LOG(WARNING) << "failed to build instr ranges for binary " << dso->Path() << ": "
633 << result.error();
634 return nullptr;
635 }
636 return autofdo_binary;
637 }
638
639 private:
640 bool CheckBuildId(Dso* dso, const BuildId& expected_build_id) {
641 if (expected_build_id.IsEmpty()) {
642 return true;
643 }
644 BuildId build_id;
645 return GetBuildIdFromDsoPath(dso->GetDebugFilePath(), &build_id) &&
646 build_id == expected_build_id;
647 }
648
Yabin Cui82d48052023-11-22 15:51:32 -0800649 void ModifyBranchMapForKernel(Dso* dso, uint64_t kernel_start_addr, ETMBinary& binary) {
Yabin Cuife4af172021-12-14 15:57:51 -0800650 if (kernel_start_addr == 0) {
651 // vmlinux has been provided when generating branch lists. Addresses in branch lists are
652 // already vaddrs in vmlinux.
653 return;
654 }
655 // Addresses are still kernel ip addrs in memory. Need to convert them to vaddrs in vmlinux.
Yabin Cui82d48052023-11-22 15:51:32 -0800656 UnorderedETMBranchMap new_branch_map;
Yabin Cuife4af172021-12-14 15:57:51 -0800657 for (auto& p : binary.branch_map) {
658 uint64_t vaddr_in_file = dso->IpToVaddrInFile(p.first, kernel_start_addr, 0);
659 new_branch_map[vaddr_in_file] = std::move(p.second);
660 }
661 binary.branch_map = std::move(new_branch_map);
662 }
663};
664
Yabin Cuia0208222021-12-10 10:24:29 -0800665// Write instruction ranges to a file in AutoFDO text format.
666class AutoFDOWriter {
667 public:
668 void AddAutoFDOBinary(const BinaryKey& key, AutoFDOBinaryInfo& binary) {
669 auto it = binary_map_.find(key);
670 if (it == binary_map_.end()) {
671 binary_map_[key] = std::move(binary);
672 } else {
673 it->second.Merge(binary);
674 }
675 }
676
Yabin Cuid3928f92024-09-10 11:43:22 -0700677 bool WriteAutoFDO(const std::string& output_filename) {
Yabin Cuia0208222021-12-10 10:24:29 -0800678 std::unique_ptr<FILE, decltype(&fclose)> output_fp(fopen(output_filename.c_str(), "w"), fclose);
679 if (!output_fp) {
680 PLOG(ERROR) << "failed to write to " << output_filename;
681 return false;
682 }
683 // autofdo_binary_map is used to store instruction ranges, which can have a large amount. And
684 // it has a larger access time (instruction ranges * executed time). So it's better to use
685 // unorder_maps to speed up access time. But we also want a stable output here, to compare
686 // output changes result from code changes. So generate a sorted output here.
687 std::vector<BinaryKey> keys;
688 for (auto& p : binary_map_) {
689 keys.emplace_back(p.first);
690 }
691 std::sort(keys.begin(), keys.end(),
692 [](const BinaryKey& key1, const BinaryKey& key2) { return key1.path < key2.path; });
693 if (keys.size() > 1) {
694 fprintf(output_fp.get(),
695 "// Please split this file. AutoFDO only accepts profile for one binary.\n");
696 }
697 for (const auto& key : keys) {
698 const AutoFDOBinaryInfo& binary = binary_map_[key];
Yabin Cuiab9cb232024-09-05 14:45:54 -0700699 // AutoFDO text format needs file_offsets instead of virtual addrs in a binary. So convert
700 // vaddrs to file offsets.
Yabin Cuia0208222021-12-10 10:24:29 -0800701
Yabin Cui1b6b4f12024-05-14 17:12:05 -0700702 // Write range_count_map. Sort the output by addrs.
Yabin Cui3a22c2e2024-05-17 13:30:20 -0700703 std::vector<std::pair<AddrPair, uint64_t>> range_counts;
704 for (std::pair<AddrPair, uint64_t> p : binary.range_count_map) {
Yabin Cuiab9cb232024-09-05 14:45:54 -0700705 std::optional<uint64_t> start_offset = binary.VaddrToOffset(p.first.first);
706 std::optional<uint64_t> end_offset = binary.VaddrToOffset(p.first.second);
707 if (start_offset && end_offset) {
708 p.first.first = start_offset.value();
709 p.first.second = end_offset.value();
Yabin Cui3a22c2e2024-05-17 13:30:20 -0700710 range_counts.emplace_back(p);
Yabin Cuia0208222021-12-10 10:24:29 -0800711 }
Yabin Cui1b6b4f12024-05-14 17:12:05 -0700712 }
Yabin Cui3a22c2e2024-05-17 13:30:20 -0700713 std::sort(range_counts.begin(), range_counts.end());
714 fprintf(output_fp.get(), "%zu\n", range_counts.size());
715 for (const auto& p : range_counts) {
716 fprintf(output_fp.get(), "%" PRIx64 "-%" PRIx64 ":%" PRIu64 "\n", p.first.first,
717 p.first.second, p.second);
Yabin Cuia0208222021-12-10 10:24:29 -0800718 }
719
Yabin Cui1b6b4f12024-05-14 17:12:05 -0700720 // Write addr_count_map. Sort the output by addrs.
Yabin Cui3a22c2e2024-05-17 13:30:20 -0700721 std::vector<std::pair<uint64_t, uint64_t>> address_counts;
722 for (std::pair<uint64_t, uint64_t> p : binary.address_count_map) {
Yabin Cuiab9cb232024-09-05 14:45:54 -0700723 std::optional<uint64_t> offset = binary.VaddrToOffset(p.first);
724 if (offset) {
725 p.first = offset.value();
Yabin Cui3a22c2e2024-05-17 13:30:20 -0700726 address_counts.emplace_back(p);
Yabin Cui1b6b4f12024-05-14 17:12:05 -0700727 }
728 }
Yabin Cui3a22c2e2024-05-17 13:30:20 -0700729 std::sort(address_counts.begin(), address_counts.end());
730 fprintf(output_fp.get(), "%zu\n", address_counts.size());
731 for (const auto& p : address_counts) {
732 fprintf(output_fp.get(), "%" PRIx64 ":%" PRIu64 "\n", p.first, p.second);
Yabin Cui7cba4982023-12-04 16:50:37 -0800733 }
Yabin Cuia0208222021-12-10 10:24:29 -0800734
Yabin Cui1b6b4f12024-05-14 17:12:05 -0700735 // Write branch_count_map. Sort the output by addrs.
Yabin Cui3a22c2e2024-05-17 13:30:20 -0700736 std::vector<std::pair<AddrPair, uint64_t>> branch_counts;
737 for (std::pair<AddrPair, uint64_t> p : binary.branch_count_map) {
Yabin Cuiab9cb232024-09-05 14:45:54 -0700738 std::optional<uint64_t> from_offset = binary.VaddrToOffset(p.first.first);
739 std::optional<uint64_t> to_offset = binary.VaddrToOffset(p.first.second);
740 if (from_offset) {
741 p.first.first = from_offset.value();
742 p.first.second = to_offset ? to_offset.value() : 0;
Yabin Cui3a22c2e2024-05-17 13:30:20 -0700743 branch_counts.emplace_back(p);
Yabin Cui1b6b4f12024-05-14 17:12:05 -0700744 }
745 }
Yabin Cui3a22c2e2024-05-17 13:30:20 -0700746 std::sort(branch_counts.begin(), branch_counts.end());
747 fprintf(output_fp.get(), "%zu\n", branch_counts.size());
748 for (const auto& p : branch_counts) {
749 fprintf(output_fp.get(), "%" PRIx64 "->%" PRIx64 ":%" PRIu64 "\n", p.first.first,
750 p.first.second, p.second);
Yabin Cuia0208222021-12-10 10:24:29 -0800751 }
752
753 // Write the binary path in comment.
Yabin Cuicc006812023-04-06 11:37:08 -0700754 fprintf(output_fp.get(), "// build_id: %s\n", key.build_id.ToString().c_str());
Yabin Cuia0208222021-12-10 10:24:29 -0800755 fprintf(output_fp.get(), "// %s\n\n", key.path.c_str());
756 }
757 return true;
758 }
759
Yabin Cuid3928f92024-09-10 11:43:22 -0700760 // Write bolt profile in format documented in
761 // https://github.com/llvm/llvm-project/blob/main/bolt/include/bolt/Profile/DataAggregator.h#L372.
762 bool WriteBolt(const std::string& output_filename) {
763 std::unique_ptr<FILE, decltype(&fclose)> output_fp(fopen(output_filename.c_str(), "w"), fclose);
764 if (!output_fp) {
765 PLOG(ERROR) << "failed to write to " << output_filename;
766 return false;
767 }
768 // autofdo_binary_map is used to store instruction ranges, which can have a large amount. And
769 // it has a larger access time (instruction ranges * executed time). So it's better to use
770 // unorder_maps to speed up access time. But we also want a stable output here, to compare
771 // output changes result from code changes. So generate a sorted output here.
772 std::vector<BinaryKey> keys;
773 for (auto& p : binary_map_) {
774 keys.emplace_back(p.first);
775 }
776 std::sort(keys.begin(), keys.end(),
777 [](const BinaryKey& key1, const BinaryKey& key2) { return key1.path < key2.path; });
778 if (keys.size() > 1) {
779 fprintf(output_fp.get(),
780 "// Please split this file. BOLT only accepts profile for one binary.\n");
781 }
Yabin Cuic1555522024-09-10 15:54:38 -0700782
Yabin Cuid3928f92024-09-10 11:43:22 -0700783 for (const auto& key : keys) {
784 const AutoFDOBinaryInfo& binary = binary_map_[key];
Yabin Cuid3928f92024-09-10 11:43:22 -0700785 // Write range_count_map. Sort the output by addrs.
786 std::vector<std::pair<AddrPair, uint64_t>> range_counts;
Yabin Cuic1555522024-09-10 15:54:38 -0700787 for (const auto& p : binary.range_count_map) {
788 range_counts.emplace_back(p);
Yabin Cuid3928f92024-09-10 11:43:22 -0700789 }
790 std::sort(range_counts.begin(), range_counts.end());
791 for (const auto& p : range_counts) {
792 fprintf(output_fp.get(), "F %" PRIx64 " %" PRIx64 " %" PRIu64 "\n", p.first.first,
793 p.first.second, p.second);
794 }
795
796 // Write branch_count_map. Sort the output by addrs.
797 std::vector<std::pair<AddrPair, uint64_t>> branch_counts;
Yabin Cuic1555522024-09-10 15:54:38 -0700798 for (const auto& p : binary.branch_count_map) {
799 branch_counts.emplace_back(p);
Yabin Cuid3928f92024-09-10 11:43:22 -0700800 }
801 std::sort(branch_counts.begin(), branch_counts.end());
802 for (const auto& p : branch_counts) {
803 fprintf(output_fp.get(), "B %" PRIx64 " %" PRIx64 " %" PRIu64 " 0\n", p.first.first,
804 p.first.second, p.second);
805 }
Yabin Cui38b72d22024-09-16 12:36:04 -0700806
807 // Write the binary path in comment.
808 fprintf(output_fp.get(), "// build_id: %s\n", key.build_id.ToString().c_str());
809 fprintf(output_fp.get(), "// %s\n", key.path.c_str());
Yabin Cuid3928f92024-09-10 11:43:22 -0700810 }
811 return true;
812 }
813
Yabin Cuia0208222021-12-10 10:24:29 -0800814 private:
815 std::unordered_map<BinaryKey, AutoFDOBinaryInfo, BinaryKeyHash> binary_map_;
816};
817
Yabin Cui3024a602023-12-07 15:59:03 -0800818// Merge branch list data.
819struct BranchListMerger {
Yabin Cui82d48052023-11-22 15:51:32 -0800820 void AddETMBinary(const BinaryKey& key, ETMBinary& binary) {
Yabin Cui3024a602023-12-07 15:59:03 -0800821 if (auto it = etm_data_.find(key); it != etm_data_.end()) {
Yabin Cui9f7569b2021-12-10 11:34:53 -0800822 it->second.Merge(binary);
Yabin Cui3024a602023-12-07 15:59:03 -0800823 } else {
824 etm_data_[key] = std::move(binary);
Yabin Cui9f7569b2021-12-10 11:34:53 -0800825 }
826 }
827
Yabin Cui3024a602023-12-07 15:59:03 -0800828 void AddLBRData(LBRData& lbr_data) {
829 // 1. Merge binaries.
830 std::vector<uint32_t> new_ids(lbr_data.binaries.size());
831 for (size_t i = 0; i < lbr_data.binaries.size(); i++) {
832 const BinaryKey& key = lbr_data.binaries[i];
833 if (auto it = lbr_binary_id_map_.find(key); it != lbr_binary_id_map_.end()) {
834 new_ids[i] = it->second;
835 } else {
836 uint32_t next_id = static_cast<uint32_t>(lbr_binary_id_map_.size()) + 1;
837 new_ids[i] = next_id;
838 lbr_binary_id_map_[key] = next_id;
839 lbr_data_.binaries.emplace_back(key);
840 }
841 }
842
843 // 2. Merge samples.
844 auto convert_id = [&](uint32_t& binary_id) {
845 if (binary_id != 0) {
846 binary_id = (binary_id <= new_ids.size()) ? new_ids[binary_id - 1] : 0;
847 }
848 };
849
850 for (LBRSample& sample : lbr_data.samples) {
851 convert_id(sample.binary_id);
852 for (LBRBranch& branch : sample.branches) {
853 convert_id(branch.from_binary_id);
854 convert_id(branch.to_binary_id);
855 }
856 lbr_data_.samples.emplace_back(std::move(sample));
857 }
858 }
859
860 ETMBinaryMap& GetETMData() { return etm_data_; }
861
862 LBRData& GetLBRData() { return lbr_data_; }
863
864 private:
865 ETMBinaryMap etm_data_;
866 LBRData lbr_data_;
867 std::unordered_map<BinaryKey, uint32_t, BinaryKeyHash> lbr_binary_id_map_;
Yabin Cui4441e972021-12-15 13:51:58 -0800868};
869
Yabin Cui3024a602023-12-07 15:59:03 -0800870// Write branch lists to a protobuf file specified by branch_list.proto.
871static bool WriteBranchListFile(const std::string& output_filename, const ETMBinaryMap& etm_data,
872 const LBRData& lbr_data) {
873 std::string s;
874 if (!etm_data.empty()) {
875 if (!ETMBinaryMapToString(etm_data, s)) {
876 return false;
877 }
878 } else if (!lbr_data.samples.empty()) {
879 if (!LBRDataToString(lbr_data, s)) {
880 return false;
881 }
882 } else {
Yabin Cui9f7569b2021-12-10 11:34:53 -0800883 // Don't produce empty output file.
Yabin Cui3024a602023-12-07 15:59:03 -0800884 LOG(INFO) << "Skip empty output file.";
885 unlink(output_filename.c_str());
Yabin Cui9f7569b2021-12-10 11:34:53 -0800886 return true;
887 }
Yabin Cui3024a602023-12-07 15:59:03 -0800888 if (!android::base::WriteStringToFile(s, output_filename)) {
889 PLOG(ERROR) << "failed to write to " << output_filename;
890 return false;
891 }
892 return true;
893}
Yabin Cui193f2382020-04-01 14:30:03 -0700894
Yabin Cuic573eaa2019-08-21 16:05:07 -0700895class InjectCommand : public Command {
896 public:
897 InjectCommand()
Yabin Cui193f2382020-04-01 14:30:03 -0700898 : Command("inject", "parse etm instruction tracing data",
Yabin Cuic573eaa2019-08-21 16:05:07 -0700899 // clang-format off
900"Usage: simpleperf inject [options]\n"
Yi Kong7ea069a2020-01-06 15:22:55 -0800901"--binary binary_name Generate data only for binaries matching binary_name regex.\n"
Yabin Cuida89bf62021-12-08 14:11:24 -0800902"-i file1,file2,... Input files. Default is perf.data. Support below formats:\n"
Yabin Cui193f2382020-04-01 14:30:03 -0700903" 1. perf.data generated by recording cs-etm event type.\n"
904" 2. branch_list file generated by `inject --output branch-list`.\n"
Yabin Cuida89bf62021-12-08 14:11:24 -0800905" If a file name starts with @, it contains a list of input files.\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700906"-o <file> output file. Default is perf_inject.data.\n"
Yabin Cui193f2382020-04-01 14:30:03 -0700907"--output <format> Select output file format:\n"
908" autofdo -- text format accepted by TextSampleReader\n"
909" of AutoFDO\n"
Yabin Cuid3928f92024-09-10 11:43:22 -0700910" bolt -- text format accepted by `perf2bolt --pa`\n"
Yabin Cui193f2382020-04-01 14:30:03 -0700911" branch-list -- protobuf file in etm_branch_list.proto\n"
912" Default is autofdo.\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700913"--dump-etm type1,type2,... Dump etm data. A type is one of raw, packet and element.\n"
Yabin Cui561bf1b2020-11-03 12:11:07 -0800914"--exclude-perf Exclude trace data for the recording process.\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700915"--symdir <dir> Look for binaries in a directory recursively.\n"
Yabin Cui0bf695b2024-08-22 15:41:29 -0700916"--allow-mismatched-build-id Allow mismatched build ids when searching for debug binaries.\n"
Yabin Cui193f2382020-04-01 14:30:03 -0700917"\n"
918"Examples:\n"
919"1. Generate autofdo text output.\n"
920"$ simpleperf inject -i perf.data -o autofdo.txt --output autofdo\n"
921"\n"
922"2. Generate branch list proto, then convert to autofdo text.\n"
923"$ simpleperf inject -i perf.data -o branch_list.data --output branch-list\n"
924"$ simpleperf inject -i branch_list.data -o autofdo.txt --output autofdo\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700925 // clang-format on
Yabin Cuia0208222021-12-10 10:24:29 -0800926 ) {}
Yabin Cuic573eaa2019-08-21 16:05:07 -0700927
928 bool Run(const std::vector<std::string>& args) override {
Yabin Cui193f2382020-04-01 14:30:03 -0700929 GOOGLE_PROTOBUF_VERIFY_VERSION;
Yabin Cuic573eaa2019-08-21 16:05:07 -0700930 if (!ParseOptions(args)) {
931 return false;
932 }
Yabin Cui193f2382020-04-01 14:30:03 -0700933
Yabin Cuiba51cf42021-12-15 13:21:54 -0800934 CHECK(!input_filenames_.empty());
935 if (IsPerfDataFile(input_filenames_[0])) {
936 switch (output_format_) {
937 case OutputFormat::AutoFDO:
Yabin Cuid3928f92024-09-10 11:43:22 -0700938 [[fallthrough]];
939 case OutputFormat::BOLT:
Yabin Cuiba51cf42021-12-15 13:21:54 -0800940 return ConvertPerfDataToAutoFDO();
941 case OutputFormat::BranchList:
942 return ConvertPerfDataToBranchList();
943 }
944 } else {
Yabin Cui04afffe2021-12-15 14:38:19 -0800945 switch (output_format_) {
946 case OutputFormat::AutoFDO:
Yabin Cuid3928f92024-09-10 11:43:22 -0700947 [[fallthrough]];
948 case OutputFormat::BOLT:
Yabin Cui04afffe2021-12-15 14:38:19 -0800949 return ConvertBranchListToAutoFDO();
950 case OutputFormat::BranchList:
951 return ConvertBranchListToBranchList();
Yabin Cuida89bf62021-12-08 14:11:24 -0800952 }
Yabin Cuic573eaa2019-08-21 16:05:07 -0700953 }
Yabin Cuic573eaa2019-08-21 16:05:07 -0700954 }
955
956 private:
957 bool ParseOptions(const std::vector<std::string>& args) {
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800958 const OptionFormatMap option_formats = {
Yabin Cui0bf695b2024-08-22 15:41:29 -0700959 {"--allow-mismatched-build-id", {OptionValueType::NONE, OptionType::SINGLE}},
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800960 {"--binary", {OptionValueType::STRING, OptionType::SINGLE}},
961 {"--dump-etm", {OptionValueType::STRING, OptionType::SINGLE}},
Yabin Cui561bf1b2020-11-03 12:11:07 -0800962 {"--exclude-perf", {OptionValueType::NONE, OptionType::SINGLE}},
Yabin Cuida89bf62021-12-08 14:11:24 -0800963 {"-i", {OptionValueType::STRING, OptionType::MULTIPLE}},
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800964 {"-o", {OptionValueType::STRING, OptionType::SINGLE}},
965 {"--output", {OptionValueType::STRING, OptionType::SINGLE}},
966 {"--symdir", {OptionValueType::STRING, OptionType::MULTIPLE}},
967 };
968 OptionValueMap options;
969 std::vector<std::pair<OptionName, OptionValue>> ordered_options;
970 if (!PreprocessOptions(args, option_formats, &options, &ordered_options, nullptr)) {
971 return false;
972 }
973
Yabin Cui0bf695b2024-08-22 15:41:29 -0700974 if (options.PullBoolValue("--allow-mismatched-build-id")) {
975 Dso::AllowMismatchedBuildId();
976 }
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800977 if (auto value = options.PullValue("--binary"); value) {
Yabin Cuif5d7a3b2024-07-11 17:11:55 -0700978 binary_name_regex_ = RegEx::Create(value->str_value);
Yabin Cuif00f4fc2022-11-23 15:15:30 -0800979 if (binary_name_regex_ == nullptr) {
980 return false;
981 }
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800982 }
983 if (auto value = options.PullValue("--dump-etm"); value) {
Yabin Cuif5d7a3b2024-07-11 17:11:55 -0700984 if (!ParseEtmDumpOption(value->str_value, &etm_dump_option_)) {
Yabin Cuic573eaa2019-08-21 16:05:07 -0700985 return false;
986 }
987 }
Yabin Cui561bf1b2020-11-03 12:11:07 -0800988 exclude_perf_ = options.PullBoolValue("--exclude-perf");
Yabin Cuida89bf62021-12-08 14:11:24 -0800989
990 for (const OptionValue& value : options.PullValues("-i")) {
Yabin Cuif5d7a3b2024-07-11 17:11:55 -0700991 std::vector<std::string> files = android::base::Split(value.str_value, ",");
Yabin Cuida89bf62021-12-08 14:11:24 -0800992 for (std::string& file : files) {
993 if (android::base::StartsWith(file, "@")) {
994 if (!ReadFileList(file.substr(1), &input_filenames_)) {
995 return false;
996 }
997 } else {
998 input_filenames_.emplace_back(file);
999 }
1000 }
1001 }
1002 if (input_filenames_.empty()) {
1003 input_filenames_.emplace_back("perf.data");
1004 }
Yabin Cuie09cb9f2020-11-03 09:24:48 -08001005 options.PullStringValue("-o", &output_filename_);
1006 if (auto value = options.PullValue("--output"); value) {
Yabin Cuif5d7a3b2024-07-11 17:11:55 -07001007 const std::string& output = value->str_value;
Yabin Cuie09cb9f2020-11-03 09:24:48 -08001008 if (output == "autofdo") {
1009 output_format_ = OutputFormat::AutoFDO;
Yabin Cuid3928f92024-09-10 11:43:22 -07001010 } else if (output == "bolt") {
1011 output_format_ = OutputFormat::BOLT;
Yabin Cuie09cb9f2020-11-03 09:24:48 -08001012 } else if (output == "branch-list") {
1013 output_format_ = OutputFormat::BranchList;
1014 } else {
1015 LOG(ERROR) << "unknown format in --output option: " << output;
1016 return false;
1017 }
1018 }
Yabin Cuiab9cb232024-09-05 14:45:54 -07001019 if (std::vector<OptionValue> values = options.PullValues("--symdir"); !values.empty()) {
1020 for (const OptionValue& value : values) {
1021 if (!Dso::AddSymbolDir(value.str_value)) {
1022 return false;
1023 }
Yabin Cuie09cb9f2020-11-03 09:24:48 -08001024 }
Yabin Cuif2fe9f02021-12-14 13:06:50 -08001025 // Symbol dirs are cleaned when Dso count is decreased to zero, which can happen between
1026 // processing input files. To make symbol dirs always available, create a placeholder dso to
1027 // prevent cleaning from happening.
1028 placeholder_dso_ = Dso::CreateDso(DSO_UNKNOWN_FILE, "unknown");
Yabin Cuie09cb9f2020-11-03 09:24:48 -08001029 }
1030 CHECK(options.values.empty());
Yabin Cuic573eaa2019-08-21 16:05:07 -07001031 return true;
1032 }
1033
Yabin Cuida89bf62021-12-08 14:11:24 -08001034 bool ReadFileList(const std::string& path, std::vector<std::string>* file_list) {
1035 std::string data;
1036 if (!android::base::ReadFileToString(path, &data)) {
1037 PLOG(ERROR) << "failed to read " << path;
1038 return false;
1039 }
1040 std::vector<std::string> tokens = android::base::Tokenize(data, " \t\n\r");
1041 file_list->insert(file_list->end(), tokens.begin(), tokens.end());
1042 return true;
1043 }
1044
Yabin Cui544fa562023-12-01 15:32:00 -08001045 bool ReadPerfDataFiles(const std::function<void(PerfDataReader&)> reader_callback) {
1046 if (input_filenames_.empty()) {
1047 return true;
1048 }
1049
1050 std::string expected_data_type;
1051 for (const auto& filename : input_filenames_) {
1052 std::unique_ptr<RecordFileReader> file_reader = RecordFileReader::CreateInstance(filename);
1053 if (!file_reader) {
Yabin Cuiba51cf42021-12-15 13:21:54 -08001054 return false;
Yabin Cui193f2382020-04-01 14:30:03 -07001055 }
Yabin Cui544fa562023-12-01 15:32:00 -08001056 std::string data_type = PerfDataReader::GetDataType(*file_reader);
1057 if (expected_data_type.empty()) {
1058 expected_data_type = data_type;
1059 } else if (expected_data_type != data_type) {
1060 LOG(ERROR) << "files have different data type: " << input_filenames_[0] << ", " << filename;
1061 return false;
1062 }
1063 std::unique_ptr<PerfDataReader> reader;
1064 if (data_type == "etm") {
1065 reader.reset(new ETMPerfDataReader(std::move(file_reader), exclude_perf_,
1066 binary_name_regex_.get(), etm_dump_option_));
1067 } else if (data_type == "lbr") {
1068 reader.reset(
1069 new LBRPerfDataReader(std::move(file_reader), exclude_perf_, binary_name_regex_.get()));
1070 } else {
1071 LOG(ERROR) << "unsupported data type " << data_type << " in " << filename;
1072 return false;
1073 }
1074 reader_callback(*reader);
1075 if (!reader->Read()) {
1076 return false;
1077 }
1078 }
1079 return true;
1080 }
1081
1082 bool ConvertPerfDataToAutoFDO() {
1083 AutoFDOWriter autofdo_writer;
1084 auto afdo_callback = [&](const BinaryKey& key, AutoFDOBinaryInfo& binary) {
1085 autofdo_writer.AddAutoFDOBinary(key, binary);
1086 };
Yabin Cui3024a602023-12-07 15:59:03 -08001087 auto reader_callback = [&](PerfDataReader& reader) { reader.AddCallback(afdo_callback); };
Yabin Cui544fa562023-12-01 15:32:00 -08001088 if (!ReadPerfDataFiles(reader_callback)) {
1089 return false;
Yabin Cuic573eaa2019-08-21 16:05:07 -07001090 }
Yabin Cuid3928f92024-09-10 11:43:22 -07001091 if (output_format_ == OutputFormat::AutoFDO) {
1092 return autofdo_writer.WriteAutoFDO(output_filename_);
1093 }
1094 CHECK(output_format_ == OutputFormat::BOLT);
1095 return autofdo_writer.WriteBolt(output_filename_);
Yabin Cuic573eaa2019-08-21 16:05:07 -07001096 }
1097
Yabin Cuiba51cf42021-12-15 13:21:54 -08001098 bool ConvertPerfDataToBranchList() {
Yabin Cui3024a602023-12-07 15:59:03 -08001099 BranchListMerger merger;
Yabin Cui544fa562023-12-01 15:32:00 -08001100 auto etm_callback = [&](const BinaryKey& key, ETMBinary& binary) {
Yabin Cui3024a602023-12-07 15:59:03 -08001101 merger.AddETMBinary(key, binary);
Yabin Cuiba51cf42021-12-15 13:21:54 -08001102 };
Yabin Cui3024a602023-12-07 15:59:03 -08001103 auto lbr_callback = [&](LBRData& lbr_data) { merger.AddLBRData(lbr_data); };
1104
1105 auto reader_callback = [&](PerfDataReader& reader) {
1106 reader.AddCallback(etm_callback);
1107 reader.AddCallback(lbr_callback);
1108 };
Yabin Cui544fa562023-12-01 15:32:00 -08001109 if (!ReadPerfDataFiles(reader_callback)) {
1110 return false;
Yabin Cui193f2382020-04-01 14:30:03 -07001111 }
Yabin Cui3024a602023-12-07 15:59:03 -08001112 return WriteBranchListFile(output_filename_, merger.GetETMData(), merger.GetLBRData());
Yabin Cuiba51cf42021-12-15 13:21:54 -08001113 }
1114
1115 bool ConvertBranchListToAutoFDO() {
Yabin Cui4441e972021-12-15 13:51:58 -08001116 // Step1 : Merge branch lists from all input files.
Yabin Cui3024a602023-12-07 15:59:03 -08001117 BranchListMerger merger;
1118 auto etm_callback = [&](const BinaryKey& key, ETMBinary& binary) {
1119 merger.AddETMBinary(key, binary);
Yabin Cui89465ad2021-12-14 14:24:55 -08001120 };
Yabin Cui3024a602023-12-07 15:59:03 -08001121 auto lbr_callback = [&](LBRData& lbr_data) { merger.AddLBRData(lbr_data); };
Yabin Cuiba51cf42021-12-15 13:21:54 -08001122 for (const auto& input_filename : input_filenames_) {
Yabin Cui3024a602023-12-07 15:59:03 -08001123 BranchListReader reader(input_filename, binary_name_regex_.get());
1124 reader.AddCallback(etm_callback);
1125 reader.AddCallback(lbr_callback);
Yabin Cuiba51cf42021-12-15 13:21:54 -08001126 if (!reader.Read()) {
1127 return false;
1128 }
Yabin Cui193f2382020-04-01 14:30:03 -07001129 }
Yabin Cui4441e972021-12-15 13:51:58 -08001130
Yabin Cui3024a602023-12-07 15:59:03 -08001131 // Step2: Convert ETMBinary and LBRData to AutoFDOBinaryInfo.
Yabin Cui4441e972021-12-15 13:51:58 -08001132 AutoFDOWriter autofdo_writer;
Yabin Cui82d48052023-11-22 15:51:32 -08001133 ETMBranchListToAutoFDOConverter converter;
Yabin Cui3024a602023-12-07 15:59:03 -08001134 for (auto& p : merger.GetETMData()) {
Yabin Cui4441e972021-12-15 13:51:58 -08001135 const BinaryKey& key = p.first;
Yabin Cui82d48052023-11-22 15:51:32 -08001136 ETMBinary& binary = p.second;
Yabin Cui4441e972021-12-15 13:51:58 -08001137 std::unique_ptr<AutoFDOBinaryInfo> autofdo_binary = converter.Convert(key, binary);
1138 if (autofdo_binary) {
1139 // Create new BinaryKey with kernel_start_addr = 0. Because AutoFDO output doesn't care
1140 // kernel_start_addr.
1141 autofdo_writer.AddAutoFDOBinary(BinaryKey(key.path, key.build_id), *autofdo_binary);
1142 }
1143 }
Yabin Cui3024a602023-12-07 15:59:03 -08001144 if (!merger.GetLBRData().samples.empty()) {
1145 LBRData& lbr_data = merger.GetLBRData();
1146 std::optional<std::vector<AutoFDOBinaryInfo>> binaries = ConvertLBRDataToAutoFDO(lbr_data);
1147 if (!binaries) {
1148 return false;
1149 }
1150 for (size_t i = 0; i < binaries.value().size(); ++i) {
Yabin Cuiab9cb232024-09-05 14:45:54 -07001151 BinaryKey& key = lbr_data.binaries[i];
1152 AutoFDOBinaryInfo& binary = binaries.value()[i];
1153 std::unique_ptr<Dso> dso = Dso::CreateDsoWithBuildId(DSO_ELF_FILE, key.path, key.build_id);
1154 if (!dso) {
1155 continue;
1156 }
1157 binary.executable_segments = GetExecutableSegments(dso.get());
1158 autofdo_writer.AddAutoFDOBinary(key, binary);
Yabin Cui3024a602023-12-07 15:59:03 -08001159 }
1160 }
Yabin Cui4441e972021-12-15 13:51:58 -08001161
1162 // Step3: Write AutoFDOBinaryInfo.
Yabin Cuid3928f92024-09-10 11:43:22 -07001163 if (output_format_ == OutputFormat::AutoFDO) {
1164 return autofdo_writer.WriteAutoFDO(output_filename_);
1165 }
1166 CHECK(output_format_ == OutputFormat::BOLT);
1167 return autofdo_writer.WriteBolt(output_filename_);
Yabin Cuifad7bbe2019-09-18 16:05:51 -07001168 }
1169
Yabin Cui04afffe2021-12-15 14:38:19 -08001170 bool ConvertBranchListToBranchList() {
1171 // Step1 : Merge branch lists from all input files.
Yabin Cui3024a602023-12-07 15:59:03 -08001172 BranchListMerger merger;
1173 auto etm_callback = [&](const BinaryKey& key, ETMBinary& binary) {
1174 merger.AddETMBinary(key, binary);
Yabin Cui04afffe2021-12-15 14:38:19 -08001175 };
Yabin Cui3024a602023-12-07 15:59:03 -08001176 auto lbr_callback = [&](LBRData& lbr_data) { merger.AddLBRData(lbr_data); };
Yabin Cui04afffe2021-12-15 14:38:19 -08001177 for (const auto& input_filename : input_filenames_) {
Yabin Cui3024a602023-12-07 15:59:03 -08001178 BranchListReader reader(input_filename, binary_name_regex_.get());
1179 reader.AddCallback(etm_callback);
1180 reader.AddCallback(lbr_callback);
Yabin Cui04afffe2021-12-15 14:38:19 -08001181 if (!reader.Read()) {
1182 return false;
1183 }
1184 }
Yabin Cui82d48052023-11-22 15:51:32 -08001185 // Step2: Write ETMBinary.
Yabin Cui3024a602023-12-07 15:59:03 -08001186 return WriteBranchListFile(output_filename_, merger.GetETMData(), merger.GetLBRData());
Yabin Cui04afffe2021-12-15 14:38:19 -08001187 }
1188
Yabin Cuif00f4fc2022-11-23 15:15:30 -08001189 std::unique_ptr<RegEx> binary_name_regex_;
Yabin Cui561bf1b2020-11-03 12:11:07 -08001190 bool exclude_perf_ = false;
Yabin Cuida89bf62021-12-08 14:11:24 -08001191 std::vector<std::string> input_filenames_;
Yabin Cuic573eaa2019-08-21 16:05:07 -07001192 std::string output_filename_ = "perf_inject.data";
Yabin Cui193f2382020-04-01 14:30:03 -07001193 OutputFormat output_format_ = OutputFormat::AutoFDO;
Yabin Cuic573eaa2019-08-21 16:05:07 -07001194 ETMDumpOption etm_dump_option_;
Yabin Cuifad7bbe2019-09-18 16:05:51 -07001195
Yabin Cuif2fe9f02021-12-14 13:06:50 -08001196 std::unique_ptr<Dso> placeholder_dso_;
Yabin Cuic573eaa2019-08-21 16:05:07 -07001197};
1198
1199} // namespace
1200
1201void RegisterInjectCommand() {
1202 return RegisterCommand("inject", [] { return std::unique_ptr<Command>(new InjectCommand); });
1203}
Yabin Cuiacbdb242020-07-07 15:56:34 -07001204
1205} // namespace simpleperf