blob: b2c573d48ff3751adb2336a212bce63010631dac [file] [log] [blame]
Yabin Cuic573eaa2019-08-21 16:05:07 -07001/*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <stdio.h>
Yabin Cui0b60f9c2021-02-09 10:54:17 -080018#include <unistd.h>
Yabin Cuic573eaa2019-08-21 16:05:07 -070019
20#include <memory>
Yabin Cui561bf1b2020-11-03 12:11:07 -080021#include <optional>
Yi Kong7ea069a2020-01-06 15:22:55 -080022#include <regex>
Yi Kong61e78cd2020-01-13 15:43:01 -080023#include <string>
Yabin Cuic573eaa2019-08-21 16:05:07 -070024
Yabin Cui561bf1b2020-11-03 12:11:07 -080025#include <android-base/parseint.h>
Yabin Cuida89bf62021-12-08 14:11:24 -080026#include <android-base/strings.h>
Yabin Cui561bf1b2020-11-03 12:11:07 -080027
ThiƩbaud Weksteen4848ee02020-10-23 16:06:59 +020028#include "ETMDecoder.h"
Yabin Cui193f2382020-04-01 14:30:03 -070029#include "cmd_inject_impl.h"
Yabin Cuic573eaa2019-08-21 16:05:07 -070030#include "command.h"
31#include "record_file.h"
Yabin Cui193f2382020-04-01 14:30:03 -070032#include "system/extras/simpleperf/etm_branch_list.pb.h"
Yabin Cuic573eaa2019-08-21 16:05:07 -070033#include "thread_tree.h"
Yabin Cuifad7bbe2019-09-18 16:05:51 -070034#include "utils.h"
Yabin Cuic573eaa2019-08-21 16:05:07 -070035
Yabin Cui193f2382020-04-01 14:30:03 -070036namespace simpleperf {
37
// Packs a bit sequence into a byte string for storage in a proto field. Bit i of `branch` is
// stored in bit (i % 8) of byte (i / 8); unused bits in the last byte are left as zero.
std::string BranchToProtoString(const std::vector<bool>& branch) {
  const size_t bit_count = branch.size();
  std::string packed((bit_count + 7) / 8, '\0');
  size_t bit_index = 0;
  for (bool bit : branch) {
    if (bit) {
      packed[bit_index / 8] |= 1 << (bit_index % 8);
    }
    ++bit_index;
  }
  return packed;
}
48
// Unpacks a byte string produced by BranchToProtoString() back into a bit sequence.
//
// s:        packed bits; bit i lives in bit (i % 8) of byte (i / 8).
// bit_size: number of bits to return.
//
// Returns a vector with exactly bit_size entries. `s` and `bit_size` come from a parsed file
// and may disagree, so bits that are not backed by a byte in `s` are reported as false instead
// of reading past the end of the string.
std::vector<bool> ProtoStringToBranch(const std::string& s, size_t bit_size) {
  std::vector<bool> branch(bit_size, false);
  const size_t byte_count = s.size();
  for (size_t i = 0; i < bit_size; i++) {
    const size_t byte_index = i >> 3;
    if (byte_index >= byte_count) {
      // No more input bytes; the remaining bits keep their default value (false).
      break;
    }
    if (s[byte_index] & (1 << (i & 7))) {
      branch[i] = true;
    }
  }
  return branch;
}
58
Yabin Cuic573eaa2019-08-21 16:05:07 -070059namespace {
60
Yabin Cui9f7569b2021-12-10 11:34:53 -080061constexpr const char* ETM_BRANCH_LIST_PROTO_MAGIC = "simpleperf:EtmBranchList";
62
Yabin Cuifad7bbe2019-09-18 16:05:51 -070063using AddrPair = std::pair<uint64_t, uint64_t>;
64
65struct AddrPairHash {
66 size_t operator()(const AddrPair& ap) const noexcept {
67 size_t seed = 0;
68 HashCombine(seed, ap.first);
69 HashCombine(seed, ap.second);
70 return seed;
71 }
72};
73
// Output file formats that can be selected with the --output option.
enum class OutputFormat {
  // Selected by "--output autofdo".
  AutoFDO,
  // Selected by "--output branch-list".
  BranchList,
};
78
Yabin Cuif2fe9f02021-12-14 13:06:50 -080079// When processing binary info in an input file, the binaries are identified by their path.
80// But this isn't sufficient when merging binary info from multiple input files. Because
81// binaries for the same path may be changed between generating input files. So after processing
82// each input file, we create BinaryKeys to identify binaries, which consider path, build_id and
83// kernel_start_addr (for vmlinux). kernel_start_addr affects how addresses in BranchListBinaryInfo
84// are interpreted for vmlinux.
Yabin Cuia0208222021-12-10 10:24:29 -080085struct BinaryKey {
86 std::string path;
87 BuildId build_id;
88 uint64_t kernel_start_addr = 0;
89
90 BinaryKey() {}
91
Yabin Cui89465ad2021-12-14 14:24:55 -080092 BinaryKey(const std::string& path, BuildId build_id) : path(path), build_id(build_id) {}
93
Yabin Cuia0208222021-12-10 10:24:29 -080094 BinaryKey(Dso* dso, uint64_t kernel_start_addr) : path(dso->Path()) {
95 build_id = Dso::FindExpectedBuildIdForPath(dso->Path());
96 if (dso->type() == DSO_KERNEL) {
97 this->kernel_start_addr = kernel_start_addr;
98 }
99 }
100
101 bool operator==(const BinaryKey& other) const {
102 return path == other.path && build_id == other.build_id &&
103 kernel_start_addr == other.kernel_start_addr;
104 }
105};
106
107struct BinaryKeyHash {
108 size_t operator()(const BinaryKey& key) const noexcept {
109 size_t seed = 0;
110 HashCombine(seed, key.path);
111 HashCombine(seed, key.build_id);
112 if (key.kernel_start_addr != 0) {
113 HashCombine(seed, key.kernel_start_addr);
114 }
115 return seed;
116 }
117};
118
Yabin Cui1befd522021-12-10 12:24:22 -0800119static void OverflowSafeAdd(uint64_t& dest, uint64_t add) {
120 if (__builtin_add_overflow(dest, add, &dest)) {
Yabin Cuif7a64b82021-12-15 15:23:47 -0800121 LOG(WARNING) << "Branch count overflow happened.";
Yabin Cui1befd522021-12-10 12:24:22 -0800122 dest = UINT64_MAX;
123 }
124}
125
Yabin Cui193f2382020-04-01 14:30:03 -0700126struct AutoFDOBinaryInfo {
Yabin Cuia0208222021-12-10 10:24:29 -0800127 uint64_t first_load_segment_addr = 0;
Yabin Cuifad7bbe2019-09-18 16:05:51 -0700128 std::unordered_map<AddrPair, uint64_t, AddrPairHash> range_count_map;
129 std::unordered_map<AddrPair, uint64_t, AddrPairHash> branch_count_map;
Yabin Cuia0208222021-12-10 10:24:29 -0800130
Yabin Cuife4af172021-12-14 15:57:51 -0800131 void AddInstrRange(const ETMInstrRange& instr_range) {
132 uint64_t total_count = instr_range.branch_taken_count;
133 OverflowSafeAdd(total_count, instr_range.branch_not_taken_count);
134 OverflowSafeAdd(range_count_map[AddrPair(instr_range.start_addr, instr_range.end_addr)],
135 total_count);
136 if (instr_range.branch_taken_count > 0) {
137 OverflowSafeAdd(branch_count_map[AddrPair(instr_range.end_addr, instr_range.branch_to_addr)],
138 instr_range.branch_taken_count);
139 }
140 }
141
Yabin Cuia0208222021-12-10 10:24:29 -0800142 void Merge(const AutoFDOBinaryInfo& other) {
143 for (const auto& p : other.range_count_map) {
144 auto res = range_count_map.emplace(p.first, p.second);
145 if (!res.second) {
Yabin Cui1befd522021-12-10 12:24:22 -0800146 OverflowSafeAdd(res.first->second, p.second);
Yabin Cuia0208222021-12-10 10:24:29 -0800147 }
148 }
149 for (const auto& p : other.branch_count_map) {
150 auto res = branch_count_map.emplace(p.first, p.second);
151 if (!res.second) {
Yabin Cui1befd522021-12-10 12:24:22 -0800152 OverflowSafeAdd(res.first->second, p.second);
Yabin Cuia0208222021-12-10 10:24:29 -0800153 }
154 }
155 }
Yabin Cuifad7bbe2019-09-18 16:05:51 -0700156};
157
Yabin Cui9f7569b2021-12-10 11:34:53 -0800158using UnorderedBranchMap =
Yabin Cui193f2382020-04-01 14:30:03 -0700159 std::unordered_map<uint64_t, std::unordered_map<std::vector<bool>, uint64_t>>;
160
Yabin Cui9f7569b2021-12-10 11:34:53 -0800161struct BranchListBinaryInfo {
162 DsoType dso_type;
163 UnorderedBranchMap branch_map;
164
165 void Merge(const BranchListBinaryInfo& other) {
166 for (auto& other_p : other.branch_map) {
167 auto it = branch_map.find(other_p.first);
168 if (it == branch_map.end()) {
169 branch_map[other_p.first] = std::move(other_p.second);
170 } else {
171 auto& map2 = it->second;
172 for (auto& other_p2 : other_p.second) {
173 auto it2 = map2.find(other_p2.first);
174 if (it2 == map2.end()) {
175 map2[other_p2.first] = other_p2.second;
176 } else {
Yabin Cui1befd522021-12-10 12:24:22 -0800177 OverflowSafeAdd(it2->second, other_p2.second);
Yabin Cui9f7569b2021-12-10 11:34:53 -0800178 }
179 }
180 }
181 }
182 }
Yabin Cui89465ad2021-12-14 14:24:55 -0800183
184 BranchMap GetOrderedBranchMap() const {
185 BranchMap result;
186 for (const auto& p : branch_map) {
187 uint64_t addr = p.first;
188 const auto& b_map = p.second;
189 result[addr] = std::map<std::vector<bool>, uint64_t>(b_map.begin(), b_map.end());
190 }
191 return result;
192 }
Yabin Cui9f7569b2021-12-10 11:34:53 -0800193};
194
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800195using AutoFDOBinaryCallback = std::function<void(const BinaryKey&, AutoFDOBinaryInfo&)>;
196using BranchListBinaryCallback = std::function<void(const BinaryKey&, BranchListBinaryInfo&)>;
197
Yabin Cui561bf1b2020-11-03 12:11:07 -0800198class ThreadTreeWithFilter : public ThreadTree {
199 public:
200 void ExcludePid(pid_t pid) { exclude_pid_ = pid; }
201
Yabin Cuia89a3742021-02-11 13:14:54 -0800202 ThreadEntry* FindThread(int tid) const override {
Yabin Cui561bf1b2020-11-03 12:11:07 -0800203 ThreadEntry* thread = ThreadTree::FindThread(tid);
204 if (thread != nullptr && exclude_pid_ && thread->pid == exclude_pid_) {
205 return nullptr;
206 }
207 return thread;
208 }
209
210 private:
211 std::optional<pid_t> exclude_pid_;
212};
213
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800214class DsoFilter {
215 public:
216 DsoFilter(const std::regex& binary_name_regex) : binary_name_regex_(binary_name_regex) {}
217
218 bool FilterDso(Dso* dso) {
219 auto lookup = dso_filter_cache_.find(dso);
220 if (lookup != dso_filter_cache_.end()) {
221 return lookup->second;
222 }
223 bool match = std::regex_search(dso->Path(), binary_name_regex_);
224 dso_filter_cache_.insert({dso, match});
225 return match;
226 }
227
228 private:
229 std::regex binary_name_regex_;
230 std::unordered_map<Dso*, bool> dso_filter_cache_;
231};
232
233static uint64_t GetFirstLoadSegmentVaddr(Dso* dso) {
234 ElfStatus status;
235 if (auto elf = ElfFile::Open(dso->GetDebugFilePath(), &status); elf) {
236 for (const auto& segment : elf->GetProgramHeader()) {
237 if (segment.is_load) {
238 return segment.vaddr;
239 }
240 }
241 }
242 return 0;
243}
244
245// Read perf.data, and generate AutoFDOBinaryInfo or BranchListBinaryInfo.
246// To avoid resetting data, it only processes one input file per instance.
247class PerfDataReader {
248 public:
249 PerfDataReader(const std::string& filename, bool exclude_perf, ETMDumpOption etm_dump_option,
250 const std::regex& binary_name_regex)
251 : filename_(filename),
252 exclude_perf_(exclude_perf),
253 etm_dump_option_(etm_dump_option),
254 dso_filter_(binary_name_regex) {}
255
256 void SetCallback(const AutoFDOBinaryCallback& callback) { autofdo_callback_ = callback; }
257 void SetCallback(const BranchListBinaryCallback& callback) { branch_list_callback_ = callback; }
258
259 bool Read() {
260 record_file_reader_ = RecordFileReader::CreateInstance(filename_);
261 if (!record_file_reader_) {
262 return false;
263 }
264 if (exclude_perf_) {
265 const auto& info_map = record_file_reader_->GetMetaInfoFeature();
266 if (auto it = info_map.find("recording_process"); it == info_map.end()) {
267 LOG(ERROR) << filename_ << " doesn't support --exclude-perf";
268 return false;
269 } else {
270 int pid;
271 if (!android::base::ParseInt(it->second, &pid, 0)) {
272 LOG(ERROR) << "invalid recording_process " << it->second << " in " << filename_;
273 return false;
274 }
275 thread_tree_.ExcludePid(pid);
276 }
277 }
278 record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_);
279 if (!record_file_reader_->ReadDataSection([this](auto r) { return ProcessRecord(r.get()); })) {
280 return false;
281 }
282 if (etm_decoder_ && !etm_decoder_->FinishData()) {
283 return false;
284 }
285 if (autofdo_callback_) {
286 ProcessAutoFDOBinaryInfo();
287 } else if (branch_list_callback_) {
288 ProcessBranchListBinaryInfo();
289 }
290 return true;
291 }
292
293 private:
294 bool ProcessRecord(Record* r) {
295 thread_tree_.Update(*r);
296 if (r->type() == PERF_RECORD_AUXTRACE_INFO) {
297 etm_decoder_ = ETMDecoder::Create(*static_cast<AuxTraceInfoRecord*>(r), thread_tree_);
298 if (!etm_decoder_) {
299 return false;
300 }
301 etm_decoder_->EnableDump(etm_dump_option_);
302 if (autofdo_callback_) {
303 etm_decoder_->RegisterCallback(
304 [this](const ETMInstrRange& range) { ProcessInstrRange(range); });
305 } else if (branch_list_callback_) {
306 etm_decoder_->RegisterCallback(
307 [this](const ETMBranchList& branch) { ProcessBranchList(branch); });
308 }
309 } else if (r->type() == PERF_RECORD_AUX) {
310 AuxRecord* aux = static_cast<AuxRecord*>(r);
311 uint64_t aux_size = aux->data->aux_size;
312 if (aux_size > 0) {
313 if (aux_data_buffer_.size() < aux_size) {
314 aux_data_buffer_.resize(aux_size);
315 }
316 if (!record_file_reader_->ReadAuxData(aux->Cpu(), aux->data->aux_offset,
317 aux_data_buffer_.data(), aux_size)) {
318 LOG(ERROR) << "failed to read aux data in " << filename_;
319 return false;
320 }
321 return etm_decoder_->ProcessData(aux_data_buffer_.data(), aux_size, !aux->Unformatted(),
322 aux->Cpu());
323 }
324 } else if (r->type() == PERF_RECORD_MMAP && r->InKernel()) {
325 auto& mmap_r = *static_cast<MmapRecord*>(r);
326 if (android::base::StartsWith(mmap_r.filename, DEFAULT_KERNEL_MMAP_NAME)) {
327 kernel_map_start_addr_ = mmap_r.data->addr;
328 }
329 }
330 return true;
331 }
332
333 void ProcessInstrRange(const ETMInstrRange& instr_range) {
334 if (!dso_filter_.FilterDso(instr_range.dso)) {
335 return;
336 }
337
Yabin Cuife4af172021-12-14 15:57:51 -0800338 autofdo_binary_map_[instr_range.dso].AddInstrRange(instr_range);
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800339 }
340
341 void ProcessBranchList(const ETMBranchList& branch_list) {
342 if (!dso_filter_.FilterDso(branch_list.dso)) {
343 return;
344 }
345
346 auto& branch_map = branch_list_binary_map_[branch_list.dso].branch_map;
347 ++branch_map[branch_list.addr][branch_list.branch];
348 }
349
350 void ProcessAutoFDOBinaryInfo() {
351 for (auto& p : autofdo_binary_map_) {
352 Dso* dso = p.first;
353 AutoFDOBinaryInfo& binary = p.second;
354 binary.first_load_segment_addr = GetFirstLoadSegmentVaddr(dso);
355 autofdo_callback_(BinaryKey(dso, 0), binary);
356 }
357 }
358
359 void ProcessBranchListBinaryInfo() {
360 for (auto& p : branch_list_binary_map_) {
361 Dso* dso = p.first;
362 BranchListBinaryInfo& binary = p.second;
363 binary.dso_type = dso->type();
364 BinaryKey key(dso, 0);
365 if (binary.dso_type == DSO_KERNEL) {
366 if (kernel_map_start_addr_ == 0) {
367 LOG(WARNING) << "Can't convert kernel ip addresses without kernel start addr. So remove "
368 "branches for the kernel.";
369 continue;
370 }
371 if (dso->GetDebugFilePath() == dso->Path()) {
372 // vmlinux isn't available. We still use kernel ip addr. Put kernel start addr in proto
373 // for address conversion later.
374 key.kernel_start_addr = kernel_map_start_addr_;
375 }
376 }
377 branch_list_callback_(key, binary);
378 }
379 }
380
381 const std::string filename_;
382 bool exclude_perf_;
383 ETMDumpOption etm_dump_option_;
384 DsoFilter dso_filter_;
385 AutoFDOBinaryCallback autofdo_callback_;
386 BranchListBinaryCallback branch_list_callback_;
387
388 std::vector<uint8_t> aux_data_buffer_;
389 std::unique_ptr<ETMDecoder> etm_decoder_;
390 std::unique_ptr<RecordFileReader> record_file_reader_;
391 ThreadTreeWithFilter thread_tree_;
392 uint64_t kernel_map_start_addr_ = 0;
393 // Store results for AutoFDO.
394 std::unordered_map<Dso*, AutoFDOBinaryInfo> autofdo_binary_map_;
395 // Store results for BranchList.
396 std::unordered_map<Dso*, BranchListBinaryInfo> branch_list_binary_map_;
397};
398
Yabin Cui89465ad2021-12-14 14:24:55 -0800399// Read a protobuf file specified by etm_branch_list.proto, and generate BranchListBinaryInfo.
400class BranchListReader {
401 public:
402 BranchListReader(const std::string& filename, const std::regex binary_name_regex)
403 : filename_(filename), binary_name_regex_(binary_name_regex) {}
404
405 void SetCallback(const BranchListBinaryCallback& callback) { callback_ = callback; }
406
407 bool Read() {
408 auto fd = FileHelper::OpenReadOnly(filename_);
409 if (!fd.ok()) {
410 PLOG(ERROR) << "failed to open " << filename_;
411 return false;
412 }
413
414 proto::ETMBranchList branch_list_proto;
415 if (!branch_list_proto.ParseFromFileDescriptor(fd)) {
416 PLOG(ERROR) << "failed to read msg from " << filename_;
417 return false;
418 }
419 if (branch_list_proto.magic() != ETM_BRANCH_LIST_PROTO_MAGIC) {
420 PLOG(ERROR) << "file not in format etm_branch_list.proto: " << filename_;
421 return false;
422 }
423
424 for (size_t i = 0; i < branch_list_proto.binaries_size(); i++) {
425 const auto& binary_proto = branch_list_proto.binaries(i);
426 if (!std::regex_search(binary_proto.path(), binary_name_regex_)) {
427 continue;
428 }
429 BinaryKey key(binary_proto.path(), BuildId(binary_proto.build_id()));
430 if (binary_proto.has_kernel_info()) {
431 key.kernel_start_addr = binary_proto.kernel_info().kernel_start_addr();
432 }
433 BranchListBinaryInfo binary;
434 auto dso_type = ToDsoType(binary_proto.type());
435 if (!dso_type) {
436 LOG(ERROR) << "invalid binary type in " << filename_;
437 return false;
438 }
439 binary.dso_type = dso_type.value();
440 binary.branch_map = BuildUnorderedBranchMap(binary_proto);
441 callback_(key, binary);
442 }
443 return true;
444 }
445
446 private:
447 std::optional<DsoType> ToDsoType(proto::ETMBranchList_Binary::BinaryType binary_type) {
448 switch (binary_type) {
449 case proto::ETMBranchList_Binary::ELF_FILE:
450 return DSO_ELF_FILE;
451 case proto::ETMBranchList_Binary::KERNEL:
452 return DSO_KERNEL;
453 case proto::ETMBranchList_Binary::KERNEL_MODULE:
454 return DSO_KERNEL_MODULE;
455 default:
456 LOG(ERROR) << "unexpected binary type " << binary_type;
457 return std::nullopt;
458 }
459 }
460
461 UnorderedBranchMap BuildUnorderedBranchMap(const proto::ETMBranchList_Binary& binary_proto) {
462 UnorderedBranchMap branch_map;
463 for (size_t i = 0; i < binary_proto.addrs_size(); i++) {
464 const auto& addr_proto = binary_proto.addrs(i);
465 auto& b_map = branch_map[addr_proto.addr()];
466 for (size_t j = 0; j < addr_proto.branches_size(); j++) {
467 const auto& branch_proto = addr_proto.branches(j);
468 std::vector<bool> branch =
469 ProtoStringToBranch(branch_proto.branch(), branch_proto.branch_size());
470 b_map[branch] = branch_proto.count();
471 }
472 }
473 return branch_map;
474 }
475
476 const std::string filename_;
477 const std::regex binary_name_regex_;
478 BranchListBinaryCallback callback_;
479};
480
Yabin Cuife4af172021-12-14 15:57:51 -0800481// Convert BranchListBinaryInfo into AutoFDOBinaryInfo.
482class BranchListToAutoFDOConverter {
483 public:
484 std::unique_ptr<AutoFDOBinaryInfo> Convert(const BinaryKey& key, BranchListBinaryInfo& binary) {
485 BuildId build_id = key.build_id;
486 std::unique_ptr<Dso> dso = Dso::CreateDsoWithBuildId(binary.dso_type, key.path, build_id);
487 if (!dso || !CheckBuildId(dso.get(), key.build_id)) {
488 return nullptr;
489 }
490 std::unique_ptr<AutoFDOBinaryInfo> autofdo_binary(new AutoFDOBinaryInfo);
491 autofdo_binary->first_load_segment_addr = GetFirstLoadSegmentVaddr(dso.get());
492
493 if (dso->type() == DSO_KERNEL) {
494 ModifyBranchMapForKernel(dso.get(), key.kernel_start_addr, binary);
495 }
496
497 auto process_instr_range = [&](const ETMInstrRange& range) {
498 CHECK_EQ(range.dso, dso.get());
499 autofdo_binary->AddInstrRange(range);
500 };
501
502 auto result =
503 ConvertBranchMapToInstrRanges(dso.get(), binary.GetOrderedBranchMap(), process_instr_range);
504 if (!result.ok()) {
505 LOG(WARNING) << "failed to build instr ranges for binary " << dso->Path() << ": "
506 << result.error();
507 return nullptr;
508 }
509 return autofdo_binary;
510 }
511
512 private:
513 bool CheckBuildId(Dso* dso, const BuildId& expected_build_id) {
514 if (expected_build_id.IsEmpty()) {
515 return true;
516 }
517 BuildId build_id;
518 return GetBuildIdFromDsoPath(dso->GetDebugFilePath(), &build_id) &&
519 build_id == expected_build_id;
520 }
521
522 void ModifyBranchMapForKernel(Dso* dso, uint64_t kernel_start_addr,
523 BranchListBinaryInfo& binary) {
524 if (kernel_start_addr == 0) {
525 // vmlinux has been provided when generating branch lists. Addresses in branch lists are
526 // already vaddrs in vmlinux.
527 return;
528 }
529 // Addresses are still kernel ip addrs in memory. Need to convert them to vaddrs in vmlinux.
530 UnorderedBranchMap new_branch_map;
531 for (auto& p : binary.branch_map) {
532 uint64_t vaddr_in_file = dso->IpToVaddrInFile(p.first, kernel_start_addr, 0);
533 new_branch_map[vaddr_in_file] = std::move(p.second);
534 }
535 binary.branch_map = std::move(new_branch_map);
536 }
537};
538
Yabin Cuia0208222021-12-10 10:24:29 -0800539// Write instruction ranges to a file in AutoFDO text format.
540class AutoFDOWriter {
541 public:
542 void AddAutoFDOBinary(const BinaryKey& key, AutoFDOBinaryInfo& binary) {
543 auto it = binary_map_.find(key);
544 if (it == binary_map_.end()) {
545 binary_map_[key] = std::move(binary);
546 } else {
547 it->second.Merge(binary);
548 }
549 }
550
551 bool Write(const std::string& output_filename) {
552 std::unique_ptr<FILE, decltype(&fclose)> output_fp(fopen(output_filename.c_str(), "w"), fclose);
553 if (!output_fp) {
554 PLOG(ERROR) << "failed to write to " << output_filename;
555 return false;
556 }
557 // autofdo_binary_map is used to store instruction ranges, which can have a large amount. And
558 // it has a larger access time (instruction ranges * executed time). So it's better to use
559 // unorder_maps to speed up access time. But we also want a stable output here, to compare
560 // output changes result from code changes. So generate a sorted output here.
561 std::vector<BinaryKey> keys;
562 for (auto& p : binary_map_) {
563 keys.emplace_back(p.first);
564 }
565 std::sort(keys.begin(), keys.end(),
566 [](const BinaryKey& key1, const BinaryKey& key2) { return key1.path < key2.path; });
567 if (keys.size() > 1) {
568 fprintf(output_fp.get(),
569 "// Please split this file. AutoFDO only accepts profile for one binary.\n");
570 }
571 for (const auto& key : keys) {
572 const AutoFDOBinaryInfo& binary = binary_map_[key];
573 // AutoFDO text format needs file_offsets instead of virtual addrs in a binary. And it uses
574 // below formula: vaddr = file_offset + GetFirstLoadSegmentVaddr().
575 uint64_t first_load_segment_addr = binary.first_load_segment_addr;
576
577 auto to_offset = [&](uint64_t vaddr) -> uint64_t {
578 if (vaddr == 0) {
579 return 0;
580 }
581 CHECK_GE(vaddr, first_load_segment_addr);
582 return vaddr - first_load_segment_addr;
583 };
584
585 // Write range_count_map.
586 std::map<AddrPair, uint64_t> range_count_map(binary.range_count_map.begin(),
587 binary.range_count_map.end());
588 fprintf(output_fp.get(), "%zu\n", range_count_map.size());
589 for (const auto& pair2 : range_count_map) {
590 const AddrPair& addr_range = pair2.first;
591 uint64_t count = pair2.second;
592
593 fprintf(output_fp.get(), "%" PRIx64 "-%" PRIx64 ":%" PRIu64 "\n",
594 to_offset(addr_range.first), to_offset(addr_range.second), count);
595 }
596
597 // Write addr_count_map.
598 fprintf(output_fp.get(), "0\n");
599
600 // Write branch_count_map.
601 std::map<AddrPair, uint64_t> branch_count_map(binary.branch_count_map.begin(),
602 binary.branch_count_map.end());
603 fprintf(output_fp.get(), "%zu\n", branch_count_map.size());
604 for (const auto& pair2 : branch_count_map) {
605 const AddrPair& branch = pair2.first;
606 uint64_t count = pair2.second;
607
608 fprintf(output_fp.get(), "%" PRIx64 "->%" PRIx64 ":%" PRIu64 "\n", to_offset(branch.first),
609 to_offset(branch.second), count);
610 }
611
612 // Write the binary path in comment.
613 fprintf(output_fp.get(), "// %s\n\n", key.path.c_str());
614 }
615 return true;
616 }
617
618 private:
619 std::unordered_map<BinaryKey, AutoFDOBinaryInfo, BinaryKeyHash> binary_map_;
620};
621
Yabin Cui4441e972021-12-15 13:51:58 -0800622// Merge BranchListBinaryInfo.
623struct BranchListMerger {
Yabin Cui9f7569b2021-12-10 11:34:53 -0800624 void AddBranchListBinary(const BinaryKey& key, BranchListBinaryInfo& binary) {
Yabin Cui4441e972021-12-15 13:51:58 -0800625 auto it = binary_map.find(key);
626 if (it == binary_map.end()) {
627 binary_map[key] = std::move(binary);
Yabin Cui9f7569b2021-12-10 11:34:53 -0800628 } else {
629 it->second.Merge(binary);
630 }
631 }
632
Yabin Cui4441e972021-12-15 13:51:58 -0800633 std::unordered_map<BinaryKey, BranchListBinaryInfo, BinaryKeyHash> binary_map;
634};
635
636// Write branch lists to a protobuf file specified by etm_branch_list.proto.
637class BranchListWriter {
638 public:
639 bool Write(const std::string& output_filename,
640 const std::unordered_map<BinaryKey, BranchListBinaryInfo, BinaryKeyHash>& binary_map) {
Yabin Cui9f7569b2021-12-10 11:34:53 -0800641 // Don't produce empty output file.
Yabin Cui4441e972021-12-15 13:51:58 -0800642 if (binary_map.empty()) {
Yabin Cui9f7569b2021-12-10 11:34:53 -0800643 LOG(INFO) << "Skip empty output file.";
644 unlink(output_filename.c_str());
645 return true;
646 }
647 std::unique_ptr<FILE, decltype(&fclose)> output_fp(fopen(output_filename.c_str(), "wb"),
648 fclose);
649 if (!output_fp) {
650 PLOG(ERROR) << "failed to write to " << output_filename;
651 return false;
652 }
653
654 proto::ETMBranchList branch_list_proto;
655 branch_list_proto.set_magic(ETM_BRANCH_LIST_PROTO_MAGIC);
656 std::vector<char> branch_buf;
Yabin Cui4441e972021-12-15 13:51:58 -0800657 for (const auto& p : binary_map) {
Yabin Cui9f7569b2021-12-10 11:34:53 -0800658 const BinaryKey& key = p.first;
659 const BranchListBinaryInfo& binary = p.second;
660 auto binary_proto = branch_list_proto.add_binaries();
661
662 binary_proto->set_path(key.path);
663 if (!key.build_id.IsEmpty()) {
664 binary_proto->set_build_id(key.build_id.ToString().substr(2));
665 }
666 auto opt_binary_type = ToProtoBinaryType(binary.dso_type);
667 if (!opt_binary_type.has_value()) {
668 return false;
669 }
670 binary_proto->set_type(opt_binary_type.value());
671
672 for (const auto& addr_p : binary.branch_map) {
673 auto addr_proto = binary_proto->add_addrs();
674 addr_proto->set_addr(addr_p.first);
675
676 for (const auto& branch_p : addr_p.second) {
677 const std::vector<bool>& branch = branch_p.first;
678 auto branch_proto = addr_proto->add_branches();
679
680 branch_proto->set_branch(BranchToProtoString(branch));
681 branch_proto->set_branch_size(branch.size());
682 branch_proto->set_count(branch_p.second);
683 }
684 }
685
686 if (binary.dso_type == DSO_KERNEL) {
687 binary_proto->mutable_kernel_info()->set_kernel_start_addr(key.kernel_start_addr);
688 }
689 }
690 if (!branch_list_proto.SerializeToFileDescriptor(fileno(output_fp.get()))) {
691 PLOG(ERROR) << "failed to write to " << output_filename;
692 return false;
693 }
694 return true;
695 }
696
697 private:
698 std::optional<proto::ETMBranchList_Binary::BinaryType> ToProtoBinaryType(DsoType dso_type) {
699 switch (dso_type) {
700 case DSO_ELF_FILE:
701 return proto::ETMBranchList_Binary::ELF_FILE;
702 case DSO_KERNEL:
703 return proto::ETMBranchList_Binary::KERNEL;
704 case DSO_KERNEL_MODULE:
705 return proto::ETMBranchList_Binary::KERNEL_MODULE;
706 default:
707 LOG(ERROR) << "unexpected dso type " << dso_type;
708 return std::nullopt;
709 }
710 }
Yabin Cui4441e972021-12-15 13:51:58 -0800711};
Yabin Cui193f2382020-04-01 14:30:03 -0700712
Yabin Cuic573eaa2019-08-21 16:05:07 -0700713class InjectCommand : public Command {
714 public:
// Constructs the command by passing "inject", a one-line summary and the usage text below to
// the Command base class. The constructor body itself is empty.
715 InjectCommand()
Yabin Cui193f2382020-04-01 14:30:03 -0700716 : Command("inject", "parse etm instruction tracing data",
Yabin Cuic573eaa2019-08-21 16:05:07 -0700717 // clang-format off
718"Usage: simpleperf inject [options]\n"
Yi Kong7ea069a2020-01-06 15:22:55 -0800719"--binary binary_name Generate data only for binaries matching binary_name regex.\n"
Yabin Cuida89bf62021-12-08 14:11:24 -0800720"-i file1,file2,... Input files. Default is perf.data. Support below formats:\n"
Yabin Cui193f2382020-04-01 14:30:03 -0700721" 1. perf.data generated by recording cs-etm event type.\n"
722" 2. branch_list file generated by `inject --output branch-list`.\n"
Yabin Cuida89bf62021-12-08 14:11:24 -0800723" If a file name starts with @, it contains a list of input files.\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700724"-o <file> output file. Default is perf_inject.data.\n"
Yabin Cui193f2382020-04-01 14:30:03 -0700725"--output <format> Select output file format:\n"
726" autofdo -- text format accepted by TextSampleReader\n"
727" of AutoFDO\n"
728" branch-list -- protobuf file in etm_branch_list.proto\n"
729" Default is autofdo.\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700730"--dump-etm type1,type2,... Dump etm data. A type is one of raw, packet and element.\n"
Yabin Cui561bf1b2020-11-03 12:11:07 -0800731"--exclude-perf Exclude trace data for the recording process.\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700732"--symdir <dir> Look for binaries in a directory recursively.\n"
Yabin Cui193f2382020-04-01 14:30:03 -0700733"\n"
734"Examples:\n"
735"1. Generate autofdo text output.\n"
736"$ simpleperf inject -i perf.data -o autofdo.txt --output autofdo\n"
737"\n"
738"2. Generate branch list proto, then convert to autofdo text.\n"
739"$ simpleperf inject -i perf.data -o branch_list.data --output branch-list\n"
740"$ simpleperf inject -i branch_list.data -o autofdo.txt --output autofdo\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700741 // clang-format on
Yabin Cuia0208222021-12-10 10:24:29 -0800742 ) {}
Yabin Cuic573eaa2019-08-21 16:05:07 -0700743
744 bool Run(const std::vector<std::string>& args) override {
Yabin Cui193f2382020-04-01 14:30:03 -0700745 GOOGLE_PROTOBUF_VERIFY_VERSION;
Yabin Cuic573eaa2019-08-21 16:05:07 -0700746 if (!ParseOptions(args)) {
747 return false;
748 }
Yabin Cui193f2382020-04-01 14:30:03 -0700749
Yabin Cuiba51cf42021-12-15 13:21:54 -0800750 CHECK(!input_filenames_.empty());
751 if (IsPerfDataFile(input_filenames_[0])) {
752 switch (output_format_) {
753 case OutputFormat::AutoFDO:
754 return ConvertPerfDataToAutoFDO();
755 case OutputFormat::BranchList:
756 return ConvertPerfDataToBranchList();
757 }
758 } else {
Yabin Cui04afffe2021-12-15 14:38:19 -0800759 switch (output_format_) {
760 case OutputFormat::AutoFDO:
761 return ConvertBranchListToAutoFDO();
762 case OutputFormat::BranchList:
763 return ConvertBranchListToBranchList();
Yabin Cuida89bf62021-12-08 14:11:24 -0800764 }
Yabin Cuic573eaa2019-08-21 16:05:07 -0700765 }
Yabin Cuic573eaa2019-08-21 16:05:07 -0700766 }
767
768 private:
769 bool ParseOptions(const std::vector<std::string>& args) {
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800770 const OptionFormatMap option_formats = {
771 {"--binary", {OptionValueType::STRING, OptionType::SINGLE}},
772 {"--dump-etm", {OptionValueType::STRING, OptionType::SINGLE}},
Yabin Cui561bf1b2020-11-03 12:11:07 -0800773 {"--exclude-perf", {OptionValueType::NONE, OptionType::SINGLE}},
Yabin Cuida89bf62021-12-08 14:11:24 -0800774 {"-i", {OptionValueType::STRING, OptionType::MULTIPLE}},
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800775 {"-o", {OptionValueType::STRING, OptionType::SINGLE}},
776 {"--output", {OptionValueType::STRING, OptionType::SINGLE}},
777 {"--symdir", {OptionValueType::STRING, OptionType::MULTIPLE}},
778 };
779 OptionValueMap options;
780 std::vector<std::pair<OptionName, OptionValue>> ordered_options;
781 if (!PreprocessOptions(args, option_formats, &options, &ordered_options, nullptr)) {
782 return false;
783 }
784
785 if (auto value = options.PullValue("--binary"); value) {
786 binary_name_regex_ = *value->str_value;
787 }
788 if (auto value = options.PullValue("--dump-etm"); value) {
789 if (!ParseEtmDumpOption(*value->str_value, &etm_dump_option_)) {
Yabin Cuic573eaa2019-08-21 16:05:07 -0700790 return false;
791 }
792 }
Yabin Cui561bf1b2020-11-03 12:11:07 -0800793 exclude_perf_ = options.PullBoolValue("--exclude-perf");
Yabin Cuida89bf62021-12-08 14:11:24 -0800794
795 for (const OptionValue& value : options.PullValues("-i")) {
796 std::vector<std::string> files = android::base::Split(*value.str_value, ",");
797 for (std::string& file : files) {
798 if (android::base::StartsWith(file, "@")) {
799 if (!ReadFileList(file.substr(1), &input_filenames_)) {
800 return false;
801 }
802 } else {
803 input_filenames_.emplace_back(file);
804 }
805 }
806 }
807 if (input_filenames_.empty()) {
808 input_filenames_.emplace_back("perf.data");
809 }
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800810 options.PullStringValue("-o", &output_filename_);
811 if (auto value = options.PullValue("--output"); value) {
812 const std::string& output = *value->str_value;
813 if (output == "autofdo") {
814 output_format_ = OutputFormat::AutoFDO;
815 } else if (output == "branch-list") {
816 output_format_ = OutputFormat::BranchList;
817 } else {
818 LOG(ERROR) << "unknown format in --output option: " << output;
819 return false;
820 }
821 }
822 if (auto value = options.PullValue("--symdir"); value) {
823 if (!Dso::AddSymbolDir(*value->str_value)) {
824 return false;
825 }
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800826 // Symbol dirs are cleaned when Dso count is decreased to zero, which can happen between
827 // processing input files. To make symbol dirs always available, create a placeholder dso to
828 // prevent cleaning from happening.
829 placeholder_dso_ = Dso::CreateDso(DSO_UNKNOWN_FILE, "unknown");
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800830 }
831 CHECK(options.values.empty());
Yabin Cuic573eaa2019-08-21 16:05:07 -0700832 return true;
833 }
834
Yabin Cuida89bf62021-12-08 14:11:24 -0800835 bool ReadFileList(const std::string& path, std::vector<std::string>* file_list) {
836 std::string data;
837 if (!android::base::ReadFileToString(path, &data)) {
838 PLOG(ERROR) << "failed to read " << path;
839 return false;
840 }
841 std::vector<std::string> tokens = android::base::Tokenize(data, " \t\n\r");
842 file_list->insert(file_list->end(), tokens.begin(), tokens.end());
843 return true;
844 }
845
Yabin Cuiba51cf42021-12-15 13:21:54 -0800846 bool ConvertPerfDataToAutoFDO() {
847 AutoFDOWriter autofdo_writer;
848 auto callback = [&](const BinaryKey& key, AutoFDOBinaryInfo& binary) {
849 autofdo_writer.AddAutoFDOBinary(key, binary);
850 };
851 for (const auto& input_filename : input_filenames_) {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800852 PerfDataReader reader(input_filename, exclude_perf_, etm_dump_option_, binary_name_regex_);
Yabin Cuiba51cf42021-12-15 13:21:54 -0800853 reader.SetCallback(callback);
854 if (!reader.Read()) {
855 return false;
Yabin Cui193f2382020-04-01 14:30:03 -0700856 }
Yabin Cuic573eaa2019-08-21 16:05:07 -0700857 }
Yabin Cuiba51cf42021-12-15 13:21:54 -0800858 return autofdo_writer.Write(output_filename_);
Yabin Cuic573eaa2019-08-21 16:05:07 -0700859 }
860
Yabin Cuiba51cf42021-12-15 13:21:54 -0800861 bool ConvertPerfDataToBranchList() {
Yabin Cui4441e972021-12-15 13:51:58 -0800862 BranchListMerger branch_list_merger;
Yabin Cuiba51cf42021-12-15 13:21:54 -0800863 auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
Yabin Cui4441e972021-12-15 13:51:58 -0800864 branch_list_merger.AddBranchListBinary(key, binary);
Yabin Cuiba51cf42021-12-15 13:21:54 -0800865 };
866 for (const auto& input_filename : input_filenames_) {
867 PerfDataReader reader(input_filename, exclude_perf_, etm_dump_option_, binary_name_regex_);
868 reader.SetCallback(callback);
869 if (!reader.Read()) {
870 return false;
871 }
Yabin Cui193f2382020-04-01 14:30:03 -0700872 }
Yabin Cui4441e972021-12-15 13:51:58 -0800873 BranchListWriter branch_list_writer;
874 return branch_list_writer.Write(output_filename_, branch_list_merger.binary_map);
Yabin Cuiba51cf42021-12-15 13:21:54 -0800875 }
876
877 bool ConvertBranchListToAutoFDO() {
Yabin Cui4441e972021-12-15 13:51:58 -0800878 // Step1 : Merge branch lists from all input files.
879 BranchListMerger branch_list_merger;
Yabin Cui89465ad2021-12-14 14:24:55 -0800880 auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
Yabin Cui4441e972021-12-15 13:51:58 -0800881 branch_list_merger.AddBranchListBinary(key, binary);
Yabin Cui89465ad2021-12-14 14:24:55 -0800882 };
Yabin Cuiba51cf42021-12-15 13:21:54 -0800883 for (const auto& input_filename : input_filenames_) {
884 BranchListReader reader(input_filename, binary_name_regex_);
885 reader.SetCallback(callback);
886 if (!reader.Read()) {
887 return false;
888 }
Yabin Cui193f2382020-04-01 14:30:03 -0700889 }
Yabin Cui4441e972021-12-15 13:51:58 -0800890
891 // Step2: Convert BranchListBinaryInfo to AutoFDOBinaryInfo.
892 AutoFDOWriter autofdo_writer;
893 BranchListToAutoFDOConverter converter;
894 for (auto& p : branch_list_merger.binary_map) {
895 const BinaryKey& key = p.first;
896 BranchListBinaryInfo& binary = p.second;
897 std::unique_ptr<AutoFDOBinaryInfo> autofdo_binary = converter.Convert(key, binary);
898 if (autofdo_binary) {
899 // Create new BinaryKey with kernel_start_addr = 0. Because AutoFDO output doesn't care
900 // kernel_start_addr.
901 autofdo_writer.AddAutoFDOBinary(BinaryKey(key.path, key.build_id), *autofdo_binary);
902 }
903 }
904
905 // Step3: Write AutoFDOBinaryInfo.
Yabin Cuiba51cf42021-12-15 13:21:54 -0800906 return autofdo_writer.Write(output_filename_);
Yabin Cuifad7bbe2019-09-18 16:05:51 -0700907 }
908
Yabin Cui04afffe2021-12-15 14:38:19 -0800909 bool ConvertBranchListToBranchList() {
910 // Step1 : Merge branch lists from all input files.
911 BranchListMerger branch_list_merger;
912 auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
913 branch_list_merger.AddBranchListBinary(key, binary);
914 };
915 for (const auto& input_filename : input_filenames_) {
916 BranchListReader reader(input_filename, binary_name_regex_);
917 reader.SetCallback(callback);
918 if (!reader.Read()) {
919 return false;
920 }
921 }
922 // Step2: Write BranchListBinaryInfo.
923 BranchListWriter branch_list_writer;
924 return branch_list_writer.Write(output_filename_, branch_list_merger.binary_map);
925 }
926
Yi Kong7ea069a2020-01-06 15:22:55 -0800927 std::regex binary_name_regex_{""}; // Default to match everything.
Yabin Cui561bf1b2020-11-03 12:11:07 -0800928 bool exclude_perf_ = false;
Yabin Cuida89bf62021-12-08 14:11:24 -0800929 std::vector<std::string> input_filenames_;
Yabin Cuic573eaa2019-08-21 16:05:07 -0700930 std::string output_filename_ = "perf_inject.data";
Yabin Cui193f2382020-04-01 14:30:03 -0700931 OutputFormat output_format_ = OutputFormat::AutoFDO;
Yabin Cuic573eaa2019-08-21 16:05:07 -0700932 ETMDumpOption etm_dump_option_;
Yabin Cuifad7bbe2019-09-18 16:05:51 -0700933
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800934 std::unique_ptr<Dso> placeholder_dso_;
Yabin Cuic573eaa2019-08-21 16:05:07 -0700935};
936
937} // namespace
938
939void RegisterInjectCommand() {
940 return RegisterCommand("inject", [] { return std::unique_ptr<Command>(new InjectCommand); });
941}
Yabin Cuiacbdb242020-07-07 15:56:34 -0700942
943} // namespace simpleperf