blob: d3f66cd7ef738ed6ea81049510890f70a7d48eb5 [file] [log] [blame]
Yabin Cuic573eaa2019-08-21 16:05:07 -07001/*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Yabin Cui65b8fab2023-01-31 09:50:53 -080017#include <stdint.h>
Yabin Cuic573eaa2019-08-21 16:05:07 -070018#include <stdio.h>
Yabin Cui0b60f9c2021-02-09 10:54:17 -080019#include <unistd.h>
Yabin Cuic573eaa2019-08-21 16:05:07 -070020
21#include <memory>
Yabin Cui561bf1b2020-11-03 12:11:07 -080022#include <optional>
Yi Kong61e78cd2020-01-13 15:43:01 -080023#include <string>
Yabin Cuic573eaa2019-08-21 16:05:07 -070024
Yabin Cui561bf1b2020-11-03 12:11:07 -080025#include <android-base/parseint.h>
Yabin Cuida89bf62021-12-08 14:11:24 -080026#include <android-base/strings.h>
Yabin Cui561bf1b2020-11-03 12:11:07 -080027
Yabin Cuida3b6ce2023-04-28 17:42:02 -070028#include "ETMBranchListFile.h"
ThiƩbaud Weksteen4848ee02020-10-23 16:06:59 +020029#include "ETMDecoder.h"
Yabin Cuif00f4fc2022-11-23 15:15:30 -080030#include "RegEx.h"
Yabin Cuic573eaa2019-08-21 16:05:07 -070031#include "command.h"
32#include "record_file.h"
Yabin Cui193f2382020-04-01 14:30:03 -070033#include "system/extras/simpleperf/etm_branch_list.pb.h"
Yabin Cuic573eaa2019-08-21 16:05:07 -070034#include "thread_tree.h"
Yabin Cuifad7bbe2019-09-18 16:05:51 -070035#include "utils.h"
Yabin Cuic573eaa2019-08-21 16:05:07 -070036
Yabin Cui193f2382020-04-01 14:30:03 -070037namespace simpleperf {
38
Yabin Cuic573eaa2019-08-21 16:05:07 -070039namespace {
40
Yabin Cuifad7bbe2019-09-18 16:05:51 -070041using AddrPair = std::pair<uint64_t, uint64_t>;
42
43struct AddrPairHash {
44 size_t operator()(const AddrPair& ap) const noexcept {
45 size_t seed = 0;
46 HashCombine(seed, ap.first);
47 HashCombine(seed, ap.second);
48 return seed;
49 }
50};
51
// Output file formats that can be selected with the --output option.
enum class OutputFormat {
  // Text format accepted by TextSampleReader of AutoFDO.
  AutoFDO,
  // Protobuf file described by etm_branch_list.proto.
  BranchList,
};
56
57struct AutoFDOBinaryInfo {
Yabin Cuia0208222021-12-10 10:24:29 -080058 uint64_t first_load_segment_addr = 0;
Yabin Cuifad7bbe2019-09-18 16:05:51 -070059 std::unordered_map<AddrPair, uint64_t, AddrPairHash> range_count_map;
60 std::unordered_map<AddrPair, uint64_t, AddrPairHash> branch_count_map;
Yabin Cuia0208222021-12-10 10:24:29 -080061
Yabin Cuife4af172021-12-14 15:57:51 -080062 void AddInstrRange(const ETMInstrRange& instr_range) {
63 uint64_t total_count = instr_range.branch_taken_count;
64 OverflowSafeAdd(total_count, instr_range.branch_not_taken_count);
65 OverflowSafeAdd(range_count_map[AddrPair(instr_range.start_addr, instr_range.end_addr)],
66 total_count);
67 if (instr_range.branch_taken_count > 0) {
68 OverflowSafeAdd(branch_count_map[AddrPair(instr_range.end_addr, instr_range.branch_to_addr)],
69 instr_range.branch_taken_count);
70 }
71 }
72
Yabin Cuia0208222021-12-10 10:24:29 -080073 void Merge(const AutoFDOBinaryInfo& other) {
74 for (const auto& p : other.range_count_map) {
75 auto res = range_count_map.emplace(p.first, p.second);
76 if (!res.second) {
Yabin Cui1befd522021-12-10 12:24:22 -080077 OverflowSafeAdd(res.first->second, p.second);
Yabin Cuia0208222021-12-10 10:24:29 -080078 }
79 }
80 for (const auto& p : other.branch_count_map) {
81 auto res = branch_count_map.emplace(p.first, p.second);
82 if (!res.second) {
Yabin Cui1befd522021-12-10 12:24:22 -080083 OverflowSafeAdd(res.first->second, p.second);
Yabin Cuia0208222021-12-10 10:24:29 -080084 }
85 }
86 }
Yabin Cuifad7bbe2019-09-18 16:05:51 -070087};
88
Yabin Cuif2fe9f02021-12-14 13:06:50 -080089using AutoFDOBinaryCallback = std::function<void(const BinaryKey&, AutoFDOBinaryInfo&)>;
90using BranchListBinaryCallback = std::function<void(const BinaryKey&, BranchListBinaryInfo&)>;
91
Yabin Cui5a1b6262023-05-01 09:53:34 -070092class ETMThreadTreeWithFilter : public ETMThreadTree {
Yabin Cui561bf1b2020-11-03 12:11:07 -080093 public:
94 void ExcludePid(pid_t pid) { exclude_pid_ = pid; }
Yabin Cui5a1b6262023-05-01 09:53:34 -070095 ThreadTree& GetThreadTree() { return thread_tree_; }
96 void DisableThreadExitRecords() override { thread_tree_.DisableThreadExitRecords(); }
Yabin Cui561bf1b2020-11-03 12:11:07 -080097
Yabin Cui5a1b6262023-05-01 09:53:34 -070098 const ThreadEntry* FindThread(int tid) override {
99 const ThreadEntry* thread = thread_tree_.FindThread(tid);
Yabin Cui561bf1b2020-11-03 12:11:07 -0800100 if (thread != nullptr && exclude_pid_ && thread->pid == exclude_pid_) {
101 return nullptr;
102 }
103 return thread;
104 }
105
Yabin Cui5a1b6262023-05-01 09:53:34 -0700106 const MapSet& GetKernelMaps() override { return thread_tree_.GetKernelMaps(); }
107
Yabin Cui561bf1b2020-11-03 12:11:07 -0800108 private:
Yabin Cui5a1b6262023-05-01 09:53:34 -0700109 ThreadTree thread_tree_;
Yabin Cui561bf1b2020-11-03 12:11:07 -0800110 std::optional<pid_t> exclude_pid_;
111};
112
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700113class BinaryFilter {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800114 public:
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700115 BinaryFilter(const RegEx* binary_name_regex) : binary_name_regex_(binary_name_regex) {}
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800116
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700117 bool Filter(Dso* dso) {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800118 auto lookup = dso_filter_cache_.find(dso);
119 if (lookup != dso_filter_cache_.end()) {
120 return lookup->second;
121 }
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700122 bool match = Filter(dso->Path());
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800123 dso_filter_cache_.insert({dso, match});
124 return match;
125 }
126
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700127 bool Filter(const std::string& path) {
128 return binary_name_regex_ == nullptr || binary_name_regex_->Search(path);
129 }
130
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800131 private:
Yabin Cuif00f4fc2022-11-23 15:15:30 -0800132 const RegEx* binary_name_regex_;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800133 std::unordered_map<Dso*, bool> dso_filter_cache_;
134};
135
136static uint64_t GetFirstLoadSegmentVaddr(Dso* dso) {
137 ElfStatus status;
138 if (auto elf = ElfFile::Open(dso->GetDebugFilePath(), &status); elf) {
139 for (const auto& segment : elf->GetProgramHeader()) {
140 if (segment.is_load) {
141 return segment.vaddr;
142 }
143 }
144 }
145 return 0;
146}
147
148// Read perf.data, and generate AutoFDOBinaryInfo or BranchListBinaryInfo.
149// To avoid resetting data, it only processes one input file per instance.
150class PerfDataReader {
151 public:
152 PerfDataReader(const std::string& filename, bool exclude_perf, ETMDumpOption etm_dump_option,
Yabin Cuif00f4fc2022-11-23 15:15:30 -0800153 const RegEx* binary_name_regex)
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800154 : filename_(filename),
155 exclude_perf_(exclude_perf),
156 etm_dump_option_(etm_dump_option),
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700157 binary_filter_(binary_name_regex) {}
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800158
159 void SetCallback(const AutoFDOBinaryCallback& callback) { autofdo_callback_ = callback; }
160 void SetCallback(const BranchListBinaryCallback& callback) { branch_list_callback_ = callback; }
161
162 bool Read() {
163 record_file_reader_ = RecordFileReader::CreateInstance(filename_);
164 if (!record_file_reader_) {
165 return false;
166 }
Yabin Cuiadbb6342023-05-01 15:48:12 -0700167 if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_ETM_BRANCH_LIST)) {
168 return ProcessETMBranchListFeature();
169 }
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800170 if (exclude_perf_) {
171 const auto& info_map = record_file_reader_->GetMetaInfoFeature();
172 if (auto it = info_map.find("recording_process"); it == info_map.end()) {
173 LOG(ERROR) << filename_ << " doesn't support --exclude-perf";
174 return false;
175 } else {
176 int pid;
177 if (!android::base::ParseInt(it->second, &pid, 0)) {
178 LOG(ERROR) << "invalid recording_process " << it->second << " in " << filename_;
179 return false;
180 }
181 thread_tree_.ExcludePid(pid);
182 }
183 }
Yabin Cui5a1b6262023-05-01 09:53:34 -0700184 if (!record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_.GetThreadTree())) {
Yabin Cui90a547e2022-12-07 16:29:13 -0800185 return false;
186 }
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800187 if (!record_file_reader_->ReadDataSection([this](auto r) { return ProcessRecord(r.get()); })) {
188 return false;
189 }
190 if (etm_decoder_ && !etm_decoder_->FinishData()) {
191 return false;
192 }
193 if (autofdo_callback_) {
194 ProcessAutoFDOBinaryInfo();
195 } else if (branch_list_callback_) {
196 ProcessBranchListBinaryInfo();
197 }
198 return true;
199 }
200
201 private:
Yabin Cuiadbb6342023-05-01 15:48:12 -0700202 bool ProcessETMBranchListFeature() {
203 if (exclude_perf_) {
204 LOG(WARNING) << "--exclude-perf has no effect on perf.data with etm branch list";
205 }
206 if (autofdo_callback_) {
207 LOG(ERROR) << "convert to autofdo format isn't support on perf.data with etm branch list";
208 return false;
209 }
210 CHECK(branch_list_callback_);
211 std::string s;
212 if (!record_file_reader_->ReadFeatureSection(PerfFileFormat::FEAT_ETM_BRANCH_LIST, &s)) {
213 return false;
214 }
215 BranchListBinaryMap binary_map;
216 if (!StringToBranchListBinaryMap(s, binary_map)) {
217 return false;
218 }
219 for (auto& [key, binary] : binary_map) {
220 if (!binary_filter_.Filter(key.path)) {
221 continue;
222 }
223 branch_list_callback_(key, binary);
224 }
225 return true;
226 }
227
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800228 bool ProcessRecord(Record* r) {
Yabin Cui5a1b6262023-05-01 09:53:34 -0700229 thread_tree_.GetThreadTree().Update(*r);
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800230 if (r->type() == PERF_RECORD_AUXTRACE_INFO) {
231 etm_decoder_ = ETMDecoder::Create(*static_cast<AuxTraceInfoRecord*>(r), thread_tree_);
232 if (!etm_decoder_) {
233 return false;
234 }
235 etm_decoder_->EnableDump(etm_dump_option_);
236 if (autofdo_callback_) {
237 etm_decoder_->RegisterCallback(
238 [this](const ETMInstrRange& range) { ProcessInstrRange(range); });
239 } else if (branch_list_callback_) {
240 etm_decoder_->RegisterCallback(
241 [this](const ETMBranchList& branch) { ProcessBranchList(branch); });
242 }
243 } else if (r->type() == PERF_RECORD_AUX) {
244 AuxRecord* aux = static_cast<AuxRecord*>(r);
Yabin Cui65b8fab2023-01-31 09:50:53 -0800245 if (aux->data->aux_size > SIZE_MAX) {
246 LOG(ERROR) << "invalid aux size";
247 return false;
248 }
249 size_t aux_size = aux->data->aux_size;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800250 if (aux_size > 0) {
Yabin Cuic04455b2023-04-21 14:48:22 -0700251 bool error = false;
Yabin Cui65b8fab2023-01-31 09:50:53 -0800252 if (!record_file_reader_->ReadAuxData(aux->Cpu(), aux->data->aux_offset, aux_size,
Yabin Cuic04455b2023-04-21 14:48:22 -0700253 aux_data_buffer_, error)) {
254 return !error;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800255 }
Yabin Cuidf7215c2023-01-13 12:02:40 -0800256 if (!etm_decoder_) {
257 LOG(ERROR) << "ETMDecoder isn't created";
258 return false;
259 }
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800260 return etm_decoder_->ProcessData(aux_data_buffer_.data(), aux_size, !aux->Unformatted(),
261 aux->Cpu());
262 }
263 } else if (r->type() == PERF_RECORD_MMAP && r->InKernel()) {
264 auto& mmap_r = *static_cast<MmapRecord*>(r);
265 if (android::base::StartsWith(mmap_r.filename, DEFAULT_KERNEL_MMAP_NAME)) {
266 kernel_map_start_addr_ = mmap_r.data->addr;
267 }
268 }
269 return true;
270 }
271
272 void ProcessInstrRange(const ETMInstrRange& instr_range) {
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700273 if (!binary_filter_.Filter(instr_range.dso)) {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800274 return;
275 }
276
Yabin Cuife4af172021-12-14 15:57:51 -0800277 autofdo_binary_map_[instr_range.dso].AddInstrRange(instr_range);
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800278 }
279
280 void ProcessBranchList(const ETMBranchList& branch_list) {
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700281 if (!binary_filter_.Filter(branch_list.dso)) {
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800282 return;
283 }
284
285 auto& branch_map = branch_list_binary_map_[branch_list.dso].branch_map;
286 ++branch_map[branch_list.addr][branch_list.branch];
287 }
288
289 void ProcessAutoFDOBinaryInfo() {
290 for (auto& p : autofdo_binary_map_) {
291 Dso* dso = p.first;
292 AutoFDOBinaryInfo& binary = p.second;
293 binary.first_load_segment_addr = GetFirstLoadSegmentVaddr(dso);
294 autofdo_callback_(BinaryKey(dso, 0), binary);
295 }
296 }
297
298 void ProcessBranchListBinaryInfo() {
299 for (auto& p : branch_list_binary_map_) {
300 Dso* dso = p.first;
301 BranchListBinaryInfo& binary = p.second;
302 binary.dso_type = dso->type();
303 BinaryKey key(dso, 0);
304 if (binary.dso_type == DSO_KERNEL) {
305 if (kernel_map_start_addr_ == 0) {
306 LOG(WARNING) << "Can't convert kernel ip addresses without kernel start addr. So remove "
307 "branches for the kernel.";
308 continue;
309 }
310 if (dso->GetDebugFilePath() == dso->Path()) {
311 // vmlinux isn't available. We still use kernel ip addr. Put kernel start addr in proto
312 // for address conversion later.
313 key.kernel_start_addr = kernel_map_start_addr_;
314 }
315 }
316 branch_list_callback_(key, binary);
317 }
318 }
319
320 const std::string filename_;
321 bool exclude_perf_;
322 ETMDumpOption etm_dump_option_;
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700323 BinaryFilter binary_filter_;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800324 AutoFDOBinaryCallback autofdo_callback_;
325 BranchListBinaryCallback branch_list_callback_;
326
327 std::vector<uint8_t> aux_data_buffer_;
328 std::unique_ptr<ETMDecoder> etm_decoder_;
329 std::unique_ptr<RecordFileReader> record_file_reader_;
Yabin Cui5a1b6262023-05-01 09:53:34 -0700330 ETMThreadTreeWithFilter thread_tree_;
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800331 uint64_t kernel_map_start_addr_ = 0;
332 // Store results for AutoFDO.
333 std::unordered_map<Dso*, AutoFDOBinaryInfo> autofdo_binary_map_;
334 // Store results for BranchList.
335 std::unordered_map<Dso*, BranchListBinaryInfo> branch_list_binary_map_;
336};
337
Yabin Cui89465ad2021-12-14 14:24:55 -0800338// Read a protobuf file specified by etm_branch_list.proto, and generate BranchListBinaryInfo.
339class BranchListReader {
340 public:
Yabin Cuif00f4fc2022-11-23 15:15:30 -0800341 BranchListReader(const std::string& filename, const RegEx* binary_name_regex)
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700342 : filename_(filename), binary_filter_(binary_name_regex) {}
Yabin Cui89465ad2021-12-14 14:24:55 -0800343
344 void SetCallback(const BranchListBinaryCallback& callback) { callback_ = callback; }
345
346 bool Read() {
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700347 std::string s;
348 if (!android::base::ReadFileToString(filename_, &s)) {
349 PLOG(ERROR) << "failed to read " << filename_;
Yabin Cui89465ad2021-12-14 14:24:55 -0800350 return false;
351 }
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700352 BranchListBinaryMap binary_map;
353 if (!StringToBranchListBinaryMap(s, binary_map)) {
354 PLOG(ERROR) << "file is in wrong format: " << filename_;
Yabin Cui89465ad2021-12-14 14:24:55 -0800355 return false;
356 }
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700357 for (auto& [key, binary] : binary_map) {
358 if (!binary_filter_.Filter(key.path)) {
Yabin Cui89465ad2021-12-14 14:24:55 -0800359 continue;
360 }
Yabin Cui89465ad2021-12-14 14:24:55 -0800361 callback_(key, binary);
362 }
363 return true;
364 }
365
366 private:
Yabin Cui89465ad2021-12-14 14:24:55 -0800367 const std::string filename_;
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700368 BinaryFilter binary_filter_;
Yabin Cui89465ad2021-12-14 14:24:55 -0800369 BranchListBinaryCallback callback_;
370};
371
Yabin Cuife4af172021-12-14 15:57:51 -0800372// Convert BranchListBinaryInfo into AutoFDOBinaryInfo.
373class BranchListToAutoFDOConverter {
374 public:
375 std::unique_ptr<AutoFDOBinaryInfo> Convert(const BinaryKey& key, BranchListBinaryInfo& binary) {
376 BuildId build_id = key.build_id;
377 std::unique_ptr<Dso> dso = Dso::CreateDsoWithBuildId(binary.dso_type, key.path, build_id);
378 if (!dso || !CheckBuildId(dso.get(), key.build_id)) {
379 return nullptr;
380 }
381 std::unique_ptr<AutoFDOBinaryInfo> autofdo_binary(new AutoFDOBinaryInfo);
382 autofdo_binary->first_load_segment_addr = GetFirstLoadSegmentVaddr(dso.get());
383
384 if (dso->type() == DSO_KERNEL) {
385 ModifyBranchMapForKernel(dso.get(), key.kernel_start_addr, binary);
386 }
387
388 auto process_instr_range = [&](const ETMInstrRange& range) {
389 CHECK_EQ(range.dso, dso.get());
390 autofdo_binary->AddInstrRange(range);
391 };
392
393 auto result =
394 ConvertBranchMapToInstrRanges(dso.get(), binary.GetOrderedBranchMap(), process_instr_range);
395 if (!result.ok()) {
396 LOG(WARNING) << "failed to build instr ranges for binary " << dso->Path() << ": "
397 << result.error();
398 return nullptr;
399 }
400 return autofdo_binary;
401 }
402
403 private:
404 bool CheckBuildId(Dso* dso, const BuildId& expected_build_id) {
405 if (expected_build_id.IsEmpty()) {
406 return true;
407 }
408 BuildId build_id;
409 return GetBuildIdFromDsoPath(dso->GetDebugFilePath(), &build_id) &&
410 build_id == expected_build_id;
411 }
412
413 void ModifyBranchMapForKernel(Dso* dso, uint64_t kernel_start_addr,
414 BranchListBinaryInfo& binary) {
415 if (kernel_start_addr == 0) {
416 // vmlinux has been provided when generating branch lists. Addresses in branch lists are
417 // already vaddrs in vmlinux.
418 return;
419 }
420 // Addresses are still kernel ip addrs in memory. Need to convert them to vaddrs in vmlinux.
421 UnorderedBranchMap new_branch_map;
422 for (auto& p : binary.branch_map) {
423 uint64_t vaddr_in_file = dso->IpToVaddrInFile(p.first, kernel_start_addr, 0);
424 new_branch_map[vaddr_in_file] = std::move(p.second);
425 }
426 binary.branch_map = std::move(new_branch_map);
427 }
428};
429
Yabin Cuia0208222021-12-10 10:24:29 -0800430// Write instruction ranges to a file in AutoFDO text format.
431class AutoFDOWriter {
432 public:
433 void AddAutoFDOBinary(const BinaryKey& key, AutoFDOBinaryInfo& binary) {
434 auto it = binary_map_.find(key);
435 if (it == binary_map_.end()) {
436 binary_map_[key] = std::move(binary);
437 } else {
438 it->second.Merge(binary);
439 }
440 }
441
442 bool Write(const std::string& output_filename) {
443 std::unique_ptr<FILE, decltype(&fclose)> output_fp(fopen(output_filename.c_str(), "w"), fclose);
444 if (!output_fp) {
445 PLOG(ERROR) << "failed to write to " << output_filename;
446 return false;
447 }
448 // autofdo_binary_map is used to store instruction ranges, which can have a large amount. And
449 // it has a larger access time (instruction ranges * executed time). So it's better to use
450 // unorder_maps to speed up access time. But we also want a stable output here, to compare
451 // output changes result from code changes. So generate a sorted output here.
452 std::vector<BinaryKey> keys;
453 for (auto& p : binary_map_) {
454 keys.emplace_back(p.first);
455 }
456 std::sort(keys.begin(), keys.end(),
457 [](const BinaryKey& key1, const BinaryKey& key2) { return key1.path < key2.path; });
458 if (keys.size() > 1) {
459 fprintf(output_fp.get(),
460 "// Please split this file. AutoFDO only accepts profile for one binary.\n");
461 }
462 for (const auto& key : keys) {
463 const AutoFDOBinaryInfo& binary = binary_map_[key];
464 // AutoFDO text format needs file_offsets instead of virtual addrs in a binary. And it uses
465 // below formula: vaddr = file_offset + GetFirstLoadSegmentVaddr().
466 uint64_t first_load_segment_addr = binary.first_load_segment_addr;
467
468 auto to_offset = [&](uint64_t vaddr) -> uint64_t {
469 if (vaddr == 0) {
470 return 0;
471 }
472 CHECK_GE(vaddr, first_load_segment_addr);
473 return vaddr - first_load_segment_addr;
474 };
475
476 // Write range_count_map.
477 std::map<AddrPair, uint64_t> range_count_map(binary.range_count_map.begin(),
478 binary.range_count_map.end());
479 fprintf(output_fp.get(), "%zu\n", range_count_map.size());
480 for (const auto& pair2 : range_count_map) {
481 const AddrPair& addr_range = pair2.first;
482 uint64_t count = pair2.second;
483
484 fprintf(output_fp.get(), "%" PRIx64 "-%" PRIx64 ":%" PRIu64 "\n",
485 to_offset(addr_range.first), to_offset(addr_range.second), count);
486 }
487
488 // Write addr_count_map.
489 fprintf(output_fp.get(), "0\n");
490
491 // Write branch_count_map.
492 std::map<AddrPair, uint64_t> branch_count_map(binary.branch_count_map.begin(),
493 binary.branch_count_map.end());
494 fprintf(output_fp.get(), "%zu\n", branch_count_map.size());
495 for (const auto& pair2 : branch_count_map) {
496 const AddrPair& branch = pair2.first;
497 uint64_t count = pair2.second;
498
499 fprintf(output_fp.get(), "%" PRIx64 "->%" PRIx64 ":%" PRIu64 "\n", to_offset(branch.first),
500 to_offset(branch.second), count);
501 }
502
503 // Write the binary path in comment.
Yabin Cuicc006812023-04-06 11:37:08 -0700504 fprintf(output_fp.get(), "// build_id: %s\n", key.build_id.ToString().c_str());
Yabin Cuia0208222021-12-10 10:24:29 -0800505 fprintf(output_fp.get(), "// %s\n\n", key.path.c_str());
506 }
507 return true;
508 }
509
510 private:
511 std::unordered_map<BinaryKey, AutoFDOBinaryInfo, BinaryKeyHash> binary_map_;
512};
513
Yabin Cui4441e972021-12-15 13:51:58 -0800514// Merge BranchListBinaryInfo.
515struct BranchListMerger {
Yabin Cui9f7569b2021-12-10 11:34:53 -0800516 void AddBranchListBinary(const BinaryKey& key, BranchListBinaryInfo& binary) {
Yabin Cui4441e972021-12-15 13:51:58 -0800517 auto it = binary_map.find(key);
518 if (it == binary_map.end()) {
519 binary_map[key] = std::move(binary);
Yabin Cui9f7569b2021-12-10 11:34:53 -0800520 } else {
521 it->second.Merge(binary);
522 }
523 }
524
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700525 BranchListBinaryMap binary_map;
Yabin Cui4441e972021-12-15 13:51:58 -0800526};
527
528// Write branch lists to a protobuf file specified by etm_branch_list.proto.
529class BranchListWriter {
530 public:
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700531 bool Write(const std::string& output_filename, const BranchListBinaryMap& binary_map) {
Yabin Cui9f7569b2021-12-10 11:34:53 -0800532 // Don't produce empty output file.
Yabin Cui4441e972021-12-15 13:51:58 -0800533 if (binary_map.empty()) {
Yabin Cui9f7569b2021-12-10 11:34:53 -0800534 LOG(INFO) << "Skip empty output file.";
535 unlink(output_filename.c_str());
536 return true;
537 }
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700538 std::string s;
539 if (!BranchListBinaryMapToString(binary_map, s)) {
540 LOG(ERROR) << "invalid BranchListBinaryMap";
Yabin Cui9f7569b2021-12-10 11:34:53 -0800541 return false;
542 }
Yabin Cuida3b6ce2023-04-28 17:42:02 -0700543 if (!android::base::WriteStringToFile(s, output_filename)) {
Yabin Cui9f7569b2021-12-10 11:34:53 -0800544 PLOG(ERROR) << "failed to write to " << output_filename;
545 return false;
546 }
547 return true;
548 }
Yabin Cui4441e972021-12-15 13:51:58 -0800549};
Yabin Cui193f2382020-04-01 14:30:03 -0700550
Yabin Cuic573eaa2019-08-21 16:05:07 -0700551class InjectCommand : public Command {
552 public:
553 InjectCommand()
Yabin Cui193f2382020-04-01 14:30:03 -0700554 : Command("inject", "parse etm instruction tracing data",
Yabin Cuic573eaa2019-08-21 16:05:07 -0700555 // clang-format off
556"Usage: simpleperf inject [options]\n"
Yi Kong7ea069a2020-01-06 15:22:55 -0800557"--binary binary_name Generate data only for binaries matching binary_name regex.\n"
Yabin Cuida89bf62021-12-08 14:11:24 -0800558"-i file1,file2,... Input files. Default is perf.data. Support below formats:\n"
Yabin Cui193f2382020-04-01 14:30:03 -0700559" 1. perf.data generated by recording cs-etm event type.\n"
560" 2. branch_list file generated by `inject --output branch-list`.\n"
Yabin Cuida89bf62021-12-08 14:11:24 -0800561" If a file name starts with @, it contains a list of input files.\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700562"-o <file> output file. Default is perf_inject.data.\n"
Yabin Cui193f2382020-04-01 14:30:03 -0700563"--output <format> Select output file format:\n"
564" autofdo -- text format accepted by TextSampleReader\n"
565" of AutoFDO\n"
566" branch-list -- protobuf file in etm_branch_list.proto\n"
567" Default is autofdo.\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700568"--dump-etm type1,type2,... Dump etm data. A type is one of raw, packet and element.\n"
Yabin Cui561bf1b2020-11-03 12:11:07 -0800569"--exclude-perf Exclude trace data for the recording process.\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700570"--symdir <dir> Look for binaries in a directory recursively.\n"
Yabin Cui193f2382020-04-01 14:30:03 -0700571"\n"
572"Examples:\n"
573"1. Generate autofdo text output.\n"
574"$ simpleperf inject -i perf.data -o autofdo.txt --output autofdo\n"
575"\n"
576"2. Generate branch list proto, then convert to autofdo text.\n"
577"$ simpleperf inject -i perf.data -o branch_list.data --output branch-list\n"
578"$ simpleperf inject -i branch_list.data -o autofdo.txt --output autofdo\n"
Yabin Cuic573eaa2019-08-21 16:05:07 -0700579 // clang-format on
Yabin Cuia0208222021-12-10 10:24:29 -0800580 ) {}
Yabin Cuic573eaa2019-08-21 16:05:07 -0700581
582 bool Run(const std::vector<std::string>& args) override {
Yabin Cui193f2382020-04-01 14:30:03 -0700583 GOOGLE_PROTOBUF_VERIFY_VERSION;
Yabin Cuic573eaa2019-08-21 16:05:07 -0700584 if (!ParseOptions(args)) {
585 return false;
586 }
Yabin Cui193f2382020-04-01 14:30:03 -0700587
Yabin Cuiba51cf42021-12-15 13:21:54 -0800588 CHECK(!input_filenames_.empty());
589 if (IsPerfDataFile(input_filenames_[0])) {
590 switch (output_format_) {
591 case OutputFormat::AutoFDO:
592 return ConvertPerfDataToAutoFDO();
593 case OutputFormat::BranchList:
594 return ConvertPerfDataToBranchList();
595 }
596 } else {
Yabin Cui04afffe2021-12-15 14:38:19 -0800597 switch (output_format_) {
598 case OutputFormat::AutoFDO:
599 return ConvertBranchListToAutoFDO();
600 case OutputFormat::BranchList:
601 return ConvertBranchListToBranchList();
Yabin Cuida89bf62021-12-08 14:11:24 -0800602 }
Yabin Cuic573eaa2019-08-21 16:05:07 -0700603 }
Yabin Cuic573eaa2019-08-21 16:05:07 -0700604 }
605
606 private:
607 bool ParseOptions(const std::vector<std::string>& args) {
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800608 const OptionFormatMap option_formats = {
609 {"--binary", {OptionValueType::STRING, OptionType::SINGLE}},
610 {"--dump-etm", {OptionValueType::STRING, OptionType::SINGLE}},
Yabin Cui561bf1b2020-11-03 12:11:07 -0800611 {"--exclude-perf", {OptionValueType::NONE, OptionType::SINGLE}},
Yabin Cuida89bf62021-12-08 14:11:24 -0800612 {"-i", {OptionValueType::STRING, OptionType::MULTIPLE}},
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800613 {"-o", {OptionValueType::STRING, OptionType::SINGLE}},
614 {"--output", {OptionValueType::STRING, OptionType::SINGLE}},
615 {"--symdir", {OptionValueType::STRING, OptionType::MULTIPLE}},
616 };
617 OptionValueMap options;
618 std::vector<std::pair<OptionName, OptionValue>> ordered_options;
619 if (!PreprocessOptions(args, option_formats, &options, &ordered_options, nullptr)) {
620 return false;
621 }
622
623 if (auto value = options.PullValue("--binary"); value) {
Yabin Cuif00f4fc2022-11-23 15:15:30 -0800624 binary_name_regex_ = RegEx::Create(*value->str_value);
625 if (binary_name_regex_ == nullptr) {
626 return false;
627 }
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800628 }
629 if (auto value = options.PullValue("--dump-etm"); value) {
630 if (!ParseEtmDumpOption(*value->str_value, &etm_dump_option_)) {
Yabin Cuic573eaa2019-08-21 16:05:07 -0700631 return false;
632 }
633 }
Yabin Cui561bf1b2020-11-03 12:11:07 -0800634 exclude_perf_ = options.PullBoolValue("--exclude-perf");
Yabin Cuida89bf62021-12-08 14:11:24 -0800635
636 for (const OptionValue& value : options.PullValues("-i")) {
637 std::vector<std::string> files = android::base::Split(*value.str_value, ",");
638 for (std::string& file : files) {
639 if (android::base::StartsWith(file, "@")) {
640 if (!ReadFileList(file.substr(1), &input_filenames_)) {
641 return false;
642 }
643 } else {
644 input_filenames_.emplace_back(file);
645 }
646 }
647 }
648 if (input_filenames_.empty()) {
649 input_filenames_.emplace_back("perf.data");
650 }
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800651 options.PullStringValue("-o", &output_filename_);
652 if (auto value = options.PullValue("--output"); value) {
653 const std::string& output = *value->str_value;
654 if (output == "autofdo") {
655 output_format_ = OutputFormat::AutoFDO;
656 } else if (output == "branch-list") {
657 output_format_ = OutputFormat::BranchList;
658 } else {
659 LOG(ERROR) << "unknown format in --output option: " << output;
660 return false;
661 }
662 }
663 if (auto value = options.PullValue("--symdir"); value) {
664 if (!Dso::AddSymbolDir(*value->str_value)) {
665 return false;
666 }
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800667 // Symbol dirs are cleaned when Dso count is decreased to zero, which can happen between
668 // processing input files. To make symbol dirs always available, create a placeholder dso to
669 // prevent cleaning from happening.
670 placeholder_dso_ = Dso::CreateDso(DSO_UNKNOWN_FILE, "unknown");
Yabin Cuie09cb9f2020-11-03 09:24:48 -0800671 }
672 CHECK(options.values.empty());
Yabin Cuic573eaa2019-08-21 16:05:07 -0700673 return true;
674 }
675
Yabin Cuida89bf62021-12-08 14:11:24 -0800676 bool ReadFileList(const std::string& path, std::vector<std::string>* file_list) {
677 std::string data;
678 if (!android::base::ReadFileToString(path, &data)) {
679 PLOG(ERROR) << "failed to read " << path;
680 return false;
681 }
682 std::vector<std::string> tokens = android::base::Tokenize(data, " \t\n\r");
683 file_list->insert(file_list->end(), tokens.begin(), tokens.end());
684 return true;
685 }
686
Yabin Cuiba51cf42021-12-15 13:21:54 -0800687 bool ConvertPerfDataToAutoFDO() {
688 AutoFDOWriter autofdo_writer;
689 auto callback = [&](const BinaryKey& key, AutoFDOBinaryInfo& binary) {
690 autofdo_writer.AddAutoFDOBinary(key, binary);
691 };
692 for (const auto& input_filename : input_filenames_) {
Yabin Cuif00f4fc2022-11-23 15:15:30 -0800693 PerfDataReader reader(input_filename, exclude_perf_, etm_dump_option_,
694 binary_name_regex_.get());
Yabin Cuiba51cf42021-12-15 13:21:54 -0800695 reader.SetCallback(callback);
696 if (!reader.Read()) {
697 return false;
Yabin Cui193f2382020-04-01 14:30:03 -0700698 }
Yabin Cuic573eaa2019-08-21 16:05:07 -0700699 }
Yabin Cuiba51cf42021-12-15 13:21:54 -0800700 return autofdo_writer.Write(output_filename_);
Yabin Cuic573eaa2019-08-21 16:05:07 -0700701 }
702
Yabin Cuiba51cf42021-12-15 13:21:54 -0800703 bool ConvertPerfDataToBranchList() {
Yabin Cui4441e972021-12-15 13:51:58 -0800704 BranchListMerger branch_list_merger;
Yabin Cuiba51cf42021-12-15 13:21:54 -0800705 auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
Yabin Cui4441e972021-12-15 13:51:58 -0800706 branch_list_merger.AddBranchListBinary(key, binary);
Yabin Cuiba51cf42021-12-15 13:21:54 -0800707 };
708 for (const auto& input_filename : input_filenames_) {
Yabin Cuif00f4fc2022-11-23 15:15:30 -0800709 PerfDataReader reader(input_filename, exclude_perf_, etm_dump_option_,
710 binary_name_regex_.get());
Yabin Cuiba51cf42021-12-15 13:21:54 -0800711 reader.SetCallback(callback);
712 if (!reader.Read()) {
713 return false;
714 }
Yabin Cui193f2382020-04-01 14:30:03 -0700715 }
Yabin Cui4441e972021-12-15 13:51:58 -0800716 BranchListWriter branch_list_writer;
717 return branch_list_writer.Write(output_filename_, branch_list_merger.binary_map);
Yabin Cuiba51cf42021-12-15 13:21:54 -0800718 }
719
720 bool ConvertBranchListToAutoFDO() {
Yabin Cui4441e972021-12-15 13:51:58 -0800721 // Step1 : Merge branch lists from all input files.
722 BranchListMerger branch_list_merger;
Yabin Cui89465ad2021-12-14 14:24:55 -0800723 auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
Yabin Cui4441e972021-12-15 13:51:58 -0800724 branch_list_merger.AddBranchListBinary(key, binary);
Yabin Cui89465ad2021-12-14 14:24:55 -0800725 };
Yabin Cuiba51cf42021-12-15 13:21:54 -0800726 for (const auto& input_filename : input_filenames_) {
Yabin Cuif00f4fc2022-11-23 15:15:30 -0800727 BranchListReader reader(input_filename, binary_name_regex_.get());
Yabin Cuiba51cf42021-12-15 13:21:54 -0800728 reader.SetCallback(callback);
729 if (!reader.Read()) {
730 return false;
731 }
Yabin Cui193f2382020-04-01 14:30:03 -0700732 }
Yabin Cui4441e972021-12-15 13:51:58 -0800733
734 // Step2: Convert BranchListBinaryInfo to AutoFDOBinaryInfo.
735 AutoFDOWriter autofdo_writer;
736 BranchListToAutoFDOConverter converter;
737 for (auto& p : branch_list_merger.binary_map) {
738 const BinaryKey& key = p.first;
739 BranchListBinaryInfo& binary = p.second;
740 std::unique_ptr<AutoFDOBinaryInfo> autofdo_binary = converter.Convert(key, binary);
741 if (autofdo_binary) {
742 // Create new BinaryKey with kernel_start_addr = 0. Because AutoFDO output doesn't care
743 // kernel_start_addr.
744 autofdo_writer.AddAutoFDOBinary(BinaryKey(key.path, key.build_id), *autofdo_binary);
745 }
746 }
747
748 // Step3: Write AutoFDOBinaryInfo.
Yabin Cuiba51cf42021-12-15 13:21:54 -0800749 return autofdo_writer.Write(output_filename_);
Yabin Cuifad7bbe2019-09-18 16:05:51 -0700750 }
751
Yabin Cui04afffe2021-12-15 14:38:19 -0800752 bool ConvertBranchListToBranchList() {
753 // Step1 : Merge branch lists from all input files.
754 BranchListMerger branch_list_merger;
755 auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
756 branch_list_merger.AddBranchListBinary(key, binary);
757 };
758 for (const auto& input_filename : input_filenames_) {
Yabin Cuif00f4fc2022-11-23 15:15:30 -0800759 BranchListReader reader(input_filename, binary_name_regex_.get());
Yabin Cui04afffe2021-12-15 14:38:19 -0800760 reader.SetCallback(callback);
761 if (!reader.Read()) {
762 return false;
763 }
764 }
765 // Step2: Write BranchListBinaryInfo.
766 BranchListWriter branch_list_writer;
767 return branch_list_writer.Write(output_filename_, branch_list_merger.binary_map);
768 }
769
Yabin Cuif00f4fc2022-11-23 15:15:30 -0800770 std::unique_ptr<RegEx> binary_name_regex_;
Yabin Cui561bf1b2020-11-03 12:11:07 -0800771 bool exclude_perf_ = false;
Yabin Cuida89bf62021-12-08 14:11:24 -0800772 std::vector<std::string> input_filenames_;
Yabin Cuic573eaa2019-08-21 16:05:07 -0700773 std::string output_filename_ = "perf_inject.data";
Yabin Cui193f2382020-04-01 14:30:03 -0700774 OutputFormat output_format_ = OutputFormat::AutoFDO;
Yabin Cuic573eaa2019-08-21 16:05:07 -0700775 ETMDumpOption etm_dump_option_;
Yabin Cuifad7bbe2019-09-18 16:05:51 -0700776
Yabin Cuif2fe9f02021-12-14 13:06:50 -0800777 std::unique_ptr<Dso> placeholder_dso_;
Yabin Cuic573eaa2019-08-21 16:05:07 -0700778};
779
780} // namespace
781
782void RegisterInjectCommand() {
783 return RegisterCommand("inject", [] { return std::unique_ptr<Command>(new InjectCommand); });
784}
Yabin Cuiacbdb242020-07-07 15:56:34 -0700785
786} // namespace simpleperf