Merge "simpleperf: remove cpu option tests."
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index b8278e6..50d8654 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -313,6 +313,9 @@
if (!event_selection_set_.FinishReadMmapEventData()) {
return false;
}
+ if (!record_file_writer_->SortDataSection()) {
+ return false;
+ }
// 7. Dump additional features, and close record file.
if (!DumpAdditionalFeatures(args)) {
@@ -608,6 +611,11 @@
}
}
event_selection_set_.SetInherit(child_inherit_);
+ // If unwinding while recording, records are used before being sorted.
+ // By using a low watermark, records are almost sorted when read from the kernel.
+ if (dwarf_callchain_sampling_ && unwind_dwarf_callchain_ && !post_unwind_) {
+ event_selection_set_.SetLowWatermark();
+ }
return true;
}
diff --git a/simpleperf/dwarf_unwind.cpp b/simpleperf/dwarf_unwind.cpp
index d48a347..5444ef9 100644
--- a/simpleperf/dwarf_unwind.cpp
+++ b/simpleperf/dwarf_unwind.cpp
@@ -131,6 +131,10 @@
ucontext_t ucontext = BuildUContextFromRegs(regs);
if (backtrace->Unwind(0, &ucontext)) {
for (auto it = backtrace->begin(); it != backtrace->end(); ++it) {
+ // Unwinding on the arm architecture can return a pc address of 0.
+ if (it->pc == 0) {
+ break;
+ }
result.push_back(it->pc);
}
}
diff --git a/simpleperf/event_attr.cpp b/simpleperf/event_attr.cpp
index 10cef52..1936448 100644
--- a/simpleperf/event_attr.cpp
+++ b/simpleperf/event_attr.cpp
@@ -222,6 +222,10 @@
return attr.sample_id_all && (attr.sample_type & PERF_SAMPLE_TIME);
}
+// Returns true when attr has sample_id_all set and PERF_SAMPLE_CPU is part of
+// attr.sample_type.
+bool IsCpuSupported(const perf_event_attr& attr) {
+  if (!attr.sample_id_all) {
+    return false;
+  }
+  return (attr.sample_type & PERF_SAMPLE_CPU) != 0;
+}
+
std::string GetEventNameByAttr(const perf_event_attr& attr) {
for (const auto& event_type : GetAllEventTypes()) {
if (event_type.type == attr.type && event_type.config == attr.config) {
diff --git a/simpleperf/event_attr.h b/simpleperf/event_attr.h
index 030f7c9..9182bb9 100644
--- a/simpleperf/event_attr.h
+++ b/simpleperf/event_attr.h
@@ -32,6 +32,7 @@
size_t* event_id_pos_in_sample_records,
size_t* event_id_reverse_pos_in_non_sample_records);
bool IsTimestampSupported(const perf_event_attr& attr);
+bool IsCpuSupported(const perf_event_attr& attr);
// Return event name with modifier if the event is found, otherwise return "unknown".
std::string GetEventNameByAttr(const perf_event_attr& attr);
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index 1e765cb..6235e32 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -234,6 +234,14 @@
}
}
+// Sets wakeup_events to 1 in the event_attr of every selection of every group.
+void EventSelectionSet::SetLowWatermark() {
+  for (auto& selections : groups_) {
+    for (auto& sel : selections) {
+      sel.event_attr.wakeup_events = 1;
+    }
+  }
+}
+
static bool CheckIfCpusOnline(const std::vector<int>& cpus) {
std::vector<int> online_cpus = GetOnlineCpus();
for (const auto& cpu : cpus) {
@@ -389,18 +397,8 @@
}
}
- // Prepare record callback function and record cache.
+ // Prepare record callback function.
record_callback_ = callback;
- bool has_timestamp = true;
- for (const auto& group : groups_) {
- for (const auto& selection : group) {
- if (!IsTimestampSupported(selection.event_attr)) {
- has_timestamp = false;
- break;
- }
- }
- }
- record_cache_.reset(new RecordCache(has_timestamp));
return true;
}
@@ -417,13 +415,10 @@
}
std::vector<std::unique_ptr<Record>> records =
ReadRecordsFromBuffer(event_fd->attr(), data, size);
- record_cache_->Push(std::move(records));
- std::unique_ptr<Record> r = record_cache_->Pop();
- while (r != nullptr) {
+ for (auto& r : records) {
if (!record_callback_(r.get())) {
return false;
}
- r = record_cache_->Pop();
}
return true;
}
@@ -442,12 +437,5 @@
}
}
}
- // Clean up record cache.
- std::vector<std::unique_ptr<Record>> records = record_cache_->PopAll();
- for (auto& r : records) {
- if (!record_callback_(r.get())) {
- return false;
- }
- }
return true;
}
diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h
index ccf7996..a947320 100644
--- a/simpleperf/event_selection_set.h
+++ b/simpleperf/event_selection_set.h
@@ -85,6 +85,7 @@
void EnableFpCallChainSampling();
bool EnableDwarfCallChainSampling(uint32_t dump_stack_size);
void SetInherit(bool enable);
+ void SetLowWatermark();
bool OpenEventFilesForCpus(const std::vector<int>& cpus);
bool OpenEventFilesForThreadsOnCpus(const std::vector<pid_t>& threads,
@@ -107,7 +108,6 @@
std::vector<EventSelectionGroup> groups_;
std::function<bool(Record*)> record_callback_;
- std::unique_ptr<RecordCache> record_cache_;
DISALLOW_COPY_AND_ASSIGN(EventSelectionSet);
};
diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp
index cf9a201..587d23d 100644
--- a/simpleperf/record.cpp
+++ b/simpleperf/record.cpp
@@ -206,6 +206,7 @@
}
uint64_t Record::Timestamp() const { return sample_id.time_data.time; }
+// Returns the cpu value stored in this record's sample_id data.
+uint32_t Record::Cpu() const {
+  return sample_id.cpu_data.cpu;
+}
void Record::UpdateBinary(const char* new_binary) {
if (own_binary_) {
@@ -595,6 +596,7 @@
}
uint64_t SampleRecord::Timestamp() const { return time_data.time; }
+// Returns the cpu value stored directly in the sample record (cpu_data).
+uint32_t SampleRecord::Cpu() const {
+  return cpu_data.cpu;
+}
BuildIdRecord::BuildIdRecord(const char* p) : Record(p) {
const char* end = p + size();
@@ -817,9 +819,7 @@
RecordHeader header(p);
CHECK_LE(p + header.size, end);
CHECK_NE(0u, header.size);
- char* binary = new char[header.size];
- memcpy(binary, p, header.size);
- result.push_back(ReadRecordFromOwnedBuffer(attr, header.type, binary));
+ result.push_back(ReadRecordFromBuffer(attr, header.type, p));
p += header.size;
}
return result;
@@ -896,3 +896,12 @@
}
return result;
}
+
+// Removes the record at the top of the queue and hands ownership of it to the
+// caller. Returns nullptr when the queue is empty.
+std::unique_ptr<Record> RecordCache::ForcedPop() {
+  std::unique_ptr<Record> popped;
+  if (!queue_.empty()) {
+    popped.reset(queue_.top().record);
+    queue_.pop();
+  }
+  return popped;
+}
diff --git a/simpleperf/record.h b/simpleperf/record.h
index c4fabd3..abfc163 100644
--- a/simpleperf/record.h
+++ b/simpleperf/record.h
@@ -249,6 +249,7 @@
const char* Binary() const { return binary_; }
virtual uint64_t Timestamp() const;
+ virtual uint32_t Cpu() const;
protected:
void UpdateBinary(const char* new_binary);
@@ -385,6 +386,7 @@
SampleRecord(const perf_event_attr& attr, const char* p);
void ReplaceRegAndStackWithCallChain(const std::vector<uint64_t>& ips);
uint64_t Timestamp() const override;
+ uint32_t Cpu() const override;
uint64_t GetValidStackSize() const {
// If stack_user_data.dyn_size == 0, it may be because the kernel misses
@@ -520,6 +522,7 @@
void Push(std::vector<std::unique_ptr<Record>> records);
std::unique_ptr<Record> Pop();
std::vector<std::unique_ptr<Record>> PopAll();
+ std::unique_ptr<Record> ForcedPop();
private:
struct RecordWithSeq {
diff --git a/simpleperf/record_file.h b/simpleperf/record_file.h
index 89c93e2..89e66e5 100644
--- a/simpleperf/record_file.h
+++ b/simpleperf/record_file.h
@@ -46,6 +46,7 @@
bool WriteAttrSection(const std::vector<AttrWithId>& attr_ids);
bool WriteRecord(const Record& record);
+ bool SortDataSection();
bool WriteFeatureHeader(size_t feature_count);
bool WriteBuildIdFeature(const std::vector<BuildIdRecord>& build_id_records);
@@ -66,6 +67,8 @@
bool WriteFileHeader();
bool WriteData(const void* buf, size_t len);
bool Write(const void* buf, size_t len);
+ std::unique_ptr<Record> ReadRecordFromFile(FILE* fp, std::vector<char>& buf);
+ bool WriteRecordToFile(FILE* fp, std::unique_ptr<Record> r);
bool SeekFileEnd(uint64_t* file_end);
bool WriteFeatureBegin(uint64_t* start_offset);
bool WriteFeatureEnd(int feature, uint64_t start_offset);
diff --git a/simpleperf/record_file_writer.cpp b/simpleperf/record_file_writer.cpp
index 67c196f..767809c 100644
--- a/simpleperf/record_file_writer.cpp
+++ b/simpleperf/record_file_writer.cpp
@@ -22,11 +22,14 @@
#include <unistd.h>
#include <set>
#include <string>
+#include <unordered_map>
#include <vector>
#include <android-base/file.h>
#include <android-base/logging.h>
+#include <android-base/test_utils.h>
+#include "event_attr.h"
#include "perf_event.h"
#include "record.h"
#include "utils.h"
@@ -171,6 +174,126 @@
return true;
}
+// Reads one record from fp into buf and returns a Record built from buf.data().
+// buf is grown as needed: first to hold the perf_event_header, then to hold the
+// whole record whose length is given by the header's size field.
+// Returns nullptr (after logging) if a read fails or the header is malformed.
+// NOTE(review): the record is created with ReadRecordFromBuffer() on buf's
+// storage, so it presumably refers to buf rather than copying it - keep buf
+// alive and unmodified while the returned record is in use.
+std::unique_ptr<Record> RecordFileWriter::ReadRecordFromFile(FILE* fp, std::vector<char>& buf) {
+  if (buf.size() < sizeof(perf_event_header)) {
+    buf.resize(sizeof(perf_event_header));
+  }
+  auto pheader = reinterpret_cast<perf_event_header*>(buf.data());
+  if (fread(pheader, sizeof(*pheader), 1, fp) != 1) {
+    PLOG(ERROR) << "read failed";
+    return nullptr;
+  }
+  // A record can never be shorter than its own header; reject it instead of
+  // handing a truncated buffer to the record parser.
+  if (pheader->size < sizeof(*pheader)) {
+    LOG(ERROR) << "invalid record size " << pheader->size;
+    return nullptr;
+  }
+  if (pheader->size > sizeof(*pheader)) {
+    if (pheader->size > buf.size()) {
+      buf.resize(pheader->size);
+    }
+    // resize() may have reallocated the storage, so refresh the header pointer.
+    pheader = reinterpret_cast<perf_event_header*>(buf.data());
+    // Read the payload that follows the header.
+    if (fread(pheader + 1, pheader->size - sizeof(*pheader), 1, fp) != 1) {
+      PLOG(ERROR) << "read failed";
+      return nullptr;
+    }
+  }
+  return ReadRecordFromBuffer(event_attr_, pheader->type, buf.data());
+}
+
+// Writes the binary form of record r to fp in a single fwrite() call.
+// Returns true when the whole record was written; logs and returns false
+// otherwise. r is consumed by the call.
+bool RecordFileWriter::WriteRecordToFile(FILE* fp, std::unique_ptr<Record> r) {
+  const size_t items_written = fwrite(r->Binary(), r->size(), 1, fp);
+  if (items_written == 1) {
+    return true;
+  }
+  PLOG(ERROR) << "write failed";
+  return false;
+}
+
+// SortDataSection() sorts records in data section in time order.
+// This method is suitable for the situation that there is only one buffer
+// between kernel and simpleperf for each cpu. The order of records in each
+// cpu buffer is already sorted, so we only need to merge records from different
+// cpu buffers.
+// 1. Create one temporary file for each cpu, and write records to different
+//    temporary files according to their cpu value.
+// 2. Use RecordCache to merge records from different temporary files.
+// Returns true on success, or when the sort is skipped because timestamp or
+// cpu is not recorded. Returns false when a seek, read, write or temporary
+// file open fails.
+bool RecordFileWriter::SortDataSection() {
+  if (!IsTimestampSupported(event_attr_) || !IsCpuSupported(event_attr_)) {
+    // Omit the sort if either timestamp or cpu is not recorded.
+    return true;
+  }
+  // Per-cpu scratch state: the temporary file, a stdio stream opened on it,
+  // a read buffer used while merging, and the number of bytes in the file
+  // that have not been read back yet.
+  struct CpuData {
+    TemporaryFile tmpfile;
+    FILE* fp;
+    std::vector<char> buf;
+    uint64_t data_size;
+
+    CpuData() : fp(nullptr), data_size(0) {
+      fp = fdopen(tmpfile.fd, "web+");
+    }
+    ~CpuData() {
+      // fdopen() may have failed (the caller checks fp and bails out), and
+      // fclose() must not be called with a null stream.
+      // NOTE(review): fclose() also closes tmpfile.fd; confirm that
+      // TemporaryFile's destructor tolerates an already closed fd.
+      if (fp != nullptr) {
+        fclose(fp);
+      }
+    }
+  };
+  std::unordered_map<uint32_t, CpuData> cpu_map;
+  if (fseek(record_fp_, data_section_offset_, SEEK_SET) == -1) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  // Step 1: walk the data section and append each record to the temporary
+  // file of the cpu it came from.
+  uint64_t cur_size = 0;
+  std::vector<char> global_buf;
+  while (cur_size < data_section_size_) {
+    std::unique_ptr<Record> r = ReadRecordFromFile(record_fp_, global_buf);
+    if (r == nullptr) {
+      return false;
+    }
+    cur_size += r->size();
+    // operator[] creates the CpuData (and its temporary file) on first use.
+    CpuData& cpu_data = cpu_map[r->Cpu()];
+    if (cpu_data.fp == nullptr) {
+      PLOG(ERROR) << "failed to open tmpfile";
+      return false;
+    }
+    cpu_data.data_size += r->size();
+    if (!WriteRecordToFile(cpu_data.fp, std::move(r))) {
+      return false;
+    }
+  }
+  // Step 2: rewind the data section and overwrite it with the merged records.
+  if (fseek(record_fp_, data_section_offset_, SEEK_SET) == -1) {
+    PLOG(ERROR) << "fseek() failed";
+    return false;
+  }
+  RecordCache global_cache(true);
+  // Prime the cache with the first record of every cpu. Each entry in cpu_map
+  // was created when a record was written to it, so each file has at least one.
+  for (auto& entry : cpu_map) {
+    CpuData& cpu_data = entry.second;
+    if (fseek(cpu_data.fp, 0, SEEK_SET) == -1) {
+      PLOG(ERROR) << "fseek() failed";
+      return false;
+    }
+    std::unique_ptr<Record> r = ReadRecordFromFile(cpu_data.fp, cpu_data.buf);
+    if (r == nullptr) {
+      return false;
+    }
+    cpu_data.data_size -= r->size();
+    global_cache.Push(std::move(r));
+  }
+  while (true) {
+    std::unique_ptr<Record> r = global_cache.ForcedPop();
+    if (r == nullptr) {
+      break;
+    }
+    // Remember the cpu before r is moved into WriteRecordToFile().
+    uint32_t cpu = r->Cpu();
+    if (!WriteRecordToFile(record_fp_, std::move(r))) {
+      return false;
+    }
+    // Each time one record of a cpu is written, push the next record from the
+    // temporary file belonging to that cpu into the record cache.
+    CpuData& cpu_data = cpu_map[cpu];
+    if (cpu_data.data_size > 0) {
+      r = ReadRecordFromFile(cpu_data.fp, cpu_data.buf);
+      if (r == nullptr) {
+        return false;
+      }
+      cpu_data.data_size -= r->size();
+      global_cache.Push(std::move(r));
+    }
+  }
+  return true;
+}
+
bool RecordFileWriter::SeekFileEnd(uint64_t* file_end) {
if (fseek(record_fp_, 0, SEEK_END) == -1) {
PLOG(ERROR) << "fseek() failed";