blob: a36cab3d3829d9c120199d05a1557056ce61491d [file] [log] [blame]
/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef TNT_UTILS_PROFILER_H
18#define TNT_UTILS_PROFILER_H
19
20#include <assert.h>
21#include <stdint.h>
22
23#include <array>
24#include <chrono>
25
26#if defined(__linux__)
27# include <unistd.h>
28# include <sys/ioctl.h>
29# include <linux/perf_event.h>
30#endif
31
32namespace utils {
33
34class Profiler {
35 enum {
36 INSTRUCTIONS = 0, // must be zero
37 CPU_CYCLES = 1,
38 DCACHE_REFS = 2,
39 DCACHE_MISSES = 3,
40 BRANCHES = 4,
41 BRANCH_MISSES = 5,
42 ICACHE_REFS = 6,
43 ICACHE_MISSES = 7,
44
45 // Must be last one
46 EVENT_COUNT
47 };
48
49public:
50
51 enum {
52 EV_CPU_CYCLES = 1 << CPU_CYCLES,
53 EV_L1D_REFS = 1 << DCACHE_REFS,
54 EV_L1D_MISSES = 1 << DCACHE_MISSES,
55 EV_BPU_REFS = 1 << BRANCHES,
56 EV_BPU_MISSES = 1 << BRANCH_MISSES,
57 EV_L1I_REFS = 1 << ICACHE_REFS,
58 EV_L1I_MISSES = 1 << ICACHE_MISSES,
59 // helpers
60 EV_L1D_RATES = EV_L1D_REFS | EV_L1D_MISSES,
61 EV_L1I_RATES = EV_L1I_REFS | EV_L1I_MISSES,
62 EV_BPU_RATES = EV_BPU_REFS | EV_BPU_MISSES,
63 };
64
65 static Profiler& get() noexcept;
66
67
68 Profiler(const Profiler& rhs) = delete;
69 Profiler(Profiler&& rhs) = delete;
70 Profiler& operator=(const Profiler& rhs) = delete;
71 Profiler& operator=(Profiler&& rhs) = delete;
72
73 // selects which events are enabled.
74 // By Default: EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES
75 uint32_t resetEvents(uint32_t eventMask) noexcept;
76
77 uint32_t getEnabledEvents() const noexcept { return mEnabledEvents; }
78
79 // could return false if performance counters are not supported/enabled
80 bool isValid() const { return mCountersFd[0] >= 0; }
81
82 class Counters {
83 friend class Profiler;
84 uint64_t nr;
85 uint64_t time_enabled;
86 uint64_t time_running;
87 struct {
88 uint64_t value;
89 uint64_t id;
90 } counters[Profiler::EVENT_COUNT];
91
92 friend Counters operator-(Counters lhs, const Counters& rhs) noexcept {
93 lhs.nr -= rhs.nr;
94 lhs.time_enabled -= rhs.time_enabled;
95 lhs.time_running -= rhs.time_running;
96 for (size_t i=0 ; i<EVENT_COUNT ; ++i) {
97 lhs.counters[i].value -= rhs.counters[i].value;
98 }
99 return lhs;
100 }
101
102 public:
103 uint64_t getInstructions() const { return counters[INSTRUCTIONS].value; }
104 uint64_t getCpuCycles() const { return counters[CPU_CYCLES].value; }
105 uint64_t getL1DReferences() const { return counters[DCACHE_REFS].value; }
106 uint64_t getL1DMisses() const { return counters[DCACHE_MISSES].value; }
107 uint64_t getL1IReferences() const { return counters[ICACHE_REFS].value; }
108 uint64_t getL1IMisses() const { return counters[ICACHE_MISSES].value; }
109 uint64_t getBranchInstructions() const { return counters[BRANCHES].value; }
110 uint64_t getBranchMisses() const { return counters[BRANCH_MISSES].value; }
111
112 std::chrono::duration<uint64_t, std::nano> getWallTime() const {
113 return std::chrono::duration<uint64_t, std::nano>(time_enabled);
114 }
115
116 std::chrono::duration<uint64_t, std::nano> getRunningTime() const {
117 return std::chrono::duration<uint64_t, std::nano>(time_running);
118 }
119
120 double getIPC() const noexcept {
121 uint64_t cpuCycles = getCpuCycles();
122 uint64_t instructions = getInstructions();
123 return double(instructions) / double(cpuCycles);
124 }
125
126 double getCPI() const noexcept {
127 uint64_t cpuCycles = getCpuCycles();
128 uint64_t instructions = getInstructions();
129 return double(cpuCycles) / double(instructions);
130 }
131
132 double getL1DMissRate() const noexcept {
133 uint64_t cacheReferences = getL1DReferences();
134 uint64_t cacheMisses = getL1DMisses();
135 return double(cacheMisses) / double(cacheReferences);
136 }
137
138 double getL1DHitRate() const noexcept {
139 return 1.0 - getL1DMissRate();
140 }
141
142 double getL1IMissRate() const noexcept {
143 uint64_t cacheReferences = getL1IReferences();
144 uint64_t cacheMisses = getL1IMisses();
145 return double(cacheMisses) / double(cacheReferences);
146 }
147
148 double getL1IHitRate() const noexcept {
149 return 1.0 - getL1IMissRate();
150 }
151
152 double getBranchMissRate() const noexcept {
153 uint64_t branchReferences = getBranchInstructions();
154 uint64_t branchMisses = getBranchMisses();
155 return double(branchMisses) / double(branchReferences);
156 }
157
158 double getBranchHitRate() const noexcept {
159 return 1.0 - getBranchMissRate();
160 }
161
Mathias Agopiane8068292016-08-24 14:42:30 -0700162 double getMPKI(uint64_t misses) const noexcept {
163 return (misses * 1000.0) / getInstructions();
164 }
165
Mathias Agopian61fd2ab2016-08-16 19:42:42 -0700166 };
167
168#if defined(__linux__)
169
170 void reset() noexcept {
171 int fd = mCountersFd[0];
172 ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
173 }
174
175 void start() noexcept {
176 int fd = mCountersFd[0];
177 ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
178 }
179
180 void stop() noexcept {
181 int fd = mCountersFd[0];
182 ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
183 }
184
185 void readCounters(Counters* outCounters) noexcept {
186 Counters counters;
187 ssize_t n = read(mCountersFd[0], &counters, sizeof(Counters));
188 memset(outCounters, 0, sizeof(Counters));
189 if (n > 0) {
190 outCounters->nr = counters.nr;
191 outCounters->time_enabled = counters.time_enabled;
192 outCounters->time_running = counters.time_running;
193 for (size_t i=0 ; i<size_t(EVENT_COUNT) ; i++) {
194 if (mCountersFd[i] >= 0) {
195 outCounters->counters[i] = counters.counters[mIds[i]];
196 }
197 }
198 }
199 }
200
201#else // !__linux__
202
203 void reset() noexcept { }
204 void start() noexcept { }
205 void stop() noexcept { }
206 void readCounters(Counters* counters) noexcept { }
207
208#endif // __linux__
209
210 bool hasBranchRates() const noexcept {
211 return (mCountersFd[BRANCHES] >= 0) && (mCountersFd[BRANCH_MISSES] >= 0);
212 }
213
214 bool hasICacheRates() const noexcept {
215 return (mCountersFd[ICACHE_REFS] >= 0) && (mCountersFd[ICACHE_MISSES] >= 0);
216 }
217
218private:
219 Profiler() noexcept;
220 ~Profiler() noexcept;
221
222 std::array<uint8_t, EVENT_COUNT> mIds;
223 std::array<int, EVENT_COUNT> mCountersFd;
224 uint32_t mEnabledEvents = 0;
225};
226
227} // namespace utils
228
229#endif // TNT_UTILS_PROFILER_H