Mathias Agopian | 61fd2ab | 2016-08-16 19:42:42 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2016 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef TNT_UTILS_PROFILER_H |
| 18 | #define TNT_UTILS_PROFILER_H |
| 19 | |
| 20 | #include <assert.h> |
| 21 | #include <stdint.h> |
| 22 | |
| 23 | #include <array> |
| 24 | #include <chrono> |
| 25 | |
| 26 | #if defined(__linux__) |
| 27 | # include <unistd.h> |
| 28 | # include <sys/ioctl.h> |
| 29 | # include <linux/perf_event.h> |
| 30 | #endif |
| 31 | |
| 32 | namespace utils { |
| 33 | |
| 34 | class Profiler { |
| 35 | enum { |
| 36 | INSTRUCTIONS = 0, // must be zero |
| 37 | CPU_CYCLES = 1, |
| 38 | DCACHE_REFS = 2, |
| 39 | DCACHE_MISSES = 3, |
| 40 | BRANCHES = 4, |
| 41 | BRANCH_MISSES = 5, |
| 42 | ICACHE_REFS = 6, |
| 43 | ICACHE_MISSES = 7, |
| 44 | |
| 45 | // Must be last one |
| 46 | EVENT_COUNT |
| 47 | }; |
| 48 | |
| 49 | public: |
| 50 | |
| 51 | enum { |
| 52 | EV_CPU_CYCLES = 1 << CPU_CYCLES, |
| 53 | EV_L1D_REFS = 1 << DCACHE_REFS, |
| 54 | EV_L1D_MISSES = 1 << DCACHE_MISSES, |
| 55 | EV_BPU_REFS = 1 << BRANCHES, |
| 56 | EV_BPU_MISSES = 1 << BRANCH_MISSES, |
| 57 | EV_L1I_REFS = 1 << ICACHE_REFS, |
| 58 | EV_L1I_MISSES = 1 << ICACHE_MISSES, |
| 59 | // helpers |
| 60 | EV_L1D_RATES = EV_L1D_REFS | EV_L1D_MISSES, |
| 61 | EV_L1I_RATES = EV_L1I_REFS | EV_L1I_MISSES, |
| 62 | EV_BPU_RATES = EV_BPU_REFS | EV_BPU_MISSES, |
| 63 | }; |
| 64 | |
| 65 | static Profiler& get() noexcept; |
| 66 | |
| 67 | |
| 68 | Profiler(const Profiler& rhs) = delete; |
| 69 | Profiler(Profiler&& rhs) = delete; |
| 70 | Profiler& operator=(const Profiler& rhs) = delete; |
| 71 | Profiler& operator=(Profiler&& rhs) = delete; |
| 72 | |
| 73 | // selects which events are enabled. |
| 74 | // By Default: EV_CPU_CYCLES | EV_L1D_RATES | EV_BPU_RATES |
| 75 | uint32_t resetEvents(uint32_t eventMask) noexcept; |
| 76 | |
| 77 | uint32_t getEnabledEvents() const noexcept { return mEnabledEvents; } |
| 78 | |
| 79 | // could return false if performance counters are not supported/enabled |
| 80 | bool isValid() const { return mCountersFd[0] >= 0; } |
| 81 | |
| 82 | class Counters { |
| 83 | friend class Profiler; |
| 84 | uint64_t nr; |
| 85 | uint64_t time_enabled; |
| 86 | uint64_t time_running; |
| 87 | struct { |
| 88 | uint64_t value; |
| 89 | uint64_t id; |
| 90 | } counters[Profiler::EVENT_COUNT]; |
| 91 | |
| 92 | friend Counters operator-(Counters lhs, const Counters& rhs) noexcept { |
| 93 | lhs.nr -= rhs.nr; |
| 94 | lhs.time_enabled -= rhs.time_enabled; |
| 95 | lhs.time_running -= rhs.time_running; |
| 96 | for (size_t i=0 ; i<EVENT_COUNT ; ++i) { |
| 97 | lhs.counters[i].value -= rhs.counters[i].value; |
| 98 | } |
| 99 | return lhs; |
| 100 | } |
| 101 | |
| 102 | public: |
| 103 | uint64_t getInstructions() const { return counters[INSTRUCTIONS].value; } |
| 104 | uint64_t getCpuCycles() const { return counters[CPU_CYCLES].value; } |
| 105 | uint64_t getL1DReferences() const { return counters[DCACHE_REFS].value; } |
| 106 | uint64_t getL1DMisses() const { return counters[DCACHE_MISSES].value; } |
| 107 | uint64_t getL1IReferences() const { return counters[ICACHE_REFS].value; } |
| 108 | uint64_t getL1IMisses() const { return counters[ICACHE_MISSES].value; } |
| 109 | uint64_t getBranchInstructions() const { return counters[BRANCHES].value; } |
| 110 | uint64_t getBranchMisses() const { return counters[BRANCH_MISSES].value; } |
| 111 | |
| 112 | std::chrono::duration<uint64_t, std::nano> getWallTime() const { |
| 113 | return std::chrono::duration<uint64_t, std::nano>(time_enabled); |
| 114 | } |
| 115 | |
| 116 | std::chrono::duration<uint64_t, std::nano> getRunningTime() const { |
| 117 | return std::chrono::duration<uint64_t, std::nano>(time_running); |
| 118 | } |
| 119 | |
| 120 | double getIPC() const noexcept { |
| 121 | uint64_t cpuCycles = getCpuCycles(); |
| 122 | uint64_t instructions = getInstructions(); |
| 123 | return double(instructions) / double(cpuCycles); |
| 124 | } |
| 125 | |
| 126 | double getCPI() const noexcept { |
| 127 | uint64_t cpuCycles = getCpuCycles(); |
| 128 | uint64_t instructions = getInstructions(); |
| 129 | return double(cpuCycles) / double(instructions); |
| 130 | } |
| 131 | |
| 132 | double getL1DMissRate() const noexcept { |
| 133 | uint64_t cacheReferences = getL1DReferences(); |
| 134 | uint64_t cacheMisses = getL1DMisses(); |
| 135 | return double(cacheMisses) / double(cacheReferences); |
| 136 | } |
| 137 | |
| 138 | double getL1DHitRate() const noexcept { |
| 139 | return 1.0 - getL1DMissRate(); |
| 140 | } |
| 141 | |
| 142 | double getL1IMissRate() const noexcept { |
| 143 | uint64_t cacheReferences = getL1IReferences(); |
| 144 | uint64_t cacheMisses = getL1IMisses(); |
| 145 | return double(cacheMisses) / double(cacheReferences); |
| 146 | } |
| 147 | |
| 148 | double getL1IHitRate() const noexcept { |
| 149 | return 1.0 - getL1IMissRate(); |
| 150 | } |
| 151 | |
| 152 | double getBranchMissRate() const noexcept { |
| 153 | uint64_t branchReferences = getBranchInstructions(); |
| 154 | uint64_t branchMisses = getBranchMisses(); |
| 155 | return double(branchMisses) / double(branchReferences); |
| 156 | } |
| 157 | |
| 158 | double getBranchHitRate() const noexcept { |
| 159 | return 1.0 - getBranchMissRate(); |
| 160 | } |
| 161 | |
Mathias Agopian | e806829 | 2016-08-24 14:42:30 -0700 | [diff] [blame] | 162 | double getMPKI(uint64_t misses) const noexcept { |
| 163 | return (misses * 1000.0) / getInstructions(); |
| 164 | } |
| 165 | |
Mathias Agopian | 61fd2ab | 2016-08-16 19:42:42 -0700 | [diff] [blame] | 166 | }; |
| 167 | |
| 168 | #if defined(__linux__) |
| 169 | |
| 170 | void reset() noexcept { |
| 171 | int fd = mCountersFd[0]; |
| 172 | ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); |
| 173 | } |
| 174 | |
| 175 | void start() noexcept { |
| 176 | int fd = mCountersFd[0]; |
| 177 | ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); |
| 178 | } |
| 179 | |
| 180 | void stop() noexcept { |
| 181 | int fd = mCountersFd[0]; |
| 182 | ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); |
| 183 | } |
| 184 | |
| 185 | void readCounters(Counters* outCounters) noexcept { |
| 186 | Counters counters; |
| 187 | ssize_t n = read(mCountersFd[0], &counters, sizeof(Counters)); |
| 188 | memset(outCounters, 0, sizeof(Counters)); |
| 189 | if (n > 0) { |
| 190 | outCounters->nr = counters.nr; |
| 191 | outCounters->time_enabled = counters.time_enabled; |
| 192 | outCounters->time_running = counters.time_running; |
| 193 | for (size_t i=0 ; i<size_t(EVENT_COUNT) ; i++) { |
| 194 | if (mCountersFd[i] >= 0) { |
| 195 | outCounters->counters[i] = counters.counters[mIds[i]]; |
| 196 | } |
| 197 | } |
| 198 | } |
| 199 | } |
| 200 | |
| 201 | #else // !__linux__ |
| 202 | |
| 203 | void reset() noexcept { } |
| 204 | void start() noexcept { } |
| 205 | void stop() noexcept { } |
| 206 | void readCounters(Counters* counters) noexcept { } |
| 207 | |
| 208 | #endif // __linux__ |
| 209 | |
| 210 | bool hasBranchRates() const noexcept { |
| 211 | return (mCountersFd[BRANCHES] >= 0) && (mCountersFd[BRANCH_MISSES] >= 0); |
| 212 | } |
| 213 | |
| 214 | bool hasICacheRates() const noexcept { |
| 215 | return (mCountersFd[ICACHE_REFS] >= 0) && (mCountersFd[ICACHE_MISSES] >= 0); |
| 216 | } |
| 217 | |
| 218 | private: |
| 219 | Profiler() noexcept; |
| 220 | ~Profiler() noexcept; |
| 221 | |
| 222 | std::array<uint8_t, EVENT_COUNT> mIds; |
| 223 | std::array<int, EVENT_COUNT> mCountersFd; |
| 224 | uint32_t mEnabledEvents = 0; |
| 225 | }; |
| 226 | |
| 227 | } // namespace utils |
| 228 | |
| 229 | #endif // TNT_UTILS_PROFILER_H |