Yabin Cui | 4176ccf | 2017-12-08 13:12:34 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "CallChainJoiner.h" |
| 18 | |
| 19 | #include <android-base/logging.h> |
| 20 | |
| 21 | #include "environment.h" |
| 22 | #include "utils.h" |
| 23 | |
| 24 | namespace simpleperf { |
| 25 | namespace call_chain_joiner_impl { |
| 26 | |
| 27 | LRUCache::LRUCache(size_t cache_size, size_t matched_node_count_to_extend_callchain) |
| 28 | : node_set_(1024 * 16, CacheNodeHash, CacheNodeEqual) { |
| 29 | cache_stat_.cache_size = cache_size; |
| 30 | cache_stat_.max_node_count = cache_size / sizeof(CacheNode); |
| 31 | CHECK_GE(cache_stat_.max_node_count, 2u); |
| 32 | CHECK_GE(matched_node_count_to_extend_callchain, 1u); |
| 33 | cache_stat_.matched_node_count_to_extend_callchain = matched_node_count_to_extend_callchain; |
| 34 | nodes_ = new CacheNode[cache_stat_.max_node_count + 1]; // with 1 sentinel node |
| 35 | // nodes_[0] is the sentinel node of the LRU linked list. |
| 36 | nodes_[0].is_leaf = 1; |
| 37 | nodes_[0].parent_index = 0; |
| 38 | nodes_[0].leaf_link_prev = nodes_[0].leaf_link_next = 0; |
| 39 | } |
| 40 | |
| 41 | LRUCache::~LRUCache() { |
| 42 | delete[] nodes_; |
| 43 | } |
| 44 | |
| 45 | void LRUCache::AddCallChain(pid_t tid, std::vector<uint64_t>& ips, std::vector<uint64_t>& sps) { |
| 46 | // 1. Build current call chain. |
| 47 | std::vector<CacheNode*> chain; |
| 48 | for (size_t i = 0; i < ips.size(); ++i) { |
| 49 | CacheNode* node = GetNode(tid, ips[i], sps[i]); |
| 50 | chain.push_back(node); |
| 51 | } |
| 52 | |
| 53 | // 2. Check if the (tid, ip, sp) tuples of the top [matched_node_count_to_extend_callchain] |
| 54 | // nodes of current call chain exist in the cache and have the same relation as in current |
| 55 | // call chain. If true, we can extend current call chain. |
| 56 | bool can_extend = true; |
| 57 | if (chain.size() < cache_stat_.matched_node_count_to_extend_callchain) { |
| 58 | can_extend = false; |
| 59 | } else { |
| 60 | size_t chain_pos = chain.size() - 2; |
| 61 | for (size_t i = 1; i < cache_stat_.matched_node_count_to_extend_callchain; ++i) { |
| 62 | if (GetParent(chain[chain_pos]) != chain[chain_pos + 1]) { |
| 63 | can_extend = false; |
| 64 | break; |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | std::vector<uint64_t> origin_ip = ips; |
| 69 | |
| 70 | // 3. Add current call chain to the cache. |
| 71 | for (size_t i = 0; i + 1 < chain.size(); ++i) { |
| 72 | LinkParent(chain[i], chain[i + 1]); |
| 73 | } |
| 74 | // 4. Optionally extend current call chain. |
| 75 | if (can_extend) { |
| 76 | CacheNode* top = chain.back(); |
| 77 | while ((top = GetParent(top)) != nullptr) { |
| 78 | if (top->sp == chain.back()->sp) { |
| 79 | // This is to prevent a loop case as below, which is unlikely to happen. |
| 80 | // The cache has node A.parent = B, while current call chain has node B.parent = A. |
| 81 | // This makes a loop of A -> B -> A -> B... |
| 82 | bool has_loop = false; |
| 83 | for (auto it = chain.rbegin(); it != chain.rend() && (*it)->sp == top->sp; ++it) { |
| 84 | if ((*it)->ip == top->ip) { |
| 85 | has_loop = true; |
| 86 | break; |
| 87 | } |
| 88 | } |
| 89 | if (has_loop) { |
| 90 | UnlinkParent(chain.back()); |
| 91 | ips.resize(chain.size()); |
| 92 | sps.resize(chain.size()); |
| 93 | break; |
| 94 | } |
| 95 | } |
| 96 | ips.push_back(top->ip); |
| 97 | sps.push_back(top->sp); |
| 98 | } |
| 99 | } else { |
| 100 | UnlinkParent(chain.back()); |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | bool LRUCache::CacheNodeEqual(const CacheNode* n1, const CacheNode* n2) { |
| 105 | return n1->tid == n2->tid && n1->ip == n2->ip && n1->sp == n2->sp; |
| 106 | } |
| 107 | |
| 108 | size_t LRUCache::CacheNodeHash(const CacheNode* n) { |
Yabin Cui | 35b2d16 | 2018-06-28 16:47:33 -0700 | [diff] [blame] | 109 | return static_cast<size_t>(n->tid ^ n->ip ^ n->sp); |
Yabin Cui | 4176ccf | 2017-12-08 13:12:34 -0800 | [diff] [blame] | 110 | } |
| 111 | |
| 112 | CacheNode* LRUCache::GetNode(uint32_t tid, uint64_t ip, uint64_t sp) { |
| 113 | CacheNode* node = FindNode(tid, ip, sp); |
| 114 | if (node != nullptr) { |
| 115 | if (node->is_leaf) { |
| 116 | // Update the node's position in the LRU linked list. |
| 117 | RemoveNodeFromLRUList(node); |
| 118 | AppendNodeToLRUList(node); |
| 119 | } |
| 120 | return node; |
| 121 | } |
| 122 | node = AllocNode(); |
| 123 | node->tid = tid; |
| 124 | node->ip = ip; |
| 125 | node->sp = sp; |
| 126 | node->is_leaf = 1; |
| 127 | node->parent_index = 0; |
| 128 | node->leaf_link_prev = node->leaf_link_next = GetNodeIndex(node); |
| 129 | node_set_.insert(node); |
| 130 | AppendNodeToLRUList(node); |
| 131 | return node; |
| 132 | } |
| 133 | |
| 134 | CacheNode* LRUCache::AllocNode() { |
| 135 | if (cache_stat_.used_node_count < cache_stat_.max_node_count) { |
| 136 | return &nodes_[++cache_stat_.used_node_count]; |
| 137 | } |
| 138 | // Recycle the node at the front of the LRU linked list. |
| 139 | CacheNode* node = &nodes_[nodes_->leaf_link_next]; |
| 140 | RemoveNodeFromLRUList(node); |
| 141 | node_set_.erase(node); |
| 142 | CacheNode* parent = GetParent(node); |
| 143 | if (parent != nullptr) { |
| 144 | DecreaseChildCountOfNode(parent); |
| 145 | } |
| 146 | cache_stat_.recycled_node_count++; |
| 147 | return node; |
| 148 | } |
| 149 | |
| 150 | void LRUCache::LinkParent(CacheNode* child, CacheNode* new_parent) { |
| 151 | CacheNode* old_parent = GetParent(child); |
| 152 | if (old_parent != nullptr) { |
| 153 | DecreaseChildCountOfNode(old_parent); |
| 154 | } |
| 155 | child->parent_index = GetNodeIndex(new_parent); |
| 156 | if (new_parent->is_leaf) { |
| 157 | RemoveNodeFromLRUList(new_parent); |
| 158 | new_parent->is_leaf = 0; |
| 159 | new_parent->children_count = 1; |
| 160 | } else { |
| 161 | new_parent->children_count++; |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | void LRUCache::UnlinkParent(CacheNode* child) { |
| 166 | CacheNode* old_parent = GetParent(child); |
| 167 | if (old_parent != nullptr) { |
| 168 | DecreaseChildCountOfNode(old_parent); |
| 169 | } |
| 170 | child->parent_index = 0; |
| 171 | } |
| 172 | |
| 173 | } // call_chain_joiner_impl |
| 174 | |
| 175 | using namespace call_chain_joiner_impl; |
| 176 | |
| 177 | static bool WriteCallChain(FILE* fp, pid_t pid, pid_t tid, CallChainJoiner::ChainType type, |
| 178 | const std::vector<uint64_t>& ips, |
| 179 | const std::vector<uint64_t>& sps, |
| 180 | size_t ip_count) { |
| 181 | // Below is the content of a call chain stored in file. |
| 182 | // uint32_t pid; |
| 183 | // uint32_t tid; |
| 184 | // uint32_t chain_type; |
| 185 | // uint32_t ip_count; |
| 186 | // uint64_t ips[]; |
| 187 | // uint64_t sps[]; |
| 188 | // uint32_t size; |
| 189 | uint32_t size = 4 * sizeof(uint32_t) + sizeof(uint64_t) * ip_count * 2 + sizeof(uint32_t); |
| 190 | std::vector<char> data(size); |
| 191 | char* p = data.data(); |
| 192 | MoveToBinaryFormat(pid, p); |
| 193 | MoveToBinaryFormat(tid, p); |
| 194 | MoveToBinaryFormat(type, p); |
| 195 | MoveToBinaryFormat(static_cast<uint32_t>(ip_count), p); |
| 196 | MoveToBinaryFormat(ips.data(), ip_count, p); |
| 197 | MoveToBinaryFormat(sps.data(), ip_count, p); |
| 198 | MoveToBinaryFormat(size, p); |
| 199 | if (fwrite(data.data(), size, 1, fp) != 1) { |
| 200 | PLOG(ERROR) << "fwrite"; |
| 201 | return false; |
| 202 | } |
| 203 | return true; |
| 204 | } |
| 205 | |
| 206 | static bool ReadCallChain(FILE* fp, pid_t& pid, pid_t& tid, CallChainJoiner::ChainType& type, |
| 207 | std::vector<uint64_t>& ips, std::vector<uint64_t>& sps) { |
| 208 | std::vector<char> data(4 * sizeof(uint32_t)); |
| 209 | if (fread(data.data(), data.size(), 1, fp) != 1) { |
| 210 | PLOG(ERROR) << "fread"; |
| 211 | return false; |
| 212 | } |
| 213 | const char* p = data.data(); |
| 214 | MoveFromBinaryFormat(pid, p); |
| 215 | MoveFromBinaryFormat(tid, p); |
| 216 | MoveFromBinaryFormat(type, p); |
| 217 | uint32_t ip_count; |
| 218 | MoveFromBinaryFormat(ip_count, p); |
| 219 | data.resize(sizeof(uint64_t) * ip_count * 2 + sizeof(uint32_t)); |
| 220 | if (fread(data.data(), data.size(), 1, fp) != 1) { |
| 221 | PLOG(ERROR) << "fread"; |
| 222 | return false; |
| 223 | } |
| 224 | p = data.data(); |
| 225 | ips.resize(ip_count); |
| 226 | MoveFromBinaryFormat(ips.data(), ip_count, p); |
| 227 | sps.resize(ip_count); |
| 228 | MoveFromBinaryFormat(sps.data(), ip_count, p); |
| 229 | return true; |
| 230 | } |
| 231 | |
| 232 | static bool ReadCallChainInReverseOrder(FILE* fp, pid_t& pid, pid_t& tid, |
| 233 | CallChainJoiner::ChainType& type, |
| 234 | std::vector<uint64_t>& ips, |
| 235 | std::vector<uint64_t>& sps) { |
| 236 | uint32_t size; |
| 237 | if (fseek(fp, -4, SEEK_CUR) != 0 || fread(&size, sizeof(size), 1, fp) != 1) { |
| 238 | PLOG(ERROR) << "fread"; |
| 239 | return false; |
| 240 | } |
| 241 | std::vector<char> data(size - 4); |
| 242 | if (fseek(fp, -static_cast<int>(size), SEEK_CUR) != 0 || |
| 243 | fread(data.data(), data.size(), 1, fp) != 1 || |
| 244 | fseek(fp, -static_cast<int>(data.size()), SEEK_CUR) != 0) { |
| 245 | PLOG(ERROR) << "fread"; |
| 246 | return false; |
| 247 | } |
| 248 | const char* p = data.data(); |
| 249 | MoveFromBinaryFormat(pid, p); |
| 250 | MoveFromBinaryFormat(tid, p); |
| 251 | MoveFromBinaryFormat(type, p); |
| 252 | uint32_t ip_count; |
| 253 | MoveFromBinaryFormat(ip_count, p); |
| 254 | ips.resize(ip_count); |
| 255 | MoveFromBinaryFormat(ips.data(), ip_count, p); |
| 256 | sps.resize(ip_count); |
| 257 | MoveFromBinaryFormat(sps.data(), ip_count, p); |
| 258 | return true; |
| 259 | } |
| 260 | |
| 261 | static FILE* CreateTempFp() { |
Yabin Cui | c68e66d | 2018-03-07 15:47:15 -0800 | [diff] [blame] | 262 | std::unique_ptr<TemporaryFile> tmpfile = ScopedTempFiles::CreateTempFile(); |
Yabin Cui | 4176ccf | 2017-12-08 13:12:34 -0800 | [diff] [blame] | 263 | FILE* fp = fdopen(tmpfile->release(), "web+"); |
| 264 | if (fp == nullptr) { |
| 265 | PLOG(ERROR) << "fdopen"; |
| 266 | return nullptr; |
| 267 | } |
| 268 | return fp; |
| 269 | } |
| 270 | |
| 271 | CallChainJoiner::CallChainJoiner(size_t cache_size, size_t matched_node_count_to_extend_callchain, |
| 272 | bool keep_original_callchains) |
| 273 | : keep_original_callchains_(keep_original_callchains), |
| 274 | original_chains_fp_(nullptr), |
| 275 | joined_chains_fp_(nullptr), |
| 276 | next_chain_index_(0u) { |
| 277 | cache_stat_.cache_size = cache_size; |
| 278 | cache_stat_.matched_node_count_to_extend_callchain = matched_node_count_to_extend_callchain; |
| 279 | } |
| 280 | |
| 281 | CallChainJoiner::~CallChainJoiner() { |
| 282 | if (original_chains_fp_ != nullptr) { |
| 283 | fclose(original_chains_fp_); |
| 284 | } |
| 285 | if (joined_chains_fp_ != nullptr) { |
| 286 | fclose(joined_chains_fp_); |
| 287 | } |
| 288 | } |
| 289 | |
| 290 | bool CallChainJoiner::AddCallChain(pid_t pid, pid_t tid, ChainType type, |
| 291 | const std::vector<uint64_t>& ips, |
| 292 | const std::vector<uint64_t>& sps) { |
| 293 | CHECK_EQ(ips.size(), sps.size()); |
| 294 | CHECK_GT(ips.size(), 0u); |
| 295 | CHECK(type == ORIGINAL_OFFLINE || type == ORIGINAL_REMOTE); |
| 296 | // Make sure sps are in non-decreasing order, and there is no duplicated items. |
| 297 | size_t ip_count = ips.size(); |
| 298 | for (size_t i = 1; i < ips.size(); ++i) { |
| 299 | if (sps[i] < sps[i - 1]) { |
| 300 | ip_count = i; |
| 301 | break; |
| 302 | } else if (sps[i] == sps[i - 1]) { |
| 303 | bool has_duplicated_items = false; |
| 304 | for (size_t j = i; j > 0 && sps[j - 1] == sps[i]; --j) { |
| 305 | if (ips[j - 1] == ips[i]) { |
| 306 | has_duplicated_items = true; |
| 307 | break; |
| 308 | } |
| 309 | } |
| 310 | if (has_duplicated_items) { |
| 311 | ip_count = i; |
| 312 | break; |
| 313 | } |
| 314 | } |
| 315 | } |
| 316 | |
| 317 | if (original_chains_fp_ == nullptr) { |
| 318 | original_chains_fp_ = CreateTempFp(); |
| 319 | if (original_chains_fp_ == nullptr) { |
| 320 | return false; |
| 321 | } |
| 322 | } |
| 323 | stat_.chain_count++; |
| 324 | return WriteCallChain(original_chains_fp_, pid, tid, type, ips, sps, ip_count); |
| 325 | } |
| 326 | |
| 327 | bool CallChainJoiner::JoinCallChains() { |
| 328 | if (stat_.chain_count == 0u) { |
| 329 | return true; |
| 330 | } |
| 331 | LRUCache cache(cache_stat_.cache_size, cache_stat_.matched_node_count_to_extend_callchain); |
| 332 | std::unique_ptr<FILE, decltype(&fclose)> tmp_fp(CreateTempFp(), fclose); |
| 333 | if (!tmp_fp) { |
| 334 | return false; |
| 335 | } |
| 336 | joined_chains_fp_ = CreateTempFp(); |
| 337 | if (joined_chains_fp_ == nullptr) { |
| 338 | return false; |
| 339 | } |
| 340 | pid_t pid; |
| 341 | pid_t tid; |
| 342 | ChainType type; |
| 343 | std::vector<uint64_t> ips; |
| 344 | std::vector<uint64_t> sps; |
| 345 | if (fseek(original_chains_fp_, 0, SEEK_END) != 0) { |
| 346 | PLOG(ERROR) << "fseek"; |
| 347 | return false; |
| 348 | } |
| 349 | std::vector<std::pair<FILE*, FILE*>> file_pairs = { |
| 350 | std::make_pair(original_chains_fp_, tmp_fp.get()), |
| 351 | std::make_pair(tmp_fp.get(), joined_chains_fp_) |
| 352 | }; |
| 353 | for (size_t pass = 0; pass < 2u; ++pass) { |
| 354 | auto& pair = file_pairs[pass]; |
| 355 | for (size_t i = 0; i < stat_.chain_count; ++i) { |
| 356 | if (!ReadCallChainInReverseOrder(pair.first, pid, tid, type, ips, sps)) { |
| 357 | return false; |
| 358 | } |
| 359 | if (pass == 0u) { |
| 360 | if (type == ORIGINAL_OFFLINE) { |
| 361 | type = JOINED_OFFLINE; |
| 362 | } else if (type == ORIGINAL_REMOTE) { |
| 363 | type = JOINED_REMOTE; |
| 364 | } |
| 365 | stat_.before_join_node_count += ips.size(); |
| 366 | } |
| 367 | |
| 368 | cache.AddCallChain(tid, ips, sps); |
| 369 | |
| 370 | if (pass == 1u) { |
| 371 | stat_.after_join_node_count += ips.size(); |
| 372 | stat_.after_join_max_chain_length = std::max(stat_.after_join_max_chain_length, ips.size()); |
| 373 | } |
| 374 | |
| 375 | if (!WriteCallChain(pair.second, pid, tid, type, ips, sps, ips.size())) { |
| 376 | return false; |
| 377 | } |
| 378 | } |
| 379 | } |
| 380 | cache_stat_ = cache.Stat(); |
| 381 | return true; |
| 382 | } |
| 383 | |
| 384 | bool CallChainJoiner::GetNextCallChain(pid_t& pid, pid_t& tid, ChainType& type, |
| 385 | std::vector<uint64_t>& ips, |
| 386 | std::vector<uint64_t>& sps) { |
| 387 | if (next_chain_index_ == stat_.chain_count * 2) { |
| 388 | // No more chains. |
| 389 | return false; |
| 390 | } |
| 391 | if (next_chain_index_ == 0u) { |
| 392 | if (fseek(original_chains_fp_, 0, SEEK_SET) != 0 || |
| 393 | fseek(joined_chains_fp_, 0, SEEK_SET) != 0) { |
| 394 | PLOG(ERROR) << "fseek"; |
| 395 | return false; |
| 396 | } |
| 397 | } |
| 398 | FILE* fp; |
| 399 | if (keep_original_callchains_) { |
| 400 | fp = (next_chain_index_ & 1) ? joined_chains_fp_ : original_chains_fp_; |
| 401 | next_chain_index_++; |
| 402 | } else { |
| 403 | fp = joined_chains_fp_; |
| 404 | next_chain_index_ += 2; |
| 405 | } |
| 406 | return ReadCallChain(fp, pid, tid, type, ips, sps); |
| 407 | } |
| 408 | |
| 409 | void CallChainJoiner::DumpStat() { |
| 410 | LOG(DEBUG) << "call chain joiner stat:"; |
| 411 | LOG(DEBUG) << " cache_size: " << cache_stat_.cache_size; |
| 412 | LOG(DEBUG) << " matched_node_count_to_extend_callchain: " |
| 413 | << cache_stat_.matched_node_count_to_extend_callchain; |
| 414 | LOG(DEBUG) << " max_node_count in cache: " << cache_stat_.max_node_count; |
| 415 | LOG(DEBUG) << " used_node_count in cache: " << cache_stat_.used_node_count; |
| 416 | LOG(DEBUG) << " recycled_node_count in cache: " << cache_stat_.recycled_node_count; |
| 417 | LOG(DEBUG) << " call_chain_count: " << stat_.chain_count; |
| 418 | LOG(DEBUG) << " before_join_node_count: " << stat_.before_join_node_count; |
| 419 | if (stat_.chain_count > 0u) { |
| 420 | LOG(DEBUG) << " before_join_average_chain_length: " |
| 421 | << (stat_.before_join_node_count * 1.0 / stat_.chain_count); |
| 422 | } |
| 423 | LOG(DEBUG) << " after_join_node_count: " << stat_.after_join_node_count; |
| 424 | if (stat_.chain_count > 0u) { |
| 425 | LOG(DEBUG) << " after_join_average_chain_length: " |
| 426 | << (stat_.after_join_node_count * 1.0 / stat_.chain_count); |
| 427 | } |
| 428 | LOG(DEBUG) << " after_join_max_chain_length: " << stat_.after_join_max_chain_length; |
| 429 | } |
| 430 | |
| 431 | } // namespace simpleperf |