Statsd: pull once per event time If a pull happens at the same event time, we should reuse the existing data, regardless of whether or not the cool down has been met. For example, if an app upgrade happens at time t, and two metrics need to pull atom a, if metric one pulls at time t, but metric two initiates the pull at time t+2, we should still reuse the pull from time t since that is when the app upgrade happened. Bug: 156294650 Test: atest statsd_test Change-Id: I4efc49545093f6683bf6dd89ed68c5dfa5b44d8f

commit: 7b975a85db963e8e4a75fdaed13aec52a8d0f316 [log] [tgz]
author: Tej Singh <singhtejinder@google.com> Mon May 11 11:05:08 2020 -0700
committer: Tej Singh <singhtejinder@google.com> Wed May 13 01:50:24 2020 -0700
tree: f4248cf4f8668cf2881f681ceb76b4f2cbb66636
parent: 1ecd4528fb3c07af3279428e1131909ad640b27d [diff]
diff --git a/cmds/statsd/src/StatsService.cpp b/cmds/statsd/src/StatsService.cpp
index a65f5f7..4ffa040 100644
--- a/cmds/statsd/src/StatsService.cpp
+++ b/cmds/statsd/src/StatsService.cpp

@@ -826,7 +826,7 @@
         uids.push_back(AID_SYSTEM);
     }
     vector<shared_ptr<LogEvent>> stats;
-    if (mPullerManager->Pull(s, uids, &stats)) {
+    if (mPullerManager->Pull(s, uids, getElapsedRealtimeNs(), &stats)) {
         for (const auto& it : stats) {
             dprintf(out, "Pull from %d: %s\n", s, it->ToString().c_str());
         }

diff --git a/cmds/statsd/src/external/StatsPuller.cpp b/cmds/statsd/src/external/StatsPuller.cpp
index 829a603..9df4d1f 100644
--- a/cmds/statsd/src/external/StatsPuller.cpp
+++ b/cmds/statsd/src/external/StatsPuller.cpp

@@ -38,14 +38,16 @@
       mPullTimeoutNs(pullTimeoutNs),
       mCoolDownNs(coolDownNs),
       mAdditiveFields(additiveFields),
-      mLastPullTimeNs(0) {
+      mLastPullTimeNs(0),
+      mLastEventTimeNs(0) {
 }
 
-bool StatsPuller::Pull(std::vector<std::shared_ptr<LogEvent>>* data) {
+bool StatsPuller::Pull(const int64_t eventTimeNs, std::vector<std::shared_ptr<LogEvent>>* data) {
     lock_guard<std::mutex> lock(mLock);
     int64_t elapsedTimeNs = getElapsedRealtimeNs();
     StatsdStats::getInstance().notePull(mTagId);
-    const bool shouldUseCache = elapsedTimeNs - mLastPullTimeNs < mCoolDownNs;
+    const bool shouldUseCache =
+            (mLastEventTimeNs == eventTimeNs) || (elapsedTimeNs - mLastPullTimeNs < mCoolDownNs);
     if (shouldUseCache) {
         if (mHasGoodData) {
             (*data) = mCachedData;
@@ -54,13 +56,13 @@
         }
         return mHasGoodData;
     }
-
     if (mLastPullTimeNs > 0) {
         StatsdStats::getInstance().updateMinPullIntervalSec(
                 mTagId, (elapsedTimeNs - mLastPullTimeNs) / NS_PER_SEC);
     }
     mCachedData.clear();
     mLastPullTimeNs = elapsedTimeNs;
+    mLastEventTimeNs = eventTimeNs;
     mHasGoodData = PullInternal(&mCachedData);
     if (!mHasGoodData) {
         return mHasGoodData;
@@ -70,7 +72,7 @@
     const bool pullTimeOut = pullDurationNs > mPullTimeoutNs;
     if (pullTimeOut) {
         // Something went wrong. Discard the data.
-        clearCacheLocked();
+        mCachedData.clear();
         mHasGoodData = false;
         StatsdStats::getInstance().notePullTimeout(mTagId);
         ALOGW("Pull for atom %d exceeds timeout %lld nano seconds.", mTagId,
@@ -104,6 +106,7 @@
     int ret = mCachedData.size();
     mCachedData.clear();
     mLastPullTimeNs = 0;
+    mLastEventTimeNs = 0;
     return ret;
 }
 

diff --git a/cmds/statsd/src/external/StatsPuller.h b/cmds/statsd/src/external/StatsPuller.h
index fee571c..470d15e 100644
--- a/cmds/statsd/src/external/StatsPuller.h
+++ b/cmds/statsd/src/external/StatsPuller.h

@@ -51,7 +51,7 @@
     //   2) pull takes longer than mPullTimeoutNs (intrinsic to puller)
     // If a metric wants to make any change to the data, like timestamps, it
     // should make a copy as this data may be shared with multiple metrics.
-    bool Pull(std::vector<std::shared_ptr<LogEvent>>* data);
+    bool Pull(const int64_t eventTimeNs, std::vector<std::shared_ptr<LogEvent>>* data);
 
     // Clear cache immediately
     int ForceClearCache();
@@ -94,6 +94,11 @@
 
     int64_t mLastPullTimeNs;
 
+    // All pulls happen due to an event (app upgrade, bucket boundary, condition change, etc).
+    // If multiple pulls need to be done at the same event time, we will always use the cache after
+    // the first pull.
+    int64_t mLastEventTimeNs;
+
     // Cache of data from last pull. If next request comes before cool down finishes,
     // cached data will be returned.
     // Cached data is cleared when

diff --git a/cmds/statsd/src/external/StatsPullerManager.cpp b/cmds/statsd/src/external/StatsPullerManager.cpp
index 1a52eb9..8a9ec74 100644
--- a/cmds/statsd/src/external/StatsPullerManager.cpp
+++ b/cmds/statsd/src/external/StatsPullerManager.cpp

@@ -91,20 +91,21 @@
       mPullAtomCallbackDeathRecipient(AIBinder_DeathRecipient_new(pullAtomCallbackDied)) {
 }
 
-bool StatsPullerManager::Pull(int tagId, const ConfigKey& configKey,
+bool StatsPullerManager::Pull(int tagId, const ConfigKey& configKey, const int64_t eventTimeNs,
                               vector<shared_ptr<LogEvent>>* data, bool useUids) {
     std::lock_guard<std::mutex> _l(mLock);
-    return PullLocked(tagId, configKey, data, useUids);
+    return PullLocked(tagId, configKey, eventTimeNs, data, useUids);
 }
 
-bool StatsPullerManager::Pull(int tagId, const vector<int32_t>& uids,
+bool StatsPullerManager::Pull(int tagId, const vector<int32_t>& uids, const int64_t eventTimeNs,
                               vector<std::shared_ptr<LogEvent>>* data, bool useUids) {
     std::lock_guard<std::mutex> _l(mLock);
-    return PullLocked(tagId, uids, data, useUids);
+    return PullLocked(tagId, uids, eventTimeNs, data, useUids);
 }
 
 bool StatsPullerManager::PullLocked(int tagId, const ConfigKey& configKey,
-                                    vector<shared_ptr<LogEvent>>* data, bool useUids) {
+                                    const int64_t eventTimeNs, vector<shared_ptr<LogEvent>>* data,
+                                    bool useUids) {
     vector<int32_t> uids;
     if (useUids) {
         auto uidProviderIt = mPullUidProviders.find(configKey);
@@ -123,18 +124,19 @@
         }
         uids = pullUidProvider->getPullAtomUids(tagId);
     }
-    return PullLocked(tagId, uids, data, useUids);
+    return PullLocked(tagId, uids, eventTimeNs, data, useUids);
 }
 
 bool StatsPullerManager::PullLocked(int tagId, const vector<int32_t>& uids,
-                                    vector<shared_ptr<LogEvent>>* data, bool useUids) {
+                                    const int64_t eventTimeNs, vector<shared_ptr<LogEvent>>* data,
+                                    bool useUids) {
     VLOG("Initiating pulling %d", tagId);
     if (useUids) {
         for (int32_t uid : uids) {
             PullerKey key = {.atomTag = tagId, .uid = uid};
             auto pullerIt = kAllPullAtomInfo.find(key);
             if (pullerIt != kAllPullAtomInfo.end()) {
-                bool ret = pullerIt->second->Pull(data);
+                bool ret = pullerIt->second->Pull(eventTimeNs, data);
                 VLOG("pulled %zu items", data->size());
                 if (!ret) {
                     StatsdStats::getInstance().notePullFailed(tagId);
@@ -149,7 +151,7 @@
         PullerKey key = {.atomTag = tagId, .uid = -1};
         auto pullerIt = kAllPullAtomInfo.find(key);
         if (pullerIt != kAllPullAtomInfo.end()) {
-            bool ret = pullerIt->second->Pull(data);
+            bool ret = pullerIt->second->Pull(eventTimeNs, data);
             VLOG("pulled %zu items", data->size());
             if (!ret) {
                 StatsdStats::getInstance().notePullFailed(tagId);
@@ -290,7 +292,8 @@
     }
     for (const auto& pullInfo : needToPull) {
         vector<shared_ptr<LogEvent>> data;
-        bool pullSuccess = PullLocked(pullInfo.first->atomTag, pullInfo.first->configKey, &data);
+        bool pullSuccess = PullLocked(pullInfo.first->atomTag, pullInfo.first->configKey,
+                                      elapsedTimeNs, &data);
         if (!pullSuccess) {
             VLOG("pull failed at %lld, will try again later", (long long)elapsedTimeNs);
         }

diff --git a/cmds/statsd/src/external/StatsPullerManager.h b/cmds/statsd/src/external/StatsPullerManager.h
index 5e18aaa..194a0f5 100644
--- a/cmds/statsd/src/external/StatsPullerManager.h
+++ b/cmds/statsd/src/external/StatsPullerManager.h

@@ -101,11 +101,11 @@
     //      registered for any of the uids for this atom.
     // If the metric wants to make any change to the data, like timestamps, they
     // should make a copy as this data may be shared with multiple metrics.
-    virtual bool Pull(int tagId, const ConfigKey& configKey,
+    virtual bool Pull(int tagId, const ConfigKey& configKey, const int64_t eventTimeNs,
                       vector<std::shared_ptr<LogEvent>>* data, bool useUids = true);
 
     // Same as above, but directly specify the allowed uids to pull from.
-    virtual bool Pull(int tagId, const vector<int32_t>& uids,
+    virtual bool Pull(int tagId, const vector<int32_t>& uids, const int64_t eventTimeNs,
                       vector<std::shared_ptr<LogEvent>>* data, bool useUids = true);
 
     // Clear pull data cache immediately.
@@ -152,11 +152,11 @@
     // mapping from Config Key to the PullUidProvider for that config
     std::map<ConfigKey, wp<PullUidProvider>> mPullUidProviders;
 
-    bool PullLocked(int tagId, const ConfigKey& configKey, vector<std::shared_ptr<LogEvent>>* data,
-                    bool useUids = true);
+    bool PullLocked(int tagId, const ConfigKey& configKey, const int64_t eventTimeNs,
+                    vector<std::shared_ptr<LogEvent>>* data, bool useUids = true);
 
-    bool PullLocked(int tagId, const vector<int32_t>& uids, vector<std::shared_ptr<LogEvent>>* data,
-                    bool useUids);
+    bool PullLocked(int tagId, const vector<int32_t>& uids, const int64_t eventTimeNs,
+                    vector<std::shared_ptr<LogEvent>>* data, bool useUids);
 
     // locks for data receiver and StatsCompanionService changes
     std::mutex mLock;

diff --git a/cmds/statsd/src/metrics/GaugeMetricProducer.cpp b/cmds/statsd/src/metrics/GaugeMetricProducer.cpp
index cc4c565..1d4d0b3 100644
--- a/cmds/statsd/src/metrics/GaugeMetricProducer.cpp
+++ b/cmds/statsd/src/metrics/GaugeMetricProducer.cpp

@@ -321,7 +321,7 @@
         return;
     }
     vector<std::shared_ptr<LogEvent>> allData;
-    if (!mPullerManager->Pull(mPullTagId, mConfigKey, &allData)) {
+    if (!mPullerManager->Pull(mPullTagId, mConfigKey, timestampNs, &allData)) {
         ALOGE("Gauge Stats puller failed for tag: %d at %lld", mPullTagId, (long long)timestampNs);
         return;
     }

diff --git a/cmds/statsd/src/metrics/ValueMetricProducer.cpp b/cmds/statsd/src/metrics/ValueMetricProducer.cpp
index e5ec72e..bf636a4 100644
--- a/cmds/statsd/src/metrics/ValueMetricProducer.cpp
+++ b/cmds/statsd/src/metrics/ValueMetricProducer.cpp

@@ -508,7 +508,7 @@
 
 void ValueMetricProducer::pullAndMatchEventsLocked(const int64_t timestampNs) {
     vector<std::shared_ptr<LogEvent>> allData;
-    if (!mPullerManager->Pull(mPullTagId, mConfigKey, &allData)) {
+    if (!mPullerManager->Pull(mPullTagId, mConfigKey, timestampNs, &allData)) {
         ALOGE("Stats puller failed for tag: %d at %lld", mPullTagId, (long long)timestampNs);
         invalidateCurrentBucket(timestampNs, BucketDropReason::PULL_FAILED);
         return;

diff --git a/cmds/statsd/src/shell/ShellSubscriber.cpp b/cmds/statsd/src/shell/ShellSubscriber.cpp
index 7b68721..361b161 100644
--- a/cmds/statsd/src/shell/ShellSubscriber.cpp
+++ b/cmds/statsd/src/shell/ShellSubscriber.cpp

@@ -152,6 +152,7 @@
             }
 
             int64_t nowMillis = getElapsedRealtimeMillis();
+            int64_t nowNanos = getElapsedRealtimeNs();
             for (PullInfo& pullInfo : mSubscriptionInfo->mPulledInfo) {
                 if (pullInfo.mPrevPullElapsedRealtimeMs + pullInfo.mInterval >= nowMillis) {
                     continue;
@@ -161,7 +162,7 @@
                 getUidsForPullAtom(&uids, pullInfo);
 
                 vector<std::shared_ptr<LogEvent>> data;
-                mPullerMgr->Pull(pullInfo.mPullerMatcher.atom_id(), uids, &data);
+                mPullerMgr->Pull(pullInfo.mPullerMatcher.atom_id(), uids, nowNanos, &data);
                 VLOG("Pulled %zu atoms with id %d", data.size(), pullInfo.mPullerMatcher.atom_id());
                 writePulledAtomsLocked(data, pullInfo.mPullerMatcher);
commit	7b975a85db963e8e4a75fdaed13aec52a8d0f316	[log] [tgz]
author	Tej Singh <singhtejinder@google.com>	Mon May 11 11:05:08 2020 -0700
committer	Tej Singh <singhtejinder@google.com>	Wed May 13 01:50:24 2020 -0700
tree	f4248cf4f8668cf2881f681ceb76b4f2cbb66636
parent	1ecd4528fb3c07af3279428e1131909ad640b27d [diff]