Fix Race Condition
Currently, it is possible for two threads in statsd to concurrently
access/modify memory in ConditionTrackers since they do not have locks.
This happens when one thread is processing LogEvents (lock on
StatsLogProcessor mutex), while the other thread receives uidmap updates
and locks on the mutex in the MetricProducer. This Cl changes uidmap
updates to also go through the mutex in StatsLogProcessor.
Test: bit statsd_test:*
Test: atest CtsStatsdHostTestCases
Test: local test (ag/9725088) that forced the race condition now passes
Bug: 144373785
Change-Id: I04ae2f7ed025f5ce8bc4fdeb7f10717e20d76282
diff --git a/cmds/statsd/src/StatsLogProcessor.cpp b/cmds/statsd/src/StatsLogProcessor.cpp
index 91cadc9..98d41c2 100644
--- a/cmds/statsd/src/StatsLogProcessor.cpp
+++ b/cmds/statsd/src/StatsLogProcessor.cpp
@@ -328,11 +328,6 @@
mAnomalyAlarmMonitor, mPeriodicAlarmMonitor);
if (newMetricsManager->isConfigValid()) {
mUidMap->OnConfigUpdated(key);
- if (newMetricsManager->shouldAddUidMapListener()) {
- // We have to add listener after the MetricsManager is constructed because it's
- // not safe to create wp or sp from this pointer inside its constructor.
- mUidMap->addListener(newMetricsManager.get());
- }
newMetricsManager->refreshTtl(timestampNs);
mMetricsManagers[key] = newMetricsManager;
VLOG("StatsdConfig valid");
@@ -753,6 +748,32 @@
}
}
+void StatsLogProcessor::notifyAppUpgrade(const int64_t& eventTimeNs, const string& apk,
+ const int uid, const int64_t version) {
+ std::lock_guard<std::mutex> lock(mMetricsMutex);
+ ALOGW("Received app upgrade");
+ for (auto it : mMetricsManagers) {
+ it.second->notifyAppUpgrade(eventTimeNs, apk, uid, version);
+ }
+}
+
+void StatsLogProcessor::notifyAppRemoved(const int64_t& eventTimeNs, const string& apk,
+ const int uid) {
+ std::lock_guard<std::mutex> lock(mMetricsMutex);
+ ALOGW("Received app removed");
+ for (auto it : mMetricsManagers) {
+ it.second->notifyAppRemoved(eventTimeNs, apk, uid);
+ }
+}
+
+void StatsLogProcessor::onUidMapReceived(const int64_t& eventTimeNs) {
+ std::lock_guard<std::mutex> lock(mMetricsMutex);
+ ALOGW("Received uid map");
+ for (auto it : mMetricsManagers) {
+ it.second->onUidMapReceived(eventTimeNs);
+ }
+}
+
void StatsLogProcessor::noteOnDiskData(const ConfigKey& key) {
std::lock_guard<std::mutex> lock(mMetricsMutex);
mOnDiskDataConfigs.insert(key);
diff --git a/cmds/statsd/src/StatsLogProcessor.h b/cmds/statsd/src/StatsLogProcessor.h
index b41771d..68a51ef 100644
--- a/cmds/statsd/src/StatsLogProcessor.h
+++ b/cmds/statsd/src/StatsLogProcessor.h
@@ -32,7 +32,7 @@
namespace statsd {
-class StatsLogProcessor : public ConfigListener {
+class StatsLogProcessor : public ConfigListener, public virtual PackageInfoListener {
public:
StatsLogProcessor(const sp<UidMap>& uidMap, const sp<StatsPullerManager>& pullerManager,
const sp<AlarmMonitor>& anomalyAlarmMonitor,
@@ -91,6 +91,16 @@
/* Sets the active status/ttl for all configs and metrics to the status in ActiveConfigList. */
void SetConfigsActiveState(const ActiveConfigList& activeConfigList, int64_t currentTimeNs);
+ /* Notify all MetricsManagers of app upgrades */
+ void notifyAppUpgrade(const int64_t& eventTimeNs, const string& apk, const int uid,
+ const int64_t version) override;
+
+ /* Notify all MetricsManagers of app removals */
+ void notifyAppRemoved(const int64_t& eventTimeNs, const string& apk, const int uid) override;
+
+ /* Notify all MetricsManagers of uid map snapshots received */
+ void onUidMapReceived(const int64_t& eventTimeNs) override;
+
// Reset all configs.
void resetConfigs();
diff --git a/cmds/statsd/src/StatsService.cpp b/cmds/statsd/src/StatsService.cpp
index f072c9c..2c325ba6 100644
--- a/cmds/statsd/src/StatsService.cpp
+++ b/cmds/statsd/src/StatsService.cpp
@@ -200,6 +200,7 @@
}
});
+ mUidMap->setListener(mProcessor);
mConfigManager->AddListener(mProcessor);
init_system_properties();
diff --git a/cmds/statsd/src/metrics/MetricProducer.h b/cmds/statsd/src/metrics/MetricProducer.h
index d7cbcc8..a513db6 100644
--- a/cmds/statsd/src/metrics/MetricProducer.h
+++ b/cmds/statsd/src/metrics/MetricProducer.h
@@ -87,7 +87,7 @@
// writing the report to dropbox. MetricProducers should respond to package changes as required in
// PackageInfoListener, but if none of the metrics are slicing by package name, then the update can
// be a no-op.
-class MetricProducer : public virtual PackageInfoListener, public virtual StateListener {
+class MetricProducer : public virtual android::RefBase, public virtual StateListener {
public:
MetricProducer(const int64_t& metricId, const ConfigKey& key, const int64_t timeBaseNs,
const int conditionIndex, const sp<ConditionWizard>& wizard,
@@ -109,8 +109,8 @@
* the flush again when the end timestamp is forced to be now, and then after flushing, update
* the start timestamp to be now.
*/
- void notifyAppUpgrade(const int64_t& eventTimeNs, const string& apk, const int uid,
- const int64_t version) override {
+ virtual void notifyAppUpgrade(const int64_t& eventTimeNs, const string& apk, const int uid,
+ const int64_t version) {
std::lock_guard<std::mutex> lock(mMutex);
if (eventTimeNs > getCurrentBucketEndTimeNs()) {
@@ -123,16 +123,11 @@
// is a partial bucket and can merge it with the previous bucket.
};
- void notifyAppRemoved(const int64_t& eventTimeNs, const string& apk, const int uid) override{
+ void notifyAppRemoved(const int64_t& eventTimeNs, const string& apk, const int uid) {
// Force buckets to split on removal also.
notifyAppUpgrade(eventTimeNs, apk, uid, 0);
};
- void onUidMapReceived(const int64_t& eventTimeNs) override{
- // Purposefully don't flush partial buckets on a new snapshot.
- // This occurs if a new user is added/removed or statsd crashes.
- };
-
// Consume the parsed stats log entry that already matched the "what" of the metric.
void onMatchedLogEvent(const size_t matcherIndex, const LogEvent& event) {
std::lock_guard<std::mutex> lock(mMutex);
diff --git a/cmds/statsd/src/metrics/MetricsManager.cpp b/cmds/statsd/src/metrics/MetricsManager.cpp
index 7bae4b9..464cec3 100644
--- a/cmds/statsd/src/metrics/MetricsManager.cpp
+++ b/cmds/statsd/src/metrics/MetricsManager.cpp
@@ -182,6 +182,10 @@
void MetricsManager::notifyAppUpgrade(const int64_t& eventTimeNs, const string& apk, const int uid,
const int64_t version) {
+ // Inform all metric producers.
+ for (auto it : mAllMetricProducers) {
+ it->notifyAppUpgrade(eventTimeNs, apk, uid, version);
+ }
// check if we care this package
if (std::find(mAllowedPkg.begin(), mAllowedPkg.end(), apk) == mAllowedPkg.end()) {
return;
@@ -193,6 +197,10 @@
void MetricsManager::notifyAppRemoved(const int64_t& eventTimeNs, const string& apk,
const int uid) {
+ // Inform all metric producers.
+ for (auto it : mAllMetricProducers) {
+ it->notifyAppRemoved(eventTimeNs, apk, uid);
+ }
// check if we care this package
if (std::find(mAllowedPkg.begin(), mAllowedPkg.end(), apk) == mAllowedPkg.end()) {
return;
@@ -203,6 +211,9 @@
}
void MetricsManager::onUidMapReceived(const int64_t& eventTimeNs) {
+ // Purposefully don't inform metric producers on a new snapshot
+ // because we don't need to flush partial buckets.
+ // This occurs if a new user is added/removed or statsd crashes.
if (mAllowedPkg.size() == 0) {
return;
}
diff --git a/cmds/statsd/src/metrics/MetricsManager.h b/cmds/statsd/src/metrics/MetricsManager.h
index 286610a..1fda696 100644
--- a/cmds/statsd/src/metrics/MetricsManager.h
+++ b/cmds/statsd/src/metrics/MetricsManager.h
@@ -35,7 +35,7 @@
namespace statsd {
// A MetricsManager is responsible for managing metrics from one single config source.
-class MetricsManager : public PackageInfoListener {
+class MetricsManager : public virtual android::RefBase {
public:
MetricsManager(const ConfigKey& configKey, const StatsdConfig& config, const int64_t timeBaseNs,
const int64_t currentTimeNs, const sp<UidMap>& uidMap,
@@ -63,15 +63,11 @@
unordered_set<sp<const InternalAlarm>, SpHash<InternalAlarm>>& alarmSet);
void notifyAppUpgrade(const int64_t& eventTimeNs, const string& apk, const int uid,
- const int64_t version) override;
+ const int64_t version);
- void notifyAppRemoved(const int64_t& eventTimeNs, const string& apk, const int uid) override;
+ void notifyAppRemoved(const int64_t& eventTimeNs, const string& apk, const int uid);
- void onUidMapReceived(const int64_t& eventTimeNs) override;
-
- bool shouldAddUidMapListener() const {
- return !mAllowedPkg.empty();
- }
+ void onUidMapReceived(const int64_t& eventTimeNs);
bool shouldWriteToDisk() const {
return mNoReportMetricIds.size() != mAllMetricProducers.size();
diff --git a/cmds/statsd/src/metrics/metrics_manager_util.cpp b/cmds/statsd/src/metrics/metrics_manager_util.cpp
index 6e76717..9131802 100644
--- a/cmds/statsd/src/metrics/metrics_manager_util.cpp
+++ b/cmds/statsd/src/metrics/metrics_manager_util.cpp
@@ -396,7 +396,7 @@
}
bool initMetrics(const ConfigKey& key, const StatsdConfig& config, const int64_t timeBaseTimeNs,
- const int64_t currentTimeNs, UidMap& uidMap,
+ const int64_t currentTimeNs,
const sp<StatsPullerManager>& pullerManager,
const unordered_map<int64_t, int>& logTrackerMap,
const unordered_map<int64_t, int>& conditionTrackerMap,
@@ -788,8 +788,6 @@
noReportMetricIds.insert(no_report_metric);
}
for (const auto& it : allMetricProducers) {
- uidMap.addListener(it);
-
// Register metrics to StateTrackers
for (int atomId : it->getSlicedStateAtoms()) {
if (!StateManager::getInstance().registerListener(atomId, it)) {
@@ -939,7 +937,7 @@
ALOGE("initStates failed");
return false;
}
- if (!initMetrics(key, config, timeBaseNs, currentTimeNs, uidMap, pullerManager, logTrackerMap,
+ if (!initMetrics(key, config, timeBaseNs, currentTimeNs, pullerManager, logTrackerMap,
conditionTrackerMap, allAtomMatchers, stateAtomIdMap, allStateGroupMaps,
allConditionTrackers, allMetricProducers,
conditionToMetricMap, trackerToMetricMap, metricProducerMap,
diff --git a/cmds/statsd/src/packages/UidMap.cpp b/cmds/statsd/src/packages/UidMap.cpp
index d4b57dd..7e63bbf 100644
--- a/cmds/statsd/src/packages/UidMap.cpp
+++ b/cmds/statsd/src/packages/UidMap.cpp
@@ -119,7 +119,7 @@
void UidMap::updateMap(const int64_t& timestamp, const vector<int32_t>& uid,
const vector<int64_t>& versionCode, const vector<String16>& versionString,
const vector<String16>& packageName, const vector<String16>& installer) {
- vector<wp<PackageInfoListener>> broadcastList;
+ wp<PackageInfoListener> broadcast = NULL;
{
lock_guard<mutex> lock(mMutex); // Exclusively lock for updates.
@@ -150,25 +150,22 @@
ensureBytesUsedBelowLimit();
StatsdStats::getInstance().setCurrentUidMapMemory(mBytesUsed);
- getListenerListCopyLocked(&broadcastList);
+ broadcast = mSubscriber;
}
// To avoid invoking callback while holding the internal lock. we get a copy of the listener
- // list and invoke the callback. It's still possible that after we copy the list, a
- // listener removes itself before we call it. It's then the listener's job to handle it (expect
- // the callback to be called after listener is removed, and the listener should properly
- // ignore it).
- for (const auto& weakPtr : broadcastList) {
- auto strongPtr = weakPtr.promote();
- if (strongPtr != NULL) {
- strongPtr->onUidMapReceived(timestamp);
- }
+ // and invoke the callback. It's still possible that after we copy the listener, it removes
+ // itself before we call it. It's then the listener's job to handle it (expect the callback to
+ // be called after listener is removed, and the listener should properly ignore it).
+ auto strongPtr = broadcast.promote();
+ if (strongPtr != NULL) {
+ strongPtr->onUidMapReceived(timestamp);
}
}
void UidMap::updateApp(const int64_t& timestamp, const String16& app_16, const int32_t& uid,
const int64_t& versionCode, const String16& versionString,
const String16& installer) {
- vector<wp<PackageInfoListener>> broadcastList;
+ wp<PackageInfoListener> broadcast = NULL;
string appName = string(String8(app_16).string());
{
lock_guard<mutex> lock(mMutex);
@@ -195,7 +192,7 @@
// for the first time, then we don't notify the listeners.
// It's also OK to split again if we're forming a partial bucket after re-installing an
// app after deletion.
- getListenerListCopyLocked(&broadcastList);
+ broadcast = mSubscriber;
}
mChanges.emplace_back(false, timestamp, appName, uid, versionCode, newVersionString,
prevVersion, prevVersionString);
@@ -205,11 +202,9 @@
StatsdStats::getInstance().setUidMapChanges(mChanges.size());
}
- for (const auto& weakPtr : broadcastList) {
- auto strongPtr = weakPtr.promote();
- if (strongPtr != NULL) {
- strongPtr->notifyAppUpgrade(timestamp, appName, uid, versionCode);
- }
+ auto strongPtr = broadcast.promote();
+ if (strongPtr != NULL) {
+ strongPtr->notifyAppUpgrade(timestamp, appName, uid, versionCode);
}
}
@@ -230,21 +225,8 @@
}
}
-void UidMap::getListenerListCopyLocked(vector<wp<PackageInfoListener>>* output) {
- for (auto weakIt = mSubscribers.begin(); weakIt != mSubscribers.end();) {
- auto strongPtr = weakIt->promote();
- if (strongPtr != NULL) {
- output->push_back(*weakIt);
- weakIt++;
- } else {
- weakIt = mSubscribers.erase(weakIt);
- VLOG("The UidMap listener is gone, remove it now");
- }
- }
-}
-
void UidMap::removeApp(const int64_t& timestamp, const String16& app_16, const int32_t& uid) {
- vector<wp<PackageInfoListener>> broadcastList;
+ wp<PackageInfoListener> broadcast = NULL;
string app = string(String8(app_16).string());
{
lock_guard<mutex> lock(mMutex);
@@ -271,25 +253,18 @@
ensureBytesUsedBelowLimit();
StatsdStats::getInstance().setCurrentUidMapMemory(mBytesUsed);
StatsdStats::getInstance().setUidMapChanges(mChanges.size());
- getListenerListCopyLocked(&broadcastList);
+ broadcast = mSubscriber;
}
- for (const auto& weakPtr : broadcastList) {
- auto strongPtr = weakPtr.promote();
- if (strongPtr != NULL) {
- strongPtr->notifyAppRemoved(timestamp, app, uid);
- }
+ auto strongPtr = broadcast.promote();
+ if (strongPtr != NULL) {
+ strongPtr->notifyAppRemoved(timestamp, app, uid);
}
}
-void UidMap::addListener(wp<PackageInfoListener> producer) {
+void UidMap::setListener(wp<PackageInfoListener> listener) {
lock_guard<mutex> lock(mMutex); // Lock for updates
- mSubscribers.insert(producer);
-}
-
-void UidMap::removeListener(wp<PackageInfoListener> producer) {
- lock_guard<mutex> lock(mMutex); // Lock for updates
- mSubscribers.erase(producer);
+ mSubscriber = listener;
}
void UidMap::assignIsolatedUid(int isolatedUid, int parentUid) {
diff --git a/cmds/statsd/src/packages/UidMap.h b/cmds/statsd/src/packages/UidMap.h
index a7c5fb2..2d3f6ee 100644
--- a/cmds/statsd/src/packages/UidMap.h
+++ b/cmds/statsd/src/packages/UidMap.h
@@ -118,12 +118,10 @@
// adb shell cmd stats print-uid-map
void printUidMap(int outFd) const;
- // Commands for indicating to the map that a producer should be notified if an app is updated.
- // This allows the metric producer to distinguish when the same uid or app represents a
- // different version of an app.
- void addListener(wp<PackageInfoListener> producer);
- // Remove the listener from the set of metric producers that subscribe to updates.
- void removeListener(wp<PackageInfoListener> producer);
+ // Command for indicating to the map that StatsLogProcessor should be notified if an app is
+ // updated. This allows metric producers and managers to distinguish when the same uid or app
+ // represents a different version of an app.
+ void setListener(wp<PackageInfoListener> listener);
// Informs uid map that a config is added/updated. Used for keeping mConfigKeys up to date.
void OnConfigUpdated(const ConfigKey& key);
@@ -167,8 +165,6 @@
std::set<string> getAppNamesFromUidLocked(const int32_t& uid, bool returnNormalized) const;
string normalizeAppName(const string& appName) const;
- void getListenerListCopyLocked(std::vector<wp<PackageInfoListener>>* output);
-
void writeUidMapSnapshotLocked(int64_t timestamp, bool includeVersionStrings,
bool includeInstaller, const std::set<int32_t>& interestingUids,
std::set<string>* str_set, ProtoOutputStream* proto);
@@ -195,8 +191,8 @@
// Store which uid and apps represent deleted ones.
std::list<std::pair<int, string>> mDeletedApps;
- // Metric producers that should be notified if there's an upgrade in any app.
- set<wp<PackageInfoListener>> mSubscribers;
+ // Notify StatsLogProcessor if there's an upgrade/removal in any app.
+ wp<PackageInfoListener> mSubscriber;
// Mapping of config keys we're aware of to the epoch time they last received an update. This
// lets us know it's safe to delete events older than the oldest update. The value is nanosec.
diff --git a/cmds/statsd/tests/e2e/PartialBucket_e2e_test.cpp b/cmds/statsd/tests/e2e/PartialBucket_e2e_test.cpp
index 309d251..0bc3ebb 100644
--- a/cmds/statsd/tests/e2e/PartialBucket_e2e_test.cpp
+++ b/cmds/statsd/tests/e2e/PartialBucket_e2e_test.cpp
@@ -162,7 +162,10 @@
ConfigMetricsReport report = GetReports(service.mProcessor, start + 4);
backfillStartEndTimestamp(&report);
- EXPECT_EQ(1, report.metrics_size());
+
+ ASSERT_EQ(1, report.metrics_size());
+ ASSERT_EQ(1, report.metrics(0).count_metrics().data_size());
+ ASSERT_EQ(1, report.metrics(0).count_metrics().data(0).bucket_info_size());
EXPECT_TRUE(report.metrics(0).count_metrics().data(0).bucket_info(0).
has_start_bucket_elapsed_nanos());
EXPECT_TRUE(report.metrics(0).count_metrics().data(0).bucket_info(0).
@@ -186,7 +189,10 @@
ConfigMetricsReport report = GetReports(service.mProcessor, start + 4);
backfillStartEndTimestamp(&report);
- EXPECT_EQ(1, report.metrics_size());
+
+ ASSERT_EQ(1, report.metrics_size());
+ ASSERT_EQ(1, report.metrics(0).count_metrics().data_size());
+ ASSERT_EQ(1, report.metrics(0).count_metrics().data(0).bucket_info_size());
EXPECT_TRUE(report.metrics(0).count_metrics().data(0).bucket_info(0).
has_start_bucket_elapsed_nanos());
EXPECT_TRUE(report.metrics(0).count_metrics().data(0).bucket_info(0).
@@ -228,8 +234,9 @@
ConfigMetricsReport report =
GetReports(service.mProcessor, 5 * 60 * NS_PER_SEC + start + 100 * NS_PER_SEC, true);
backfillStartEndTimestamp(&report);
- EXPECT_EQ(1, report.metrics_size());
- EXPECT_EQ(1, report.metrics(0).value_metrics().skipped_size());
+
+ ASSERT_EQ(1, report.metrics_size());
+ ASSERT_EQ(1, report.metrics(0).value_metrics().skipped_size());
EXPECT_TRUE(report.metrics(0).value_metrics().skipped(0).has_start_bucket_elapsed_nanos());
// Can't test the start time since it will be based on the actual time when the pulling occurs.
EXPECT_EQ(MillisToNano(NanoToMillis(endSkipped)),
@@ -270,8 +277,8 @@
ConfigMetricsReport report =
GetReports(service.mProcessor, 5 * 60 * NS_PER_SEC + start + 100 * NS_PER_SEC, true);
backfillStartEndTimestamp(&report);
- EXPECT_EQ(1, report.metrics_size());
- EXPECT_EQ(1, report.metrics(0).gauge_metrics().skipped_size());
+ ASSERT_EQ(1, report.metrics_size());
+ ASSERT_EQ(1, report.metrics(0).gauge_metrics().skipped_size());
// Can't test the start time since it will be based on the actual time when the pulling occurs.
EXPECT_TRUE(report.metrics(0).gauge_metrics().skipped(0).has_start_bucket_elapsed_nanos());
EXPECT_EQ(MillisToNano(NanoToMillis(endSkipped)),