Merge "Disable fallback when comparison result is different" into tm-dev am: ebec0f76ce

Original change: https://googleplex-android-review.googlesource.com/c/platform/packages/modules/Connectivity/+/18849946

Change-Id: Ibf70fadd9c2723e84124658b9614613c293db353
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/service-t/src/com/android/server/net/NetworkStatsService.java b/service-t/src/com/android/server/net/NetworkStatsService.java
index 42a108f..b37f93d 100644
--- a/service-t/src/com/android/server/net/NetworkStatsService.java
+++ b/service-t/src/com/android/server/net/NetworkStatsService.java
@@ -76,8 +76,10 @@
 import android.content.IntentFilter;
 import android.content.pm.ApplicationInfo;
 import android.content.pm.PackageManager;
+import android.content.res.Resources;
 import android.database.ContentObserver;
 import android.net.ConnectivityManager;
+import android.net.ConnectivityResources;
 import android.net.DataUsageRequest;
 import android.net.INetd;
 import android.net.INetworkStatsService;
@@ -140,6 +142,7 @@
 import android.util.SparseIntArray;
 import android.util.proto.ProtoOutputStream;
 
+import com.android.connectivity.resources.R;
 import com.android.internal.annotations.GuardedBy;
 import com.android.internal.annotations.VisibleForTesting;
 import com.android.internal.util.FileRotator;
@@ -765,6 +768,11 @@
                 return null;
             }
         }
+
+        /** Gets whether the build is userdebug. */
+        public boolean isDebuggable() {
+            return Build.isDebuggable();
+        }
     }
 
     /**
@@ -927,18 +935,27 @@
         final int targetAttempts = mDeps.getImportLegacyTargetAttempts();
         final int attempts;
         final int fallbacks;
+        final boolean runComparison;
         try {
             attempts = mImportLegacyAttemptsCounter.get();
+            // Fallbacks counter would be set to non-zero value to indicate the migration was
+            // not successful.
             fallbacks = mImportLegacyFallbacksCounter.get();
+            runComparison = shouldRunComparison();
         } catch (IOException e) {
             Log.wtf(TAG, "Failed to read counters, skip.", e);
             return;
         }
-        // If fallbacks is not zero, proceed with reading only to give signals from dogfooders.
-        // TODO(b/233752318): Remove fallbacks counter check before T formal release.
-        if (attempts >= targetAttempts && fallbacks == 0) return;
 
-        final boolean dryRunImportOnly = (attempts >= targetAttempts);
+        // If the target number of attempts are reached, don't import any data.
+        // However, if comparison is requested, still read the legacy data and compare
+        // it to the importer output. This allows OEMs to debug issues with the
+        // importer code and to collect signals from the field.
+        final boolean dryRunImportOnly =
+                fallbacks != 0 && runComparison && (attempts >= targetAttempts);
+        // Return if target attempts are reached and there is no need to dry run.
+        if (attempts >= targetAttempts && !dryRunImportOnly) return;
+
         if (dryRunImportOnly) {
             Log.i(TAG, "Starting import : only perform read");
         } else {
@@ -951,69 +968,54 @@
         };
 
         // Legacy directories will be created by recorders if they do not exist
-        final File legacyBaseDir = mDeps.getLegacyStatsDir();
-        final NetworkStatsRecorder[] legacyRecorders = new NetworkStatsRecorder[]{
-                buildRecorder(PREFIX_DEV, mSettings.getDevConfig(), false, legacyBaseDir),
-                buildRecorder(PREFIX_XT, mSettings.getXtConfig(), false, legacyBaseDir),
-                buildRecorder(PREFIX_UID, mSettings.getUidConfig(), false, legacyBaseDir),
-                buildRecorder(PREFIX_UID_TAG, mSettings.getUidTagConfig(), true, legacyBaseDir)
-        };
+        final NetworkStatsRecorder[] legacyRecorders;
+        if (runComparison) {
+            final File legacyBaseDir = mDeps.getLegacyStatsDir();
+            legacyRecorders = new NetworkStatsRecorder[]{
+                    buildRecorder(PREFIX_DEV, mSettings.getDevConfig(), false, legacyBaseDir),
+                    buildRecorder(PREFIX_XT, mSettings.getXtConfig(), false, legacyBaseDir),
+                    buildRecorder(PREFIX_UID, mSettings.getUidConfig(), false, legacyBaseDir),
+                    buildRecorder(PREFIX_UID_TAG, mSettings.getUidTagConfig(), true, legacyBaseDir)
+            };
+        } else {
+            legacyRecorders = null;
+        }
 
         long migrationEndTime = Long.MIN_VALUE;
-        boolean endedWithFallback = false;
         try {
             // First, read all legacy collections. This is OEM code and it can throw. Don't
             // commit any data to disk until all are read.
             for (int i = 0; i < migrations.length; i++) {
-                String errMsg = null;
-                Throwable exception = null;
                 final MigrationInfo migration = migrations[i];
 
-                // Read the collection from platform code, and using fallback method if throws.
+                // Read the collection from platform code, and set fallbacks counter if throws
+                // for better debugging.
                 try {
                     migration.collection = readPlatformCollectionForRecorder(migration.recorder);
                 } catch (Throwable e) {
-                    errMsg = "Failed to read stats from platform";
-                    exception = e;
-                }
-
-                // Also read the collection with legacy method
-                final NetworkStatsRecorder legacyRecorder = legacyRecorders[i];
-
-                final NetworkStatsCollection legacyStats;
-                try {
-                    legacyStats = legacyRecorder.getOrLoadCompleteLocked();
-                } catch (Throwable e) {
-                    Log.wtf(TAG, "Failed to read stats with legacy method for recorder " + i, e);
-                    if (exception != null) {
-                        throw exception;
+                    if (dryRunImportOnly) {
+                        Log.wtf(TAG, "Platform data read failed. ", e);
+                        return;
                     } else {
-                        // Use newer stats, since that's all that is available
-                        continue;
+                        // Data is not imported successfully, set fallbacks counter to non-zero
+                        // value to trigger dry run every later boot when the runComparison is
+                        // true, in order to make it easier to debug issues.
+                        tryIncrementLegacyFallbacksCounter();
+                        // Re-throw for error handling. This will increase attempts counter.
+                        throw e;
                     }
                 }
 
-                if (errMsg == null) {
-                    try {
-                        errMsg = compareStats(migration.collection, legacyStats);
-                    } catch (Throwable e) {
-                        errMsg = "Failed to compare migrated stats with all stats";
-                        exception = e;
+                if (runComparison) {
+                    final boolean success =
+                            compareImportedToLegacyStats(migration, legacyRecorders[i]);
+                    if (!success && !dryRunImportOnly) {
+                        tryIncrementLegacyFallbacksCounter();
                     }
                 }
-
-                if (errMsg != null) {
-                    Log.wtf(TAG, "NetworkStats import for migration " + i
-                            + " returned invalid data: " + errMsg, exception);
-                    // Fall back to legacy stats for this boot. The stats for old data will be
-                    // re-imported again on next boot until they succeed the import. This is fine
-                    // since every import clears the previous stats for the imported timespan.
-                    migration.collection = legacyStats;
-                    endedWithFallback = true;
-                }
             }
 
-            // For cases where the fallbacks is not zero but target attempts counts reached,
+            // For cases where the fallbacks are not zero but target attempts counts reached,
             // only perform reads above and return here.
             if (dryRunImportOnly) return;
 
@@ -1079,22 +1081,78 @@
         // Success ! No need to import again next time.
         try {
             mImportLegacyAttemptsCounter.set(targetAttempts);
-            if (endedWithFallback) {
-                Log.wtf(TAG, "Imported platform collections with legacy fallback");
-                final int fallbacksCount = mImportLegacyFallbacksCounter.get();
-                mImportLegacyFallbacksCounter.set(fallbacksCount + 1);
-            } else {
-                Log.i(TAG, "Successfully imported platform collections");
-                // The successes counter is only for debugging. Hence, the synchronization
-                // between successes counter and attempts counter are not very critical.
-                final int successCount = mImportLegacySuccessesCounter.get();
-                mImportLegacySuccessesCounter.set(successCount + 1);
-            }
+            Log.i(TAG, "Successfully imported platform collections");
+            // The successes counter is only for debugging. Hence, the synchronization
+            // between successes counter and attempts counter are not very critical.
+            final int successCount = mImportLegacySuccessesCounter.get();
+            mImportLegacySuccessesCounter.set(successCount + 1);
         } catch (IOException e) {
             Log.wtf(TAG, "Succeed but failed to update counters.", e);
         }
     }
 
+    void tryIncrementLegacyFallbacksCounter() {
+        try {
+            final int fallbacks = mImportLegacyFallbacksCounter.get();
+            mImportLegacyFallbacksCounter.set(fallbacks + 1);
+        } catch (IOException e) {
+            Log.wtf(TAG, "Failed to update fallback counter.", e);
+        }
+    }
+
+    @VisibleForTesting
+    boolean shouldRunComparison() {
+        final ConnectivityResources resources = new ConnectivityResources(mContext);
+        // 0 if id not found.
+        Boolean overlayValue = null;
+        try {
+            switch (resources.get().getInteger(R.integer.config_netstats_validate_import)) {
+                case 1:
+                    overlayValue = Boolean.TRUE;
+                    break;
+                case 0:
+                    overlayValue = Boolean.FALSE;
+                    break;
+            }
+        } catch (Resources.NotFoundException e) {
+            // Overlay value is not defined.
+        }
+        // TODO(b/233752318): For now it is always true to collect signal from beta users.
+        //  Should change to the default behavior (true if debuggable builds) before formal release.
+        return (overlayValue != null ? overlayValue : mDeps.isDebuggable()) || true;
+    }
+
+    /**
+     * Compare imported data with the data returned by legacy recorders.
+     *
+     * @return true if the data matches, false if the data does not match or throw with exceptions.
+     */
+    private boolean compareImportedToLegacyStats(@NonNull MigrationInfo migration,
+            @NonNull NetworkStatsRecorder legacyRecorder) {
+        final NetworkStatsCollection legacyStats;
+        try {
+            legacyStats = legacyRecorder.getOrLoadCompleteLocked();
+        } catch (Throwable e) {
+            Log.wtf(TAG, "Failed to read stats with legacy method for recorder "
+                    + legacyRecorder.getCookie(), e);
+            // Cannot read data from legacy method, skip comparison.
+            return false;
+        }
+
+        // The result of comparison is only for logging.
+        try {
+            final String error = compareStats(migration.collection, legacyStats);
+            if (error != null) {
+                Log.wtf(TAG, "Unexpected comparison result for recorder "
+                        + legacyRecorder.getCookie() + ": " + error);
+            }
+        } catch (Throwable e) {
+            Log.wtf(TAG, "Failed to compare migrated stats with legacy stats for recorder "
+                    + legacyRecorder.getCookie(), e);
+        }
+        return true;
+    }
+
     private static String str(NetworkStatsCollection.Key key) {
         StringBuilder sb = new StringBuilder()
                 .append(key.ident.toString())
diff --git a/service/ServiceConnectivityResources/res/values/config.xml b/service/ServiceConnectivityResources/res/values/config.xml
index 81782f9..bff6953 100644
--- a/service/ServiceConnectivityResources/res/values/config.xml
+++ b/service/ServiceConnectivityResources/res/values/config.xml
@@ -179,4 +179,13 @@
     Only supported up to S. On T+, the Wi-Fi code should use unregisterAfterReplacement in order
     to ensure that apps see the network disconnect and reconnect. -->
     <integer translatable="false" name="config_validationFailureAfterRoamIgnoreTimeMillis">-1</integer>
+
+    <!-- Whether the network stats service should run compare on the result of
+    {@link NetworkStatsDataMigrationUtils#readPlatformCollection} and the result
+    of reading from legacy recorders. Possible values are:
+      0 = never compare,
+      1 = always compare,
+      2 = compare on debuggable builds (default value)
+      -->
+    <integer translatable="false" name="config_netstats_validate_import">2</integer>
 </resources>
diff --git a/service/ServiceConnectivityResources/res/values/overlayable.xml b/service/ServiceConnectivityResources/res/values/overlayable.xml
index b92dd08..3389d63 100644
--- a/service/ServiceConnectivityResources/res/values/overlayable.xml
+++ b/service/ServiceConnectivityResources/res/values/overlayable.xml
@@ -41,6 +41,7 @@
             <item type="array" name="config_ethernet_interfaces"/>
             <item type="string" name="config_ethernet_iface_regex"/>
             <item type="integer" name="config_validationFailureAfterRoamIgnoreTimeMillis" />
+            <item type="integer" name="config_netstats_validate_import" />
         </policy>
     </overlayable>
 </resources>
diff --git a/tests/unit/java/com/android/server/net/NetworkStatsServiceTest.java b/tests/unit/java/com/android/server/net/NetworkStatsServiceTest.java
index b1d44ea..527dec3 100644
--- a/tests/unit/java/com/android/server/net/NetworkStatsServiceTest.java
+++ b/tests/unit/java/com/android/server/net/NetworkStatsServiceTest.java
@@ -95,13 +95,16 @@
 import android.app.AlarmManager;
 import android.content.Context;
 import android.content.Intent;
+import android.content.res.Resources;
 import android.database.ContentObserver;
+import android.net.ConnectivityResources;
 import android.net.DataUsageRequest;
 import android.net.INetd;
 import android.net.INetworkStatsSession;
 import android.net.LinkProperties;
 import android.net.Network;
 import android.net.NetworkCapabilities;
+import android.net.NetworkIdentity;
 import android.net.NetworkStateSnapshot;
 import android.net.NetworkStats;
 import android.net.NetworkStatsCollection;
@@ -128,6 +131,7 @@
 import androidx.test.InstrumentationRegistry;
 import androidx.test.filters.SmallTest;
 
+import com.android.connectivity.resources.R;
 import com.android.internal.util.FileRotator;
 import com.android.internal.util.test.BroadcastInterceptingContext;
 import com.android.net.module.util.IBpfMap;
@@ -154,6 +158,7 @@
 import java.time.temporal.ChronoUnit;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Set;
 import java.util.concurrent.Executor;
 import java.util.concurrent.atomic.AtomicBoolean;
 
@@ -247,6 +252,8 @@
     private @Mock PersistentInt mImportLegacyAttemptsCounter;
     private @Mock PersistentInt mImportLegacySuccessesCounter;
     private @Mock PersistentInt mImportLegacyFallbacksCounter;
+    private @Mock Resources mResources;
+    private Boolean mIsDebuggable;
 
     private class MockContext extends BroadcastInterceptingContext {
         private final Context mBaseContext;
@@ -307,6 +314,12 @@
     @Before
     public void setUp() throws Exception {
         MockitoAnnotations.initMocks(this);
+
+        // Setup mock resources.
+        final Context mockResContext = mock(Context.class);
+        doReturn(mResources).when(mockResContext).getResources();
+        ConnectivityResources.setResourcesContextForTest(mockResContext);
+
         final Context context = InstrumentationRegistry.getContext();
         mServiceContext = new MockContext(context);
         when(mLocationPermissionChecker.checkCallersLocationPermission(
@@ -462,6 +475,11 @@
             public IBpfMap<UidStatsMapKey, StatsMapValue> getAppUidStatsMap() {
                 return mAppUidStatsMap;
             }
+
+            @Override
+            public boolean isDebuggable() {
+                return mIsDebuggable == Boolean.TRUE;
+            }
         };
     }
 
@@ -1898,6 +1916,99 @@
         //  will decrease the retry counter by 1.
     }
 
+    @Test
+    public void testDataMigration_differentFromFallback() throws Exception {
+        assertStatsFilesExist(false);
+        expectDefaultSettings();
+
+        NetworkStateSnapshot[] states = new NetworkStateSnapshot[]{buildWifiState()};
+
+        mService.notifyNetworkStatus(NETWORKS_WIFI, states, getActiveIface(states),
+                new UnderlyingNetworkInfo[0]);
+
+        // modify some number on wifi, and trigger poll event
+        incrementCurrentTime(HOUR_IN_MILLIS);
+        expectNetworkStatsSummary(new NetworkStats(getElapsedRealtime(), 1)
+                .insertEntry(TEST_IFACE, 1024L, 8L, 2048L, 16L));
+        expectNetworkStatsUidDetail(new NetworkStats(getElapsedRealtime(), 1)
+                .insertEntry(TEST_IFACE, UID_BLUE, SET_DEFAULT, TAG_NONE, 128L, 1L, 128L, 1L, 0L));
+        forcePollAndWaitForIdle();
+        // Simulate shutdown to force persisting data
+        mServiceContext.sendBroadcast(new Intent(Intent.ACTION_SHUTDOWN));
+        assertStatsFilesExist(true);
+
+        // Move the files to the legacy directory to simulate an import from old data
+        for (File f : mStatsDir.listFiles()) {
+            Files.move(f.toPath(), mLegacyStatsDir.toPath().resolve(f.getName()));
+        }
+        assertStatsFilesExist(false);
+
+        // Prepare some unexpected data.
+        final NetworkIdentity testWifiIdent = new NetworkIdentity.Builder().setType(TYPE_WIFI)
+                .setWifiNetworkKey(TEST_WIFI_NETWORK_KEY).build();
+        final NetworkStatsCollection.Key unexpectedUidAllkey = new NetworkStatsCollection.Key(
+                Set.of(testWifiIdent), UID_ALL, SET_DEFAULT, 0);
+        final NetworkStatsCollection.Key unexpectedUidBluekey = new NetworkStatsCollection.Key(
+                Set.of(testWifiIdent), UID_BLUE, SET_DEFAULT, 0);
+        final NetworkStatsHistory unexpectedHistory = new NetworkStatsHistory
+                .Builder(965L /* bucketDuration */, 1)
+                .addEntry(new NetworkStatsHistory.Entry(TEST_START, 3L, 55L, 4L, 31L, 10L, 5L))
+                .build();
+
+        // Simulate the platform stats collection somehow is different from what is read from
+        // the fallback method. The service should read them as is. This usually happens when an
+        // OEM has changed the implementation of NetworkStatsDataMigrationUtils inside the platform.
+        final NetworkStatsCollection summaryCollection =
+                getLegacyCollection(PREFIX_XT, false /* includeTags */);
+        summaryCollection.recordHistory(unexpectedUidAllkey, unexpectedHistory);
+        final NetworkStatsCollection uidCollection =
+                getLegacyCollection(PREFIX_UID, false /* includeTags */);
+        uidCollection.recordHistory(unexpectedUidBluekey, unexpectedHistory);
+        mPlatformNetworkStatsCollection.put(PREFIX_DEV, summaryCollection);
+        mPlatformNetworkStatsCollection.put(PREFIX_XT, summaryCollection);
+        mPlatformNetworkStatsCollection.put(PREFIX_UID, uidCollection);
+        mPlatformNetworkStatsCollection.put(PREFIX_UID_TAG,
+                getLegacyCollection(PREFIX_UID_TAG, true /* includeTags */));
+
+        // Mock zero usage and boot through serviceReady(), verify there is no imported data.
+        expectDefaultSettings();
+        expectNetworkStatsUidDetail(buildEmptyStats());
+        expectSystemReady();
+        mService.systemReady();
+        assertStatsFilesExist(false);
+
+        // Set the flag and reboot, verify the imported data is not there until next boot.
+        mStoreFilesInApexData = true;
+        mImportLegacyTargetAttempts = 3;
+        mServiceContext.sendBroadcast(new Intent(Intent.ACTION_SHUTDOWN));
+        assertStatsFilesExist(false);
+
+        // Boot through systemReady() again.
+        expectDefaultSettings();
+        expectNetworkStatsUidDetail(buildEmptyStats());
+        expectSystemReady();
+        mService.systemReady();
+
+        // Verify the result read from public API matches the result returned from the importer.
+        assertNetworkTotal(sTemplateWifi, 1024L + 55L, 8L + 4L, 2048L + 31L, 16L + 10L, 0 + 5);
+        assertUidTotal(sTemplateWifi, UID_BLUE,
+                128L + 55L, 1L + 4L, 128L + 31L, 1L + 10L, 0 + 5);
+        assertStatsFilesExist(true);
+        verify(mImportLegacyAttemptsCounter).set(3);
+        verify(mImportLegacySuccessesCounter).set(1);
+    }
+
+    @Test
+    public void testShouldRunComparison() {
+        // TODO(b/233752318): For now it should always true to collect signal from beta users.
+        //  Should change to the default behavior (true if userdebug rom) before formal release.
+        for (int testValue : Set.of(-1, 0, 1, 2)) {
+            doReturn(testValue).when(mResources)
+                    .getInteger(R.integer.config_netstats_validate_import);
+            assertEquals(true, mService.shouldRunComparison());
+        }
+    }
+
     private NetworkStatsRecorder makeTestRecorder(File directory, String prefix, Config config,
             boolean includeTags) {
         final NetworkStats.NonMonotonicObserver observer =