Disable fallback when comparison result is different

Follow-up from ag/18452103, where we add fallback code that runs
with the importer to make sure they are identical.
When the result is different, we'll take the result from fallback
code to minimize the rollout risk. However, since the OEMs might
change the importer implementation. The fallback code would no
longer valid and that makes OEM modified code not working. Hence
the fallback code must be disabled before release.

This change keeps comparison enabled for all cases to keep getting
signals from beta users. And will switch it to read overlay value
for OEM to debug their solution.

Ignore-AOSP-First: Parent CLs are not in aosp yet
Test: 1. NetworkStatsServiceTest
      2. Test all datasets with script
Bug: 233752318
Change-Id: I869ff05297149bde6e13a204bd8c5a4fece75de0
diff --git a/service-t/src/com/android/server/net/NetworkStatsService.java b/service-t/src/com/android/server/net/NetworkStatsService.java
index 42a108f..b37f93d 100644
--- a/service-t/src/com/android/server/net/NetworkStatsService.java
+++ b/service-t/src/com/android/server/net/NetworkStatsService.java
@@ -76,8 +76,10 @@
 import android.content.IntentFilter;
 import android.content.pm.ApplicationInfo;
 import android.content.pm.PackageManager;
+import android.content.res.Resources;
 import android.database.ContentObserver;
 import android.net.ConnectivityManager;
+import android.net.ConnectivityResources;
 import android.net.DataUsageRequest;
 import android.net.INetd;
 import android.net.INetworkStatsService;
@@ -140,6 +142,7 @@
 import android.util.SparseIntArray;
 import android.util.proto.ProtoOutputStream;
 
+import com.android.connectivity.resources.R;
 import com.android.internal.annotations.GuardedBy;
 import com.android.internal.annotations.VisibleForTesting;
 import com.android.internal.util.FileRotator;
@@ -765,6 +768,11 @@
                 return null;
             }
         }
+
+        /** Gets whether the build is userdebug. */
+        public boolean isDebuggable() {
+            return Build.isDebuggable();
+        }
     }
 
     /**
@@ -927,18 +935,27 @@
         final int targetAttempts = mDeps.getImportLegacyTargetAttempts();
         final int attempts;
         final int fallbacks;
+        final boolean runComparison;
         try {
             attempts = mImportLegacyAttemptsCounter.get();
+            // Fallbacks counter would be set to non-zero value to indicate the migration was
+            // not successful.
             fallbacks = mImportLegacyFallbacksCounter.get();
+            runComparison = shouldRunComparison();
         } catch (IOException e) {
             Log.wtf(TAG, "Failed to read counters, skip.", e);
             return;
         }
-        // If fallbacks is not zero, proceed with reading only to give signals from dogfooders.
-        // TODO(b/233752318): Remove fallbacks counter check before T formal release.
-        if (attempts >= targetAttempts && fallbacks == 0) return;
 
-        final boolean dryRunImportOnly = (attempts >= targetAttempts);
+        // If the target number of attempts are reached, don't import any data.
+        // However, if comparison is requested, still read the legacy data and compare
+        // it to the importer output. This allows OEMs to debug issues with the
+        // importer code and to collect signals from the field.
+        final boolean dryRunImportOnly =
+                fallbacks != 0 && runComparison && (attempts >= targetAttempts);
+        // Return if target attempts are reached and there is no need to dry run.
+        if (attempts >= targetAttempts && !dryRunImportOnly) return;
+
         if (dryRunImportOnly) {
             Log.i(TAG, "Starting import : only perform read");
         } else {
@@ -951,69 +968,54 @@
         };
 
         // Legacy directories will be created by recorders if they do not exist
-        final File legacyBaseDir = mDeps.getLegacyStatsDir();
-        final NetworkStatsRecorder[] legacyRecorders = new NetworkStatsRecorder[]{
-                buildRecorder(PREFIX_DEV, mSettings.getDevConfig(), false, legacyBaseDir),
-                buildRecorder(PREFIX_XT, mSettings.getXtConfig(), false, legacyBaseDir),
-                buildRecorder(PREFIX_UID, mSettings.getUidConfig(), false, legacyBaseDir),
-                buildRecorder(PREFIX_UID_TAG, mSettings.getUidTagConfig(), true, legacyBaseDir)
-        };
+        final NetworkStatsRecorder[] legacyRecorders;
+        if (runComparison) {
+            final File legacyBaseDir = mDeps.getLegacyStatsDir();
+            legacyRecorders = new NetworkStatsRecorder[]{
+                    buildRecorder(PREFIX_DEV, mSettings.getDevConfig(), false, legacyBaseDir),
+                    buildRecorder(PREFIX_XT, mSettings.getXtConfig(), false, legacyBaseDir),
+                    buildRecorder(PREFIX_UID, mSettings.getUidConfig(), false, legacyBaseDir),
+                    buildRecorder(PREFIX_UID_TAG, mSettings.getUidTagConfig(), true, legacyBaseDir)
+            };
+        } else {
+            legacyRecorders = null;
+        }
 
         long migrationEndTime = Long.MIN_VALUE;
-        boolean endedWithFallback = false;
         try {
             // First, read all legacy collections. This is OEM code and it can throw. Don't
             // commit any data to disk until all are read.
             for (int i = 0; i < migrations.length; i++) {
-                String errMsg = null;
-                Throwable exception = null;
                 final MigrationInfo migration = migrations[i];
 
-                // Read the collection from platform code, and using fallback method if throws.
+                // Read the collection from platform code, and set fallbacks counter if throws
+                // for better debugging.
                 try {
                     migration.collection = readPlatformCollectionForRecorder(migration.recorder);
                 } catch (Throwable e) {
-                    errMsg = "Failed to read stats from platform";
-                    exception = e;
-                }
-
-                // Also read the collection with legacy method
-                final NetworkStatsRecorder legacyRecorder = legacyRecorders[i];
-
-                final NetworkStatsCollection legacyStats;
-                try {
-                    legacyStats = legacyRecorder.getOrLoadCompleteLocked();
-                } catch (Throwable e) {
-                    Log.wtf(TAG, "Failed to read stats with legacy method for recorder " + i, e);
-                    if (exception != null) {
-                        throw exception;
+                    if (dryRunImportOnly) {
+                        Log.wtf(TAG, "Platform data read failed. ", e);
+                        return;
                     } else {
-                        // Use newer stats, since that's all that is available
-                        continue;
+                        // Data is not imported successfully, set fallbacks counter to non-zero
+                        // value to trigger dry run every later boot when the runComparison is
+                        // true, in order to make it easier to debug issues.
+                        tryIncrementLegacyFallbacksCounter();
+                        // Re-throw for error handling. This will increase attempts counter.
+                        throw e;
                     }
                 }
 
-                if (errMsg == null) {
-                    try {
-                        errMsg = compareStats(migration.collection, legacyStats);
-                    } catch (Throwable e) {
-                        errMsg = "Failed to compare migrated stats with all stats";
-                        exception = e;
+                if (runComparison) {
+                    final boolean success =
+                            compareImportedToLegacyStats(migration, legacyRecorders[i]);
+                    if (!success && !dryRunImportOnly) {
+                        tryIncrementLegacyFallbacksCounter();
                     }
                 }
-
-                if (errMsg != null) {
-                    Log.wtf(TAG, "NetworkStats import for migration " + i
-                            + " returned invalid data: " + errMsg, exception);
-                    // Fall back to legacy stats for this boot. The stats for old data will be
-                    // re-imported again on next boot until they succeed the import. This is fine
-                    // since every import clears the previous stats for the imported timespan.
-                    migration.collection = legacyStats;
-                    endedWithFallback = true;
-                }
             }
 
-            // For cases where the fallbacks is not zero but target attempts counts reached,
+            // For cases where the fallbacks are not zero but target attempts counts reached,
             // only perform reads above and return here.
             if (dryRunImportOnly) return;
 
@@ -1079,22 +1081,78 @@
         // Success ! No need to import again next time.
         try {
             mImportLegacyAttemptsCounter.set(targetAttempts);
-            if (endedWithFallback) {
-                Log.wtf(TAG, "Imported platform collections with legacy fallback");
-                final int fallbacksCount = mImportLegacyFallbacksCounter.get();
-                mImportLegacyFallbacksCounter.set(fallbacksCount + 1);
-            } else {
-                Log.i(TAG, "Successfully imported platform collections");
-                // The successes counter is only for debugging. Hence, the synchronization
-                // between successes counter and attempts counter are not very critical.
-                final int successCount = mImportLegacySuccessesCounter.get();
-                mImportLegacySuccessesCounter.set(successCount + 1);
-            }
+            Log.i(TAG, "Successfully imported platform collections");
+            // The successes counter is only for debugging. Hence, the synchronization
+            // between successes counter and attempts counter are not very critical.
+            final int successCount = mImportLegacySuccessesCounter.get();
+            mImportLegacySuccessesCounter.set(successCount + 1);
         } catch (IOException e) {
             Log.wtf(TAG, "Succeed but failed to update counters.", e);
         }
     }
 
+    void tryIncrementLegacyFallbacksCounter() {
+        try {
+            final int fallbacks = mImportLegacyFallbacksCounter.get();
+            mImportLegacyFallbacksCounter.set(fallbacks + 1);
+        } catch (IOException e) {
+            Log.wtf(TAG, "Failed to update fallback counter.", e);
+        }
+    }
+
+    @VisibleForTesting
+    boolean shouldRunComparison() {
+        final ConnectivityResources resources = new ConnectivityResources(mContext);
+        // 0 if id not found.
+        Boolean overlayValue = null;
+        try {
+            switch (resources.get().getInteger(R.integer.config_netstats_validate_import)) {
+                case 1:
+                    overlayValue = Boolean.TRUE;
+                    break;
+                case 0:
+                    overlayValue = Boolean.FALSE;
+                    break;
+            }
+        } catch (Resources.NotFoundException e) {
+            // Overlay value is not defined.
+        }
+        // TODO(b/233752318): For now it is always true to collect signal from beta users.
+        //  Should change to the default behavior (true if debuggable builds) before formal release.
+        return (overlayValue != null ? overlayValue : mDeps.isDebuggable()) || true;
+    }
+
+    /**
+     * Compare imported data with the data returned by legacy recorders.
+     *
+     * @return true if the data matches, false if the data does not match or throw with exceptions.
+     */
+    private boolean compareImportedToLegacyStats(@NonNull MigrationInfo migration,
+            @NonNull NetworkStatsRecorder legacyRecorder) {
+        final NetworkStatsCollection legacyStats;
+        try {
+            legacyStats = legacyRecorder.getOrLoadCompleteLocked();
+        } catch (Throwable e) {
+            Log.wtf(TAG, "Failed to read stats with legacy method for recorder "
+                    + legacyRecorder.getCookie(), e);
+            // Cannot read data from legacy method, skip comparison.
+            return false;
+        }
+
+        // The result of comparison is only for logging.
+        try {
+            final String error = compareStats(migration.collection, legacyStats);
+            if (error != null) {
+                Log.wtf(TAG, "Unexpected comparison result for recorder "
+                        + legacyRecorder.getCookie() + ": " + error);
+            }
+        } catch (Throwable e) {
+            Log.wtf(TAG, "Failed to compare migrated stats with legacy stats for recorder "
+                    + legacyRecorder.getCookie(), e);
+        }
+        return true;
+    }
+
     private static String str(NetworkStatsCollection.Key key) {
         StringBuilder sb = new StringBuilder()
                 .append(key.ident.toString())
diff --git a/service/ServiceConnectivityResources/res/values/config.xml b/service/ServiceConnectivityResources/res/values/config.xml
index 81782f9..bff6953 100644
--- a/service/ServiceConnectivityResources/res/values/config.xml
+++ b/service/ServiceConnectivityResources/res/values/config.xml
@@ -179,4 +179,13 @@
     Only supported up to S. On T+, the Wi-Fi code should use unregisterAfterReplacement in order
     to ensure that apps see the network disconnect and reconnect. -->
     <integer translatable="false" name="config_validationFailureAfterRoamIgnoreTimeMillis">-1</integer>
+
+    <!-- Whether the network stats service should run compare on the result of
+    {@link NetworkStatsDataMigrationUtils#readPlatformCollection} and the result
+    of reading from legacy recorders. Possible values are:
+      0 = never compare,
+      1 = always compare,
+      2 = compare on debuggable builds (default value)
+      -->
+    <integer translatable="false" name="config_netstats_validate_import">2</integer>
 </resources>
diff --git a/service/ServiceConnectivityResources/res/values/overlayable.xml b/service/ServiceConnectivityResources/res/values/overlayable.xml
index b92dd08..3389d63 100644
--- a/service/ServiceConnectivityResources/res/values/overlayable.xml
+++ b/service/ServiceConnectivityResources/res/values/overlayable.xml
@@ -41,6 +41,7 @@
             <item type="array" name="config_ethernet_interfaces"/>
             <item type="string" name="config_ethernet_iface_regex"/>
             <item type="integer" name="config_validationFailureAfterRoamIgnoreTimeMillis" />
+            <item type="integer" name="config_netstats_validate_import" />
         </policy>
     </overlayable>
 </resources>
diff --git a/tests/unit/java/com/android/server/net/NetworkStatsServiceTest.java b/tests/unit/java/com/android/server/net/NetworkStatsServiceTest.java
index b1d44ea..527dec3 100644
--- a/tests/unit/java/com/android/server/net/NetworkStatsServiceTest.java
+++ b/tests/unit/java/com/android/server/net/NetworkStatsServiceTest.java
@@ -95,13 +95,16 @@
 import android.app.AlarmManager;
 import android.content.Context;
 import android.content.Intent;
+import android.content.res.Resources;
 import android.database.ContentObserver;
+import android.net.ConnectivityResources;
 import android.net.DataUsageRequest;
 import android.net.INetd;
 import android.net.INetworkStatsSession;
 import android.net.LinkProperties;
 import android.net.Network;
 import android.net.NetworkCapabilities;
+import android.net.NetworkIdentity;
 import android.net.NetworkStateSnapshot;
 import android.net.NetworkStats;
 import android.net.NetworkStatsCollection;
@@ -128,6 +131,7 @@
 import androidx.test.InstrumentationRegistry;
 import androidx.test.filters.SmallTest;
 
+import com.android.connectivity.resources.R;
 import com.android.internal.util.FileRotator;
 import com.android.internal.util.test.BroadcastInterceptingContext;
 import com.android.net.module.util.IBpfMap;
@@ -154,6 +158,7 @@
 import java.time.temporal.ChronoUnit;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Set;
 import java.util.concurrent.Executor;
 import java.util.concurrent.atomic.AtomicBoolean;
 
@@ -247,6 +252,8 @@
     private @Mock PersistentInt mImportLegacyAttemptsCounter;
     private @Mock PersistentInt mImportLegacySuccessesCounter;
     private @Mock PersistentInt mImportLegacyFallbacksCounter;
+    private @Mock Resources mResources;
+    private Boolean mIsDebuggable;
 
     private class MockContext extends BroadcastInterceptingContext {
         private final Context mBaseContext;
@@ -307,6 +314,12 @@
     @Before
     public void setUp() throws Exception {
         MockitoAnnotations.initMocks(this);
+
+        // Setup mock resources.
+        final Context mockResContext = mock(Context.class);
+        doReturn(mResources).when(mockResContext).getResources();
+        ConnectivityResources.setResourcesContextForTest(mockResContext);
+
         final Context context = InstrumentationRegistry.getContext();
         mServiceContext = new MockContext(context);
         when(mLocationPermissionChecker.checkCallersLocationPermission(
@@ -462,6 +475,11 @@
             public IBpfMap<UidStatsMapKey, StatsMapValue> getAppUidStatsMap() {
                 return mAppUidStatsMap;
             }
+
+            @Override
+            public boolean isDebuggable() {
+                return mIsDebuggable == Boolean.TRUE;
+            }
         };
     }
 
@@ -1898,6 +1916,99 @@
         //  will decrease the retry counter by 1.
     }
 
+    @Test
+    public void testDataMigration_differentFromFallback() throws Exception {
+        assertStatsFilesExist(false);
+        expectDefaultSettings();
+
+        NetworkStateSnapshot[] states = new NetworkStateSnapshot[]{buildWifiState()};
+
+        mService.notifyNetworkStatus(NETWORKS_WIFI, states, getActiveIface(states),
+                new UnderlyingNetworkInfo[0]);
+
+        // modify some number on wifi, and trigger poll event
+        incrementCurrentTime(HOUR_IN_MILLIS);
+        expectNetworkStatsSummary(new NetworkStats(getElapsedRealtime(), 1)
+                .insertEntry(TEST_IFACE, 1024L, 8L, 2048L, 16L));
+        expectNetworkStatsUidDetail(new NetworkStats(getElapsedRealtime(), 1)
+                .insertEntry(TEST_IFACE, UID_BLUE, SET_DEFAULT, TAG_NONE, 128L, 1L, 128L, 1L, 0L));
+        forcePollAndWaitForIdle();
+        // Simulate shutdown to force persisting data
+        mServiceContext.sendBroadcast(new Intent(Intent.ACTION_SHUTDOWN));
+        assertStatsFilesExist(true);
+
+        // Move the files to the legacy directory to simulate an import from old data
+        for (File f : mStatsDir.listFiles()) {
+            Files.move(f.toPath(), mLegacyStatsDir.toPath().resolve(f.getName()));
+        }
+        assertStatsFilesExist(false);
+
+        // Prepare some unexpected data.
+        final NetworkIdentity testWifiIdent = new NetworkIdentity.Builder().setType(TYPE_WIFI)
+                .setWifiNetworkKey(TEST_WIFI_NETWORK_KEY).build();
+        final NetworkStatsCollection.Key unexpectedUidAllkey = new NetworkStatsCollection.Key(
+                Set.of(testWifiIdent), UID_ALL, SET_DEFAULT, 0);
+        final NetworkStatsCollection.Key unexpectedUidBluekey = new NetworkStatsCollection.Key(
+                Set.of(testWifiIdent), UID_BLUE, SET_DEFAULT, 0);
+        final NetworkStatsHistory unexpectedHistory = new NetworkStatsHistory
+                .Builder(965L /* bucketDuration */, 1)
+                .addEntry(new NetworkStatsHistory.Entry(TEST_START, 3L, 55L, 4L, 31L, 10L, 5L))
+                .build();
+
+        // Simulate the platform stats collection somehow is different from what is read from
+        // the fallback method. The service should read them as is. This usually happens when an
+        // OEM has changed the implementation of NetworkStatsDataMigrationUtils inside the platform.
+        final NetworkStatsCollection summaryCollection =
+                getLegacyCollection(PREFIX_XT, false /* includeTags */);
+        summaryCollection.recordHistory(unexpectedUidAllkey, unexpectedHistory);
+        final NetworkStatsCollection uidCollection =
+                getLegacyCollection(PREFIX_UID, false /* includeTags */);
+        uidCollection.recordHistory(unexpectedUidBluekey, unexpectedHistory);
+        mPlatformNetworkStatsCollection.put(PREFIX_DEV, summaryCollection);
+        mPlatformNetworkStatsCollection.put(PREFIX_XT, summaryCollection);
+        mPlatformNetworkStatsCollection.put(PREFIX_UID, uidCollection);
+        mPlatformNetworkStatsCollection.put(PREFIX_UID_TAG,
+                getLegacyCollection(PREFIX_UID_TAG, true /* includeTags */));
+
+        // Mock zero usage and boot through serviceReady(), verify there is no imported data.
+        expectDefaultSettings();
+        expectNetworkStatsUidDetail(buildEmptyStats());
+        expectSystemReady();
+        mService.systemReady();
+        assertStatsFilesExist(false);
+
+        // Set the flag and reboot, verify the imported data is not there until next boot.
+        mStoreFilesInApexData = true;
+        mImportLegacyTargetAttempts = 3;
+        mServiceContext.sendBroadcast(new Intent(Intent.ACTION_SHUTDOWN));
+        assertStatsFilesExist(false);
+
+        // Boot through systemReady() again.
+        expectDefaultSettings();
+        expectNetworkStatsUidDetail(buildEmptyStats());
+        expectSystemReady();
+        mService.systemReady();
+
+        // Verify the result read from public API matches the result returned from the importer.
+        assertNetworkTotal(sTemplateWifi, 1024L + 55L, 8L + 4L, 2048L + 31L, 16L + 10L, 0 + 5);
+        assertUidTotal(sTemplateWifi, UID_BLUE,
+                128L + 55L, 1L + 4L, 128L + 31L, 1L + 10L, 0 + 5);
+        assertStatsFilesExist(true);
+        verify(mImportLegacyAttemptsCounter).set(3);
+        verify(mImportLegacySuccessesCounter).set(1);
+    }
+
+    @Test
+    public void testShouldRunComparison() {
+        // TODO(b/233752318): For now it should always true to collect signal from beta users.
+        //  Should change to the default behavior (true if userdebug rom) before formal release.
+        for (int testValue : Set.of(-1, 0, 1, 2)) {
+            doReturn(testValue).when(mResources)
+                    .getInteger(R.integer.config_netstats_validate_import);
+            assertEquals(true, mService.shouldRunComparison());
+        }
+    }
+
     private NetworkStatsRecorder makeTestRecorder(File directory, String prefix, Config config,
             boolean includeTags) {
         final NetworkStats.NonMonotonicObserver observer =