Fix a race condition in upstream selection.

Current upstream selection code suffers from a race where if the
CONNECTIVITY_ACTION broadcast for a given network switch is
received and processed before the NetworkCallbacks for that
network switch, upstream selection just re-selects the same
upstream it had before. The incorrect upstream persists until
another CONNECTIVITY_ACTION is received.

Fix this by defining a new EVENT_DEFAULT_SWITCHED message code
communicated from UpstreamNetworkMonitor to Tethering, and send
that whenever the default network switches.

The message is sent in onLinkPropertiesChanged, because the
tethering code stores all information about networks in an
UpstreamNetworkState structure that contains Network,
LinkProperties and NetworkCapabilities. When a network switch
occurs, onLinkPropertiesChanged always follows onAvailable and
onCapabilitiesChanged, and thus marks the first point in time
when all the information is available.

This CL tries not to change existing codepaths too much, but
it does move the update of mDefaultInternetNetwork from
onCapabilitiesChanged to onLinkPropertiesChanged. This should
not be a problem because the only thing that reads
mDefaultInternetNetwork is getCurrentPreferredUpstream, which,
in the case of a default network switch, will be run by the
onLinkPropertiesChanged which will immediately follow.

Bug: 173068192
Test: changes to existing unit tests show bug is fixed
Change-Id: Ic9196bc92892811b25bda463ffd839ee5c19d294
diff --git a/Tethering/src/com/android/networkstack/tethering/Tethering.java b/Tethering/src/com/android/networkstack/tethering/Tethering.java
index c6e8fd6..f795747 100644
--- a/Tethering/src/com/android/networkstack/tethering/Tethering.java
+++ b/Tethering/src/com/android/networkstack/tethering/Tethering.java
@@ -1719,6 +1719,12 @@
                     break;
             }
 
+            if (mConfig.chooseUpstreamAutomatically
+                    && arg1 == UpstreamNetworkMonitor.EVENT_DEFAULT_SWITCHED) {
+                chooseUpstreamType(true);
+                return;
+            }
+
             if (ns == null || !pertainsToCurrentUpstream(ns)) {
                 // TODO: In future, this is where upstream evaluation and selection
                 // could be handled for notifications which include sufficient data.
diff --git a/Tethering/src/com/android/networkstack/tethering/UpstreamNetworkMonitor.java b/Tethering/src/com/android/networkstack/tethering/UpstreamNetworkMonitor.java
index 513f07c..e39145b 100644
--- a/Tethering/src/com/android/networkstack/tethering/UpstreamNetworkMonitor.java
+++ b/Tethering/src/com/android/networkstack/tethering/UpstreamNetworkMonitor.java
@@ -42,11 +42,14 @@
 import android.util.Log;
 import android.util.SparseIntArray;
 
+import androidx.annotation.NonNull;
+
 import com.android.internal.annotations.VisibleForTesting;
 import com.android.internal.util.StateMachine;
 
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Objects;
 import java.util.Set;
 
 
@@ -82,6 +85,7 @@
     public static final int EVENT_ON_CAPABILITIES   = 1;
     public static final int EVENT_ON_LINKPROPERTIES = 2;
     public static final int EVENT_ON_LOST           = 3;
+    public static final int EVENT_DEFAULT_SWITCHED  = 4;
     public static final int NOTIFY_LOCAL_PREFIXES   = 10;
     // This value is used by deprecated preferredUpstreamIfaceTypes selection which is default
     // disabled.
@@ -398,7 +402,7 @@
         notifyTarget(EVENT_ON_CAPABILITIES, network);
     }
 
-    private void handleLinkProp(Network network, LinkProperties newLp) {
+    private void updateLinkProperties(Network network, LinkProperties newLp) {
         final UpstreamNetworkState prev = mNetworkMap.get(network);
         if (prev == null || newLp.equals(prev.linkProperties)) {
             // Ignore notifications about networks for which we have not yet
@@ -414,8 +418,10 @@
 
         mNetworkMap.put(network, new UpstreamNetworkState(
                 newLp, prev.networkCapabilities, network));
-        // TODO: If sufficient information is available to select a more
-        // preferable upstream, do so now and notify the target.
+    }
+
+    private void handleLinkProp(Network network, LinkProperties newLp) {
+        updateLinkProperties(network, newLp);
         notifyTarget(EVENT_ON_LINKPROPERTIES, network);
     }
 
@@ -445,6 +451,24 @@
         notifyTarget(EVENT_ON_LOST, mNetworkMap.remove(network));
     }
 
+    private void maybeHandleNetworkSwitch(@NonNull Network network) {
+        if (Objects.equals(mDefaultInternetNetwork, network)) return;
+
+        final UpstreamNetworkState ns = mNetworkMap.get(network);
+        if (ns == null) {
+            // Can never happen unless there is a bug in ConnectivityService. Entries are only
+            // removed from mNetworkMap when receiving onLost, and onLost for a given network can
+            // never be followed by any other callback on that network.
+            Log.wtf(TAG, "maybeHandleNetworkSwitch: no UpstreamNetworkState for " + network);
+            return;
+        }
+
+        // Default network changed. Update local data and notify tethering.
+        Log.d(TAG, "New default Internet network: " + network);
+        mDefaultInternetNetwork = network;
+        notifyTarget(EVENT_DEFAULT_SWITCHED, ns);
+    }
+
     private void recomputeLocalPrefixes() {
         final HashSet<IpPrefix> localPrefixes = allLocalPrefixes(mNetworkMap.values());
         if (!mLocalPrefixes.equals(localPrefixes)) {
@@ -482,7 +506,22 @@
         @Override
         public void onCapabilitiesChanged(Network network, NetworkCapabilities newNc) {
             if (mCallbackType == CALLBACK_DEFAULT_INTERNET) {
-                mDefaultInternetNetwork = network;
+                // mDefaultInternetNetwork is not updated here because upstream selection must only
+                // run when the LinkProperties have been updated as well as the capabilities. If
+                // this callback is due to a default network switch, then the system will invoke
+                // onLinkPropertiesChanged right after this method and mDefaultInternetNetwork will
+                // be updated then.
+                //
+                // Technically, not updating here isn't necessary, because the notifications to
+                // Tethering sent by notifyTarget are messages sent to a state machine running on
+                // the same thread as this method, and so cannot arrive until after this method has
+                // returned. However, it is not a good idea to rely on that because fact that
+                // Tethering uses multiple state machines running on the same thread is a major
+                // source of race conditions and something that should be fixed.
+                //
+                // TODO: is it correct that this code always updates EntitlementManager?
+                // This code runs when the default network connects or changes capabilities, but the
+                // default network might not be the tethering upstream.
                 final boolean newIsCellular = isCellular(newNc);
                 if (mIsDefaultCellularUpstream != newIsCellular) {
                     mIsDefaultCellularUpstream = newIsCellular;
@@ -496,7 +535,15 @@
 
         @Override
         public void onLinkPropertiesChanged(Network network, LinkProperties newLp) {
-            if (mCallbackType == CALLBACK_DEFAULT_INTERNET) return;
+            if (mCallbackType == CALLBACK_DEFAULT_INTERNET) {
+                updateLinkProperties(network, newLp);
+                // When the default network callback calls onLinkPropertiesChanged, it means that
+                // all the network information for the default network is known (because
+                // onLinkPropertiesChanged is called after onAvailable and onCapabilitiesChanged).
+                // Inform tethering that the default network might have changed.
+                maybeHandleNetworkSwitch(network);
+                return;
+            }
 
             handleLinkProp(network, newLp);
             // Any non-LISTEN_ALL callback will necessarily concern a network that will
@@ -513,6 +560,8 @@
                 mDefaultInternetNetwork = null;
                 mIsDefaultCellularUpstream = false;
                 mEntitlementMgr.notifyUpstream(false);
+                Log.d(TAG, "Lost default Internet network: " + network);
+                notifyTarget(EVENT_DEFAULT_SWITCHED, null);
                 return;
             }
 
diff --git a/Tethering/tests/unit/src/com/android/networkstack/tethering/TetheringTest.java b/Tethering/tests/unit/src/com/android/networkstack/tethering/TetheringTest.java
index 3f847ff..d18d990 100644
--- a/Tethering/tests/unit/src/com/android/networkstack/tethering/TetheringTest.java
+++ b/Tethering/tests/unit/src/com/android/networkstack/tethering/TetheringTest.java
@@ -1120,37 +1120,26 @@
         mLooper.dispatchAll();
         inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(wifi.networkId);
 
+        // This code has historically been racy, so test different orderings of CONNECTIVITY_ACTION
+        // broadcasts and callbacks, and add mLooper.dispatchAll() calls between the two.
         final Runnable doDispatchAll = () -> mLooper.dispatchAll();
 
-        // There is a race where if Tethering and UpstreamNetworkMonitor process the
-        // CONNECTIVITY_ACTION before UpstreamNetworkMonitor gets onCapabilitiesChanged on
-        // CALLBACK_DEFAULT_INTERNET, the upstream does not change.
-        // Simulate this by telling TestConnectivityManager to call mLooper.dispatchAll() between
-        // sending the CONNECTIVITY_ACTION and sending the callbacks.
-        // The switch to wifi just above shows that if the CONNECTIVITY_ACTION and the callbacks
-        // happen close enough together that the tethering code sees both, the code behaves as
-        // expected.
         mCm.makeDefaultNetwork(mobile, BROADCAST_FIRST, doDispatchAll);
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor, never()).setCurrentUpstream(any());  // BUG
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(mobile.networkId);
 
         mCm.makeDefaultNetwork(wifi, BROADCAST_FIRST, doDispatchAll);
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(mobile.networkId);  // BUG
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(wifi.networkId);
 
-        // If the broadcast immediately follows the callbacks, the code behaves as expected.
-        // In this case nothing happens because the upstream is already set to cellular and
-        // remaining on cellular is correct.
         mCm.makeDefaultNetwork(mobile, CALLBACKS_FIRST);
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor, never()).setCurrentUpstream(any());
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(mobile.networkId);
 
         mCm.makeDefaultNetwork(wifi, CALLBACKS_FIRST);
         mLooper.dispatchAll();
         inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(wifi.networkId);
 
-        // If the broadcast happens after the callbacks have been processed, the code also behaves
-        // correctly.
         mCm.makeDefaultNetwork(mobile, CALLBACKS_FIRST, doDispatchAll);
         mLooper.dispatchAll();
         inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(mobile.networkId);
@@ -1161,31 +1150,30 @@
         inOrder.verify(mUpstreamNetworkMonitor, never()).setCurrentUpstream(any());
 
         // Lose and regain upstream.
-        // As above, if broadcasts are processed before the callbacks, upstream is not updated.
         assertTrue(mUpstreamNetworkMonitor.getCurrentPreferredUpstream().linkProperties
                 .hasIPv4Address());
         mCm.makeDefaultNetwork(null, BROADCAST_FIRST, doDispatchAll);
         mLooper.dispatchAll();
         mobile.fakeDisconnect();
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor, never()).setCurrentUpstream(any());  // BUG
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(null);
 
         mobile = new TestNetworkAgent(mCm, buildMobile464xlatUpstreamState());
         mobile.fakeConnect();
         mCm.makeDefaultNetwork(mobile, BROADCAST_FIRST, doDispatchAll);
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(null);  // BUG
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(mobile.networkId);
 
         // Check the IP addresses to ensure that the upstream is indeed not the same as the previous
         // mobile upstream, even though the netId is (unrealistically) the same.
         assertFalse(mUpstreamNetworkMonitor.getCurrentPreferredUpstream().linkProperties
                 .hasIPv4Address());
 
-        // Lose and regain upstream again, but this time send events in an order that works.
+        // Lose and regain upstream again.
         mCm.makeDefaultNetwork(null, CALLBACKS_FIRST, doDispatchAll);
         mobile.fakeDisconnect();
         mLooper.dispatchAll();
-        inOrder.verify(mUpstreamNetworkMonitor, never()).setCurrentUpstream(any());  // BUG
+        inOrder.verify(mUpstreamNetworkMonitor).setCurrentUpstream(null);
 
         mobile = new TestNetworkAgent(mCm, buildMobileDualStackUpstreamState());
         mobile.fakeConnect();