Merge changes I8cd6e49b,Ibb52c7b7 * changes: [NFCT.TETHER.10] Add/delete IPv4 offload BPF rules to/from BPF map [NFCT.TETHER.9] Build IPv4 offload BPF rules for raw ip

commit: dc4189f0e4a55e29818991b4e937e5fe01a9be02 [log] [tgz]
author: Lorenzo Colitti <lorenzo@google.com> Tue Jan 26 00:30:15 2021 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> Tue Jan 26 00:30:15 2021 +0000
tree: 5b7beb2ba270390fb26e24474e07be500e259c7f
parent: c2b014604595bdc6a0d443e6d48725425da7b099 [diff]
parent: dd833dee3102ace586f49c96ea7e8fa7249082f2 [diff]
diff --git a/Tethering/bpf_progs/offload.c b/Tethering/bpf_progs/offload.c
index cce94ee..44c25e5 100644
--- a/Tethering/bpf_progs/offload.c
+++ b/Tethering/bpf_progs/offload.c

@@ -20,10 +20,17 @@
 #include <linux/pkt_cls.h>
 #include <linux/tcp.h>
 
+// bionic kernel uapi linux/udp.h header is munged...
+#define __kernel_udphdr udphdr
+#include <linux/udp.h>
+
 #include "bpf_helpers.h"
 #include "bpf_net_helpers.h"
 #include "netdbpf/bpf_shared.h"
 
+// From kernel:include/net/ip.h
+#define IP_DF 0x4000  // Flag: "Don't Fragment"
+
 // Tethering stats, indexed by upstream interface.
 DEFINE_BPF_MAP_GRW(tether_stats_map, HASH, TetherStatsKey, TetherStatsValue, 16, AID_NETWORK_STACK)
 
@@ -42,7 +49,7 @@
 DEFINE_BPF_MAP_GRW(tether_upstream6_map, HASH, TetherUpstream6Key, TetherUpstream6Value, 64,
                    AID_NETWORK_STACK)
 
-static inline __always_inline int do_forward(struct __sk_buff* skb, const bool is_ethernet,
+static inline __always_inline int do_forward6(struct __sk_buff* skb, const bool is_ethernet,
         const bool downstream) {
     const int l2_header_size = is_ethernet ? sizeof(struct ethhdr) : 0;
     void* data = (void*)(long)skb->data;
@@ -50,6 +57,9 @@
     struct ethhdr* eth = is_ethernet ? data : NULL;  // used iff is_ethernet
     struct ipv6hdr* ip6 = is_ethernet ? (void*)(eth + 1) : data;
 
+    // Require ethernet dst mac address to be our unicast address.
+    if (is_ethernet && (skb->pkt_type != PACKET_HOST)) return TC_ACT_OK;
+
     // Must be meta-ethernet IPv6 frame
     if (skb->protocol != htons(ETH_P_IPV6)) return TC_ACT_OK;
 
@@ -66,6 +76,18 @@
     // Let the kernel's stack handle these cases and generate appropriate ICMP errors.
     if (ip6->hop_limit <= 1) return TC_ACT_OK;
 
+    // If hardware offload is running and programming flows based on conntrack entries,
+    // try not to interfere with it.
+    if (ip6->nexthdr == IPPROTO_TCP) {
+        struct tcphdr* tcph = (void*)(ip6 + 1);
+
+        // Make sure we can get at the tcp header
+        if (data + l2_header_size + sizeof(*ip6) + sizeof(*tcph) > data_end) return TC_ACT_OK;
+
+        // Do not offload TCP packets with any one of the SYN/FIN/RST flags
+        if (tcph->syn || tcph->fin || tcph->rst) return TC_ACT_OK;
+    }
+
     // Protect against forwarding packets sourced from ::1 or fe80::/64 or other weirdness.
     __be32 src32 = ip6->saddr.s6_addr32[0];
     if (src32 != htonl(0x0064ff9b) &&                        // 64:ff9b:/32 incl. XLAT464 WKP
@@ -195,13 +217,13 @@
 DEFINE_BPF_PROG("schedcls/tether_downstream6_ether", AID_ROOT, AID_NETWORK_STACK,
                 sched_cls_tether_downstream6_ether)
 (struct __sk_buff* skb) {
-    return do_forward(skb, /* is_ethernet */ true, /* downstream */ true);
+    return do_forward6(skb, /* is_ethernet */ true, /* downstream */ true);
 }
 
 DEFINE_BPF_PROG("schedcls/tether_upstream6_ether", AID_ROOT, AID_NETWORK_STACK,
                 sched_cls_tether_upstream6_ether)
 (struct __sk_buff* skb) {
-    return do_forward(skb, /* is_ethernet */ true, /* downstream */ false);
+    return do_forward6(skb, /* is_ethernet */ true, /* downstream */ false);
 }
 
 // Note: section names must be unique to prevent programs from appending to each other,
@@ -220,13 +242,13 @@
 DEFINE_BPF_PROG_KVER("schedcls/tether_downstream6_rawip$5_4", AID_ROOT, AID_NETWORK_STACK,
                      sched_cls_tether_downstream6_rawip_5_4, KVER(5, 4, 0))
 (struct __sk_buff* skb) {
-    return do_forward(skb, /* is_ethernet */ false, /* downstream */ true);
+    return do_forward6(skb, /* is_ethernet */ false, /* downstream */ true);
 }
 
 DEFINE_BPF_PROG_KVER("schedcls/tether_upstream6_rawip$5_4", AID_ROOT, AID_NETWORK_STACK,
                      sched_cls_tether_upstream6_rawip_5_4, KVER(5, 4, 0))
 (struct __sk_buff* skb) {
-    return do_forward(skb, /* is_ethernet */ false, /* downstream */ false);
+    return do_forward6(skb, /* is_ethernet */ false, /* downstream */ false);
 }
 
 // and these identical optional (may fail to load) implementations for [4.14..5.4) patched kernels:
@@ -235,7 +257,7 @@
                                     sched_cls_tether_downstream6_rawip_4_14,
                                     KVER(4, 14, 0), KVER(5, 4, 0))
 (struct __sk_buff* skb) {
-    return do_forward(skb, /* is_ethernet */ false, /* downstream */ true);
+    return do_forward6(skb, /* is_ethernet */ false, /* downstream */ true);
 }
 
 DEFINE_OPTIONAL_BPF_PROG_KVER_RANGE("schedcls/tether_upstream6_rawip$4_14",
@@ -243,7 +265,7 @@
                                     sched_cls_tether_upstream6_rawip_4_14,
                                     KVER(4, 14, 0), KVER(5, 4, 0))
 (struct __sk_buff* skb) {
-    return do_forward(skb, /* is_ethernet */ false, /* downstream */ false);
+    return do_forward6(skb, /* is_ethernet */ false, /* downstream */ false);
 }
 
 // and define no-op stubs for [4.9,4.14) and unpatched [4.14,5.4) kernels.
@@ -269,26 +291,197 @@
 DEFINE_BPF_MAP_GRW(tether_upstream4_map, HASH, TetherUpstream4Key, TetherUpstream4Value, 64,
                    AID_NETWORK_STACK)
 
-DEFINE_BPF_PROG("schedcls/tether_downstream4_ether", AID_ROOT, AID_NETWORK_STACK,
-                sched_cls_tether_downstream4_ether)
+static inline __always_inline int do_forward4(struct __sk_buff* skb, const bool is_ethernet,
+        const bool downstream) {
+    const int l2_header_size = is_ethernet ? sizeof(struct ethhdr) : 0;
+    void* data = (void*)(long)skb->data;
+    const void* data_end = (void*)(long)skb->data_end;
+    struct ethhdr* eth = is_ethernet ? data : NULL;  // used iff is_ethernet
+    struct iphdr* ip = is_ethernet ? (void*)(eth + 1) : data;
+
+    // Require ethernet dst mac address to be our unicast address.
+    if (is_ethernet && (skb->pkt_type != PACKET_HOST)) return TC_ACT_OK;
+
+    // Must be meta-ethernet IPv4 frame
+    if (skb->protocol != htons(ETH_P_IP)) return TC_ACT_OK;
+
+    // Must have (ethernet and) ipv4 header
+    if (data + l2_header_size + sizeof(*ip) > data_end) return TC_ACT_OK;
+
+    // Ethertype - if present - must be IPv4
+    if (is_ethernet && (eth->h_proto != htons(ETH_P_IP))) return TC_ACT_OK;
+
+    // IP version must be 4
+    if (ip->version != 4) return TC_ACT_OK;
+
+    // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
+    if (ip->ihl != 5) return TC_ACT_OK;
+
+    // Calculate the IPv4 one's complement checksum of the IPv4 header.
+    __wsum sum4 = 0;
+    for (int i = 0; i < sizeof(*ip) / sizeof(__u16); ++i) {
+        sum4 += ((__u16*)ip)[i];
+    }
+    // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
+    sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+    sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+    // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
+    if (sum4 != 0xFFFF) return TC_ACT_OK;
+
+    // Minimum IPv4 total length is the size of the header
+    if (ntohs(ip->tot_len) < sizeof(*ip)) return TC_ACT_OK;
+
+    // We are incapable of dealing with IPv4 fragments
+    if (ip->frag_off & ~htons(IP_DF)) return TC_ACT_OK;
+
+    // Cannot decrement during forward if already zero or would be zero,
+    // Let the kernel's stack handle these cases and generate appropriate ICMP errors.
+    if (ip->ttl <= 1) return TC_ACT_OK;
+
+    const bool is_tcp = (ip->protocol == IPPROTO_TCP);
+
+    // We do not support anything besides TCP and UDP
+    if (!is_tcp && (ip->protocol != IPPROTO_UDP)) return TC_ACT_OK;
+
+    struct tcphdr* tcph = is_tcp ? (void*)(ip + 1) : NULL;
+    struct udphdr* udph = is_tcp ? NULL : (void*)(ip + 1);
+
+    if (is_tcp) {
+        // Make sure we can get at the tcp header
+        if (data + l2_header_size + sizeof(*ip) + sizeof(*tcph) > data_end) return TC_ACT_OK;
+
+        // If hardware offload is running and programming flows based on conntrack entries, try not
+        // to interfere with it, so do not offload TCP packets with any one of the SYN/FIN/RST flags
+        if (tcph->syn || tcph->fin || tcph->rst) return TC_ACT_OK;
+    } else { // UDP
+        // Make sure we can get at the udp header
+        if (data + l2_header_size + sizeof(*ip) + sizeof(*udph) > data_end) return TC_ACT_OK;
+    }
+
+    TetherDownstream4Key kd = {
+            .iif = skb->ifindex,
+            .l4Proto = ip->protocol,
+            .src4.s_addr = ip->saddr,
+            .dst4.s_addr = ip->daddr,
+            .srcPort = is_tcp ? tcph->source : udph->source,
+            .dstPort = is_tcp ? tcph->dest : udph->dest,
+    };
+    if (is_ethernet) for (int i = 0; i < ETH_ALEN; ++i) kd.dstMac[i] = eth->h_dest[i];
+
+    TetherUpstream4Key ku = {
+            .iif = skb->ifindex,
+            .l4Proto = ip->protocol,
+            .src4.s_addr = ip->saddr,
+            .dst4.s_addr = ip->daddr,
+            .srcPort = is_tcp ? tcph->source : udph->source,
+            .dstPort = is_tcp ? tcph->dest : udph->dest,
+    };
+    if (is_ethernet) for (int i = 0; i < ETH_ALEN; ++i) ku.dstMac[i] = eth->h_dest[i];
+
+    TetherDownstream4Value* vd = downstream ? bpf_tether_downstream4_map_lookup_elem(&kd) : NULL;
+    TetherUpstream4Value* vu = downstream ? NULL : bpf_tether_upstream4_map_lookup_elem(&ku);
+
+    // If we don't find any offload information then simply let the core stack handle it...
+    if (downstream && !vd) return TC_ACT_OK;
+    if (!downstream && !vu) return TC_ACT_OK;
+
+    uint32_t stat_and_limit_k = downstream ? skb->ifindex : vu->oif;
+
+    TetherStatsValue* stat_v = bpf_tether_stats_map_lookup_elem(&stat_and_limit_k);
+
+    // If we don't have anywhere to put stats, then abort...
+    if (!stat_v) return TC_ACT_OK;
+
+    uint64_t* limit_v = bpf_tether_limit_map_lookup_elem(&stat_and_limit_k);
+
+    // If we don't have a limit, then abort...
+    if (!limit_v) return TC_ACT_OK;
+
+    // Required IPv4 minimum mtu is 68, below that not clear what we should do, abort...
+    const int pmtu = downstream ? vd->pmtu : vu->pmtu;
+    if (pmtu < 68) return TC_ACT_OK;
+
+    // Approximate handling of TCP/IPv4 overhead for incoming LRO/GRO packets: default
+    // outbound path mtu of 1500 is not necessarily correct, but worst case we simply
+    // undercount, which is still better then not accounting for this overhead at all.
+    // Note: this really shouldn't be device/path mtu at all, but rather should be
+    // derived from this particular connection's mss (ie. from gro segment size).
+    // This would require a much newer kernel with newer ebpf accessors.
+    // (This is also blindly assuming 12 bytes of tcp timestamp option in tcp header)
+    uint64_t packets = 1;
+    uint64_t bytes = skb->len;
+    if (bytes > pmtu) {
+        const int tcp_overhead = sizeof(struct iphdr) + sizeof(struct tcphdr) + 12;
+        const int mss = pmtu - tcp_overhead;
+        const uint64_t payload = bytes - tcp_overhead;
+        packets = (payload + mss - 1) / mss;
+        bytes = tcp_overhead * packets + payload;
+    }
+
+    // Are we past the limit?  If so, then abort...
+    // Note: will not overflow since u64 is 936 years even at 5Gbps.
+    // Do not drop here.  Offload is just that, whenever we fail to handle
+    // a packet we let the core stack deal with things.
+    // (The core stack needs to handle limits correctly anyway,
+    // since we don't offload all traffic in both directions)
+    if (stat_v->rxBytes + stat_v->txBytes + bytes > *limit_v) return TC_ACT_OK;
+
+    // TODO: replace Errors with Packets once implemented
+    __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, packets);
+    __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, bytes);
+
+    // TODO: not actually implemented yet
+    return TC_ACT_OK;
+}
+
+// Real implementations for 5.9+ kernels
+
+DEFINE_BPF_PROG_KVER("schedcls/tether_downstream4_ether$5_9", AID_ROOT, AID_NETWORK_STACK,
+                     sched_cls_tether_downstream4_ether_5_9, KVER(5, 9, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ true, /* downstream */ true);
+}
+
+DEFINE_BPF_PROG_KVER("schedcls/tether_downstream4_rawip$5_9", AID_ROOT, AID_NETWORK_STACK,
+                     sched_cls_tether_downstream4_rawip_5_9, KVER(5, 9, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ false, /* downstream */ true);
+}
+
+DEFINE_BPF_PROG_KVER("schedcls/tether_upstream4_ether$5_9", AID_ROOT, AID_NETWORK_STACK,
+                     sched_cls_tether_upstream4_ether_5_9, KVER(5, 9, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ true, /* downstream */ false);
+}
+
+DEFINE_BPF_PROG_KVER("schedcls/tether_upstream4_rawip$5_9", AID_ROOT, AID_NETWORK_STACK,
+                     sched_cls_tether_upstream4_rawip_5_9, KVER(5, 9, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ false, /* downstream */ false);
+}
+
+// Placeholder implementations for older pre-5.9 kernels
+
+DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_downstream4_ether$stub", AID_ROOT, AID_NETWORK_STACK,
+                           sched_cls_tether_downstream4_ether_stub, KVER_NONE, KVER(5, 9, 0))
 (struct __sk_buff* skb) {
     return TC_ACT_OK;
 }
 
-DEFINE_BPF_PROG("schedcls/tether_downstream4_rawip", AID_ROOT, AID_NETWORK_STACK,
-                sched_cls_tether_downstream4_rawip)
+DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_downstream4_rawip$stub", AID_ROOT, AID_NETWORK_STACK,
+                           sched_cls_tether_downstream4_rawip_stub, KVER_NONE, KVER(5, 9, 0))
 (struct __sk_buff* skb) {
     return TC_ACT_OK;
 }
 
-DEFINE_BPF_PROG("schedcls/tether_upstream4_ether", AID_ROOT, AID_NETWORK_STACK,
-                sched_cls_tether_upstream4_ether)
+DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_ether$stub", AID_ROOT, AID_NETWORK_STACK,
+                           sched_cls_tether_upstream4_ether_stub, KVER_NONE, KVER(5, 9, 0))
 (struct __sk_buff* skb) {
     return TC_ACT_OK;
 }
 
-DEFINE_BPF_PROG("schedcls/tether_upstream4_rawip", AID_ROOT, AID_NETWORK_STACK,
-                sched_cls_tether_upstream4_rawip)
+DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_rawip$stub", AID_ROOT, AID_NETWORK_STACK,
+                           sched_cls_tether_upstream4_rawip_stub, KVER_NONE, KVER(5, 9, 0))
 (struct __sk_buff* skb) {
     return TC_ACT_OK;
 }

diff --git a/framework/src/com/android/connectivity/aidl/INetworkAgent.aidl b/framework/src/com/android/connectivity/aidl/INetworkAgent.aidl
index 1af9e76..64b5567 100644
--- a/framework/src/com/android/connectivity/aidl/INetworkAgent.aidl
+++ b/framework/src/com/android/connectivity/aidl/INetworkAgent.aidl

@@ -16,6 +16,7 @@
 package com.android.connectivity.aidl;
 
 import android.net.NattKeepalivePacketData;
+import android.net.QosFilterParcelable;
 import android.net.TcpKeepalivePacketData;
 
 import com.android.connectivity.aidl.INetworkAgentRegistry;
@@ -43,4 +44,6 @@
     void onAddTcpKeepalivePacketFilter(int slot,
         in TcpKeepalivePacketData packetData);
     void onRemoveKeepalivePacketFilter(int slot);
+    void onQosFilterCallbackRegistered(int qosCallbackId, in QosFilterParcelable filterParcel);
+    void onQosCallbackUnregistered(int qosCallbackId);
 }

diff --git a/framework/src/com/android/connectivity/aidl/INetworkAgentRegistry.aidl b/framework/src/com/android/connectivity/aidl/INetworkAgentRegistry.aidl
index d42a340..f0193db 100644
--- a/framework/src/com/android/connectivity/aidl/INetworkAgentRegistry.aidl
+++ b/framework/src/com/android/connectivity/aidl/INetworkAgentRegistry.aidl

@@ -19,6 +19,8 @@
 import android.net.Network;
 import android.net.NetworkCapabilities;
 import android.net.NetworkInfo;
+import android.net.QosSession;
+import android.telephony.data.EpsBearerQosSessionAttributes;
 
 /**
  * Interface for NetworkAgents to send network network properties.
@@ -33,4 +35,7 @@
     void sendExplicitlySelected(boolean explicitlySelected, boolean acceptPartial);
     void sendSocketKeepaliveEvent(int slot, int reason);
     void sendUnderlyingNetworks(in @nullable List<Network> networks);
+    void sendEpsQosSessionAvailable(int callbackId, in QosSession session, in EpsBearerQosSessionAttributes attributes);
+    void sendQosSessionLost(int qosCallbackId, in QosSession session);
+    void sendQosCallbackError(int qosCallbackId, int exceptionType);
 }
commit	dc4189f0e4a55e29818991b4e937e5fe01a9be02	[log] [tgz]
author	Lorenzo Colitti <lorenzo@google.com>	Tue Jan 26 00:30:15 2021 +0000
committer	Gerrit Code Review <noreply-gerritcodereview@google.com>	Tue Jan 26 00:30:15 2021 +0000
tree	5b7beb2ba270390fb26e24474e07be500e259c7f
parent	c2b014604595bdc6a0d443e6d48725425da7b099 [diff]
parent	dd833dee3102ace586f49c96ea7e8fa7249082f2 [diff]