offload ebpf - introduce 4.14+ ipv4 forwarding programs

We've backported the necessary support to all 4.14+ ACK kernels,
but we can't actually enforce that these changes will be picked
up by all devices.  Thus we can only make the full featured
implementations optional on [4.14..5.8) kernels, with a tcp-only
version for those 4.14+ devices where the full featured version
fails to load.

Note: there's still a fair bit of implementation work left
in the do_forward4() function itself.  This is really just
the skeleton.

Test: atest, TreeHugger
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Change-Id: If78123e00d55a77f2ecd7da1547581797e23f9b2
diff --git a/Tethering/bpf_progs/bpf_tethering.h b/Tethering/bpf_progs/bpf_tethering.h
index c8ada88..10e6bb0 100644
--- a/Tethering/bpf_progs/bpf_tethering.h
+++ b/Tethering/bpf_progs/bpf_tethering.h
@@ -40,6 +40,7 @@
     ERR(IS_IP_FRAG)          \
     ERR(CHECKSUM)            \
     ERR(NON_TCP_UDP)         \
+    ERR(NON_TCP)             \
     ERR(SHORT_TCP_HEADER)    \
     ERR(SHORT_UDP_HEADER)    \
     ERR(TRUNCATED_IPV4)      \
diff --git a/Tethering/bpf_progs/offload.c b/Tethering/bpf_progs/offload.c
index b5a34db..d4b5246 100644
--- a/Tethering/bpf_progs/offload.c
+++ b/Tethering/bpf_progs/offload.c
@@ -387,10 +387,21 @@
     // Let the kernel's stack handle these cases and generate appropriate ICMP errors.
     if (ip->ttl <= 1) PUNT(LOW_TTL);
 
-    const bool is_tcp = (ip->protocol == IPPROTO_TCP);
+    // If we cannot update the 'last_used' field due to lack of bpf_ktime_get_boot_ns() helper,
+    // then it is not safe to offload UDP due to the small conntrack timeouts, as such,
+    // in such a situation we can only support TCP.  This also has the added nice benefit of
+    // using a separate error counter, and thus making it obvious which version of the program
+    // is loaded.
+    if (!updatetime && ip->protocol != IPPROTO_TCP) PUNT(NON_TCP);
 
-    // We do not support anything besides TCP and UDP
-    if (!is_tcp && (ip->protocol != IPPROTO_UDP)) PUNT(NON_TCP_UDP);
+    // We do not support offloading anything besides IPv4 TCP and UDP, due to need for NAT,
+    // but no need to check this if !updatetime due to check immediately above.
+    if (updatetime && (ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP))
+        PUNT(NON_TCP_UDP);
+
+    // We want to make sure that the compiler will, in the !updatetime case, entirely optimize
+    // out all the non-tcp logic.  Also note that at this point is_udp === !is_tcp.
+    const bool is_tcp = !updatetime || (ip->protocol == IPPROTO_TCP);
 
     struct tcphdr* tcph = is_tcp ? (void*)(ip + 1) : NULL;
     struct udphdr* udph = is_tcp ? NULL : (void*)(ip + 1);
@@ -463,8 +474,7 @@
     // since we don't offload all traffic in both directions)
     if (stat_v->rxBytes + stat_v->txBytes + bytes > *limit_v) PUNT(LIMIT_REACHED);
 
-
-if (!is_tcp) return TC_ACT_OK; // HACK
+    if (!is_tcp) PUNT(NON_TCP);  // TEMP HACK: will remove once UDP is supported.
 
     if (!is_ethernet) {
         // Try to inject an ethernet header, and simply return if we fail.
@@ -519,8 +529,10 @@
     bpf_l4_csum_replace(skb, ETH_IP4_TCP_OFFSET(check), k.dstPort, v->dstPort, sz2);
     bpf_skb_store_bytes(skb, ETH_IP4_TCP_OFFSET(dest), &v->dstPort, sz2, 0);
 
-// TTL dec
+    // TEMP HACK: lack of TTL decrement
 
+    // This requires the bpf_ktime_get_boot_ns() helper which was added in 5.8,
+    // and backported to all Android Common Kernel 4.14+ trees.
     if (updatetime) v->last_used = bpf_ktime_get_boot_ns();
 
     __sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
@@ -535,7 +547,7 @@
     return bpf_redirect(v->oif, 0 /* this is effectively BPF_F_EGRESS */);
 }
 
-// Real implementations for 5.8+ kernels
+// Full featured (required) implementations for 5.8+ kernels
 
 DEFINE_BPF_PROG_KVER("schedcls/tether_downstream4_ether$5_8", AID_ROOT, AID_NETWORK_STACK,
                      sched_cls_tether_downstream4_ether_5_8, KVER(5, 8, 0))
@@ -561,28 +573,97 @@
     return do_forward4(skb, /* is_ethernet */ false, /* downstream */ false, /* updatetime */ true);
 }
 
-// Placeholder implementations for older pre-5.8 kernels
+// Full featured (optional) implementations for [4.14..5.8) kernels
+
+DEFINE_OPTIONAL_BPF_PROG_KVER_RANGE("schedcls/tether_downstream4_ether$opt",
+                                    AID_ROOT, AID_NETWORK_STACK,
+                                    sched_cls_tether_downstream4_ether_opt,
+                                    KVER(4, 14, 0), KVER(5, 8, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ true, /* downstream */ true, /* updatetime */ true);
+}
+
+DEFINE_OPTIONAL_BPF_PROG_KVER_RANGE("schedcls/tether_downstream4_rawip$opt",
+                                    AID_ROOT, AID_NETWORK_STACK,
+                                    sched_cls_tether_downstream4_rawip_opt,
+                                    KVER(4, 14, 0), KVER(5, 8, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ false, /* downstream */ true, /* updatetime */ true);
+}
+
+DEFINE_OPTIONAL_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_ether$opt",
+                                    AID_ROOT, AID_NETWORK_STACK,
+                                    sched_cls_tether_upstream4_ether_opt,
+                                    KVER(4, 14, 0), KVER(5, 8, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ true, /* downstream */ false, /* updatetime */ true);
+}
+
+DEFINE_OPTIONAL_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_rawip$opt",
+                                    AID_ROOT, AID_NETWORK_STACK,
+                                    sched_cls_tether_upstream4_rawip_opt,
+                                    KVER(4, 14, 0), KVER(5, 8, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ false, /* downstream */ false, /* updatetime */ true);
+}
+
+// Partial (TCP-only: will not update 'last_used' field) implementations for 4.14+ kernels.
+// These will be loaded only if the above optional ones failed (loading of *these* must succeed).
+//
+// [Note: as a result TCP connections will not have their conntrack timeout refreshed, however,
+// since /proc/sys/net/netfilter/nf_conntrack_tcp_timeout_established defaults to 432000 (seconds),
+// this in practice means they'll break only after 5 days.  This seems an acceptable trade-off.
+//
+// Additionally kernel/tests change "net-test: add bpf_ktime_get_ns / bpf_ktime_get_boot_ns tests"
+// which enforces and documents the required kernel cherrypicks will make it pretty unlikely that
+// many devices upgrading to S will end up relying on these fallback programs.
+
+DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_downstream4_ether$4_14", AID_ROOT, AID_NETWORK_STACK,
+                           sched_cls_tether_downstream4_ether_4_14, KVER(4, 14, 0), KVER(5, 8, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ true, /* downstream */ true, /* updatetime */ false);
+}
+
+DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_downstream4_rawip$4_14", AID_ROOT, AID_NETWORK_STACK,
+                           sched_cls_tether_downstream4_rawip_4_14, KVER(4, 14, 0), KVER(5, 8, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ false, /* downstream */ true, /* updatetime */ false);
+}
+
+DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_ether$4_14", AID_ROOT, AID_NETWORK_STACK,
+                           sched_cls_tether_upstream4_ether_4_14, KVER(4, 14, 0), KVER(5, 8, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ true, /* downstream */ false, /* updatetime */ false);
+}
+
+DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_rawip$4_14", AID_ROOT, AID_NETWORK_STACK,
+                           sched_cls_tether_upstream4_rawip_4_14, KVER(4, 14, 0), KVER(5, 8, 0))
+(struct __sk_buff* skb) {
+    return do_forward4(skb, /* is_ethernet */ false, /* downstream */ false, /* updatetime */ false);
+}
+
+// Placeholder (no-op) implementations for older pre-4.14 kernels
 
 DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_downstream4_ether$stub", AID_ROOT, AID_NETWORK_STACK,
-                           sched_cls_tether_downstream4_ether_stub, KVER_NONE, KVER(5, 8, 0))
+                           sched_cls_tether_downstream4_ether_stub, KVER_NONE, KVER(4, 14, 0))
 (struct __sk_buff* skb) {
     return TC_ACT_OK;
 }
 
 DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_downstream4_rawip$stub", AID_ROOT, AID_NETWORK_STACK,
-                           sched_cls_tether_downstream4_rawip_stub, KVER_NONE, KVER(5, 8, 0))
+                           sched_cls_tether_downstream4_rawip_stub, KVER_NONE, KVER(4, 14, 0))
 (struct __sk_buff* skb) {
     return TC_ACT_OK;
 }
 
 DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_ether$stub", AID_ROOT, AID_NETWORK_STACK,
-                           sched_cls_tether_upstream4_ether_stub, KVER_NONE, KVER(5, 8, 0))
+                           sched_cls_tether_upstream4_ether_stub, KVER_NONE, KVER(4, 14, 0))
 (struct __sk_buff* skb) {
     return TC_ACT_OK;
 }
 
 DEFINE_BPF_PROG_KVER_RANGE("schedcls/tether_upstream4_rawip$stub", AID_ROOT, AID_NETWORK_STACK,
-                           sched_cls_tether_upstream4_rawip_stub, KVER_NONE, KVER(5, 8, 0))
+                           sched_cls_tether_upstream4_rawip_stub, KVER_NONE, KVER(4, 14, 0))
 (struct __sk_buff* skb) {
     return TC_ACT_OK;
 }