offload - finish ipv4 tethering

Test: atest, TreeHugger
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Change-Id: I88f14938c7c4f66190a3afa2acd9104284e27927
diff --git a/Tethering/bpf_progs/offload.c b/Tethering/bpf_progs/offload.c
index 2997031..8a2e6bc 100644
--- a/Tethering/bpf_progs/offload.c
+++ b/Tethering/bpf_progs/offload.c
@@ -31,6 +31,44 @@
 // From kernel:include/net/ip.h
 #define IP_DF 0x4000  // Flag: "Don't Fragment"
 
+// ----- Helper functions for offsets to fields -----
+
+// They all assume simple IP packets:
+//   - no VLAN ethernet tags
+//   - no IPv4 options (see IPV4_HLEN/TCP4_OFFSET/UDP4_OFFSET)
+//   - no IPv6 extension headers
+//   - no TCP options (see TCP_HLEN)
+
+//#define ETH_HLEN sizeof(struct ethhdr)
+#define IP4_HLEN sizeof(struct iphdr)
+#define IP6_HLEN sizeof(struct ipv6hdr)
+#define TCP_HLEN sizeof(struct tcphdr)
+#define UDP_HLEN sizeof(struct udphdr)
+
+// Offsets from beginning of L4 (TCP/UDP) header
+#define TCP_OFFSET(field) offsetof(struct tcphdr, field)
+#define UDP_OFFSET(field) offsetof(struct udphdr, field)
+
+// Offsets from beginning of L3 (IPv4) header
+#define IP4_OFFSET(field) offsetof(struct iphdr, field)
+#define IP4_TCP_OFFSET(field) (IP4_HLEN + TCP_OFFSET(field))
+#define IP4_UDP_OFFSET(field) (IP4_HLEN + UDP_OFFSET(field))
+
+// Offsets from beginning of L3 (IPv6) header
+#define IP6_OFFSET(field) offsetof(struct ipv6hdr, field)
+#define IP6_TCP_OFFSET(field) (IP6_HLEN + TCP_OFFSET(field))
+#define IP6_UDP_OFFSET(field) (IP6_HLEN + UDP_OFFSET(field))
+
+// Offsets from beginning of L2 (ie. Ethernet) header (which must be present)
+#define ETH_IP4_OFFSET(field) (ETH_HLEN + IP4_OFFSET(field))
+#define ETH_IP4_TCP_OFFSET(field) (ETH_HLEN + IP4_TCP_OFFSET(field))
+#define ETH_IP4_UDP_OFFSET(field) (ETH_HLEN + IP4_UDP_OFFSET(field))
+#define ETH_IP6_OFFSET(field) (ETH_HLEN + IP6_OFFSET(field))
+#define ETH_IP6_TCP_OFFSET(field) (ETH_HLEN + IP6_TCP_OFFSET(field))
+#define ETH_IP6_UDP_OFFSET(field) (ETH_HLEN + IP6_UDP_OFFSET(field))
+
+// ----- Tethering stats and data limits -----
+
 // Tethering stats, indexed by upstream interface.
 DEFINE_BPF_MAP_GRW(tether_stats_map, HASH, TetherStatsKey, TetherStatsValue, 16, AID_NETWORK_STACK)
 
@@ -410,12 +448,76 @@
     // since we don't offload all traffic in both directions)
     if (stat_v->rxBytes + stat_v->txBytes + bytes > *limit_v) return TC_ACT_OK;
 
-    // TODO: replace Errors with Packets once implemented
-    __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, packets);
+
+if (!is_tcp) return TC_ACT_OK; // HACK
+
+    if (!is_ethernet) {
+        // Try to inject an ethernet header, and simply return if we fail.
+        // We do this even if TX interface is RAWIP and thus does not need an ethernet header,
+        // because this is easier and the kernel will strip extraneous ethernet header.
+        if (bpf_skb_change_head(skb, sizeof(struct ethhdr), /*flags*/ 0)) {
+            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            return TC_ACT_OK;
+        }
+
+        // bpf_skb_change_head() invalidates all pointers - reload them
+        data = (void*)(long)skb->data;
+        data_end = (void*)(long)skb->data_end;
+        eth = data;
+        ip = (void*)(eth + 1);
+        tcph = is_tcp ? (void*)(ip + 1) : NULL;
+        udph = is_tcp ? NULL : (void*)(ip + 1);
+
+        // I do not believe this can ever happen, but keep the verifier happy...
+        if (data + sizeof(struct ethhdr) + sizeof(*ip) + (is_tcp ? sizeof(*tcph) : sizeof(*udph)) > data_end) {
+            __sync_fetch_and_add(downstream ? &stat_v->rxErrors : &stat_v->txErrors, 1);
+            return TC_ACT_SHOT;
+        }
+    };
+
+    // At this point we always have an ethernet header - which will get stripped by the
+    // kernel during transmit through a rawip interface.  ie. 'eth' pointer is valid.
+    // Additionally note that 'is_ethernet' and 'l2_header_size' are no longer correct.
+
+    // Overwrite any mac header with the new one
+    // For a rawip tx interface it will simply be a bunch of zeroes and later stripped.
+    *eth = v->macHeader;
+
+    const int sz4 = sizeof(__be32);
+    const __be32 old_daddr = k.dst4.s_addr;
+    const __be32 old_saddr = k.src4.s_addr;
+    const __be32 new_daddr = v->dst46.s6_addr32[3];
+    const __be32 new_saddr = v->src46.s6_addr32[3];
+
+    bpf_l4_csum_replace(skb, ETH_IP4_TCP_OFFSET(check), old_daddr, new_daddr, sz4 | BPF_F_PSEUDO_HDR);
+    bpf_l3_csum_replace(skb, ETH_IP4_OFFSET(check), old_daddr, new_daddr, sz4);
+    bpf_skb_store_bytes(skb, ETH_IP4_OFFSET(daddr), &new_daddr, sz4, 0);
+
+    bpf_l4_csum_replace(skb, ETH_IP4_TCP_OFFSET(check), old_saddr, new_saddr, sz4 | BPF_F_PSEUDO_HDR);
+    bpf_l3_csum_replace(skb, ETH_IP4_OFFSET(check), old_saddr, new_saddr, sz4);
+    bpf_skb_store_bytes(skb, ETH_IP4_OFFSET(saddr), &new_saddr, sz4, 0);
+
+    const int sz2 = sizeof(__be16);
+    bpf_l4_csum_replace(skb, ETH_IP4_TCP_OFFSET(check), k.srcPort, v->srcPort, sz2);
+    bpf_skb_store_bytes(skb, ETH_IP4_TCP_OFFSET(source), &v->srcPort, sz2, 0);
+
+    bpf_l4_csum_replace(skb, ETH_IP4_TCP_OFFSET(check), k.dstPort, v->dstPort, sz2);
+    bpf_skb_store_bytes(skb, ETH_IP4_TCP_OFFSET(dest), &v->dstPort, sz2, 0);
+
+// TTL dec
+
+// v->last_used = bpf_ktime_get_boot_ns();
+
+    __sync_fetch_and_add(downstream ? &stat_v->rxPackets : &stat_v->txPackets, packets);
     __sync_fetch_and_add(downstream ? &stat_v->rxBytes : &stat_v->txBytes, bytes);
 
-    // TODO: not actually implemented yet
-    return TC_ACT_OK;
+    // Redirect to forwarded interface.
+    //
+    // Note that bpf_redirect() cannot fail unless you pass invalid flags.
+    // The redirect actually happens after the ebpf program has already terminated,
+    // and can fail for example for mtu reasons at that point in time, but there's nothing
+    // we can do about it here.
+    return bpf_redirect(v->oif, 0 /* this is effectively BPF_F_EGRESS */);
 }
 
 // Real implementations for 5.9+ kernels