Tyler Wear | 7238821 | 2021-09-09 14:49:02 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2021 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include <linux/types.h> |
| 18 | #include <linux/bpf.h> |
| 19 | #include <linux/ip.h> |
| 20 | #include <linux/ipv6.h> |
| 21 | #include <linux/if_ether.h> |
| 22 | #include <linux/pkt_cls.h> |
| 23 | #include <linux/tcp.h> |
| 24 | #include <stdint.h> |
| 25 | #include <netinet/in.h> |
| 26 | #include <netinet/udp.h> |
| 27 | #include <string.h> |
| 28 | |
| 29 | #include "bpf_helpers.h" |
| 30 | |
// Number of entries each socket-cache map and each policy map is declared with.
#define MAX_POLICIES 16
// Selector values stored in switch_comp_map to choose socket cache map A or B.
#define MAP_A 1
#define MAP_B 2

// Fails the build unless `type` occupies exactly `expected` bytes.
#define STRUCT_SIZE(type, expected) \
    _Static_assert(sizeof(type) == (expected), "Incorrect struct size.")
| 36 | |
// TODO: these are already defined in /system/netd/bpf_progs/bpf_net_helpers.h
// should they be moved to common location?

// Kernel helper bindings: each pointer is initialised from the helper's
// BPF_FUNC_* id rather than from a real function address.

// Socket cookie lookup for the given skb.
static uint64_t (*bpf_get_socket_cookie)(struct __sk_buff* skb) =
        (void*)BPF_FUNC_get_socket_cookie;

// Copies `len` bytes from `from` into the packet at `offset`.
static int (*bpf_skb_store_bytes)(struct __sk_buff* skb,
                                  __u32 offset,
                                  const void* from,
                                  __u32 len,
                                  __u64 flags) =
        (void*)BPF_FUNC_skb_store_bytes;

// L3 checksum fix-up for a field changing from `from` to `to`.
static int (*bpf_l3_csum_replace)(struct __sk_buff* skb,
                                  __u32 offset,
                                  __u64 from,
                                  __u64 to,
                                  __u64 flags) =
        (void*)BPF_FUNC_l3_csum_replace;
| 45 | |
| 46 | typedef struct { |
| 47 | // Add family here to match __sk_buff ? |
| 48 | struct in_addr srcIp; |
| 49 | struct in_addr dstIp; |
| 50 | __be16 srcPort; |
| 51 | __be16 dstPort; |
| 52 | uint8_t proto; |
| 53 | uint8_t dscpVal; |
| 54 | uint8_t pad[2]; |
| 55 | } Ipv4RuleEntry; |
| 56 | STRUCT_SIZE(Ipv4RuleEntry, 2 * 4 + 2 * 2 + 2 * 1 + 2); // 16, 4 for in_addr |
| 57 | |
// Bits of a policy's `mask` field; a set bit means the corresponding
// parameter takes part in matching.
#define SRC_IP_MASK   (1 << 0)
#define DST_IP_MASK   (1 << 1)
#define SRC_PORT_MASK (1 << 2)
#define DST_PORT_MASK (1 << 3)
#define PROTO_MASK    (1 << 4)
| 63 | |
| 64 | typedef struct { |
| 65 | struct in6_addr srcIp; |
| 66 | struct in6_addr dstIp; |
| 67 | __be16 srcPort; |
| 68 | __be16 dstPortStart; |
| 69 | __be16 dstPortEnd; |
| 70 | uint8_t proto; |
| 71 | uint8_t dscpVal; |
| 72 | uint8_t mask; |
| 73 | uint8_t pad[3]; |
| 74 | } Ipv4Policy; |
| 75 | STRUCT_SIZE(Ipv4Policy, 2 * 16 + 3 * 2 + 3 * 1 + 3); // 44 |
| 76 | |
| 77 | typedef struct { |
| 78 | struct in6_addr srcIp; |
| 79 | struct in6_addr dstIp; |
| 80 | __be16 srcPort; |
| 81 | __be16 dstPortStart; |
| 82 | __be16 dstPortEnd; |
| 83 | uint8_t proto; |
| 84 | uint8_t dscpVal; |
| 85 | uint8_t mask; |
| 86 | // should we override this struct to include the param bitmask for linear search? |
| 87 | // For mapping socket to policies, all the params should match exactly since we can |
| 88 | // pull any missing from the sock itself. |
| 89 | } Ipv6RuleEntry; |
| 90 | STRUCT_SIZE(Ipv6RuleEntry, 2 * 16 + 3 * 2 + 3 * 1 + 3); // 44 |
| 91 | |
| 92 | // TODO: move to using 1 map. Map v4 address to 0xffff::v4 |
| 93 | DEFINE_BPF_MAP_GRW(ipv4_socket_to_policies_map_A, HASH, uint64_t, Ipv4RuleEntry, MAX_POLICIES, |
| 94 | AID_SYSTEM) |
| 95 | DEFINE_BPF_MAP_GRW(ipv4_socket_to_policies_map_B, HASH, uint64_t, Ipv4RuleEntry, MAX_POLICIES, |
| 96 | AID_SYSTEM) |
| 97 | DEFINE_BPF_MAP_GRW(ipv6_socket_to_policies_map_A, HASH, uint64_t, Ipv6RuleEntry, MAX_POLICIES, |
| 98 | AID_SYSTEM) |
| 99 | DEFINE_BPF_MAP_GRW(ipv6_socket_to_policies_map_B, HASH, uint64_t, Ipv6RuleEntry, MAX_POLICIES, |
| 100 | AID_SYSTEM) |
| 101 | DEFINE_BPF_MAP_GRW(switch_comp_map, ARRAY, int, uint64_t, 1, AID_SYSTEM) |
| 102 | |
| 103 | DEFINE_BPF_MAP_GRW(ipv4_dscp_policies_map, ARRAY, uint32_t, Ipv4Policy, MAX_POLICIES, |
| 104 | AID_SYSTEM) |
| 105 | DEFINE_BPF_MAP_GRW(ipv6_dscp_policies_map, ARRAY, uint32_t, Ipv6RuleEntry, MAX_POLICIES, |
| 106 | AID_SYSTEM) |
| 107 | |
| 108 | DEFINE_BPF_PROG_KVER("schedcls/set_dscp", AID_ROOT, AID_SYSTEM, |
| 109 | schedcls_set_dscp, KVER(5, 4, 0)) |
| 110 | (struct __sk_buff* skb) { |
| 111 | int one = 0; |
| 112 | uint64_t* selectedMap = bpf_switch_comp_map_lookup_elem(&one); |
| 113 | |
| 114 | // use this with HASH map so map lookup only happens once policies have been added? |
| 115 | if (!selectedMap) { |
| 116 | return TC_ACT_PIPE; |
| 117 | } |
| 118 | |
| 119 | // used for map lookup |
| 120 | uint64_t cookie = bpf_get_socket_cookie(skb); |
| 121 | |
| 122 | // Do we need separate maps for ipv4/ipv6 |
| 123 | if (skb->protocol == htons(ETH_P_IP)) { //maybe bpf_htons() |
| 124 | Ipv4RuleEntry* v4Policy; |
| 125 | if (*selectedMap == MAP_A) { |
| 126 | v4Policy = bpf_ipv4_socket_to_policies_map_A_lookup_elem(&cookie); |
| 127 | } else { |
| 128 | v4Policy = bpf_ipv4_socket_to_policies_map_B_lookup_elem(&cookie); |
| 129 | } |
| 130 | |
| 131 | // How to use bitmask here to compare params efficiently? |
| 132 | // TODO: add BPF_PROG_TYPE_SK_SKB prog type to Loader? |
| 133 | |
| 134 | void* data = (void*)(long)skb->data; |
| 135 | const void* data_end = (void*)(long)skb->data_end; |
| 136 | const struct iphdr* const iph = data; |
| 137 | |
| 138 | // Must have ipv4 header |
| 139 | if (data + sizeof(*iph) > data_end) return TC_ACT_PIPE; |
| 140 | |
| 141 | // IP version must be 4 |
| 142 | if (iph->version != 4) return TC_ACT_PIPE; |
| 143 | |
| 144 | // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header |
| 145 | if (iph->ihl != 5) return TC_ACT_PIPE; |
| 146 | |
| 147 | if (iph->protocol != IPPROTO_UDP) return TC_ACT_PIPE; |
| 148 | |
| 149 | struct udphdr *udp; |
| 150 | udp = data + sizeof(struct iphdr); //sizeof(struct ethhdr) |
| 151 | |
| 152 | if ((void*)(udp + 1) > data_end) return TC_ACT_PIPE; |
| 153 | |
| 154 | // Source/destination port in udphdr are stored in be16, need to convert to le16. |
| 155 | // This can be done via ntohs or htons. Is there a more preferred way? |
| 156 | // Cached policy was found. |
| 157 | if (v4Policy && iph->saddr == v4Policy->srcIp.s_addr && |
| 158 | iph->daddr == v4Policy->dstIp.s_addr && |
| 159 | ntohs(udp->source) == v4Policy->srcPort && |
| 160 | ntohs(udp->dest) == v4Policy->dstPort && |
| 161 | iph->protocol == v4Policy->proto) { |
| 162 | // set dscpVal in packet. Least sig 2 bits of TOS |
| 163 | // reference ipv4_change_dsfield() |
| 164 | |
| 165 | // TODO: fix checksum... |
| 166 | int ecn = iph->tos & 3; |
| 167 | uint8_t newDscpVal = (v4Policy->dscpVal << 2) + ecn; |
| 168 | int oldDscpVal = iph->tos >> 2; |
| 169 | bpf_l3_csum_replace(skb, 1, oldDscpVal, newDscpVal, sizeof(uint8_t)); |
| 170 | bpf_skb_store_bytes(skb, 1, &newDscpVal, sizeof(uint8_t), 0); |
| 171 | return TC_ACT_PIPE; |
| 172 | } |
| 173 | |
| 174 | // linear scan ipv4_dscp_policies_map, stored socket params do not match actual |
| 175 | int bestScore = -1; |
| 176 | uint32_t bestMatch = 0; |
| 177 | |
| 178 | for (register uint64_t i = 0; i < MAX_POLICIES; i++) { |
| 179 | int score = 0; |
| 180 | uint8_t tempMask = 0; |
| 181 | // Using a uint62 in for loop prevents infinite loop during BPF load, |
| 182 | // but the key is uint32, so convert back. |
| 183 | uint32_t key = i; |
| 184 | Ipv4Policy* policy = bpf_ipv4_dscp_policies_map_lookup_elem(&key); |
| 185 | |
| 186 | // if mask is 0 continue, key does not have corresponding policy value |
| 187 | if (policy && policy->mask != 0) { |
| 188 | if ((policy->mask & SRC_IP_MASK) == SRC_IP_MASK && |
| 189 | iph->saddr == policy->srcIp.s6_addr32[3]) { |
| 190 | score++; |
| 191 | tempMask |= SRC_IP_MASK; |
| 192 | } |
| 193 | if ((policy->mask & DST_IP_MASK) == DST_IP_MASK && |
| 194 | iph->daddr == policy->dstIp.s6_addr32[3]) { |
| 195 | score++; |
| 196 | tempMask |= DST_IP_MASK; |
| 197 | } |
| 198 | if ((policy->mask & SRC_PORT_MASK) == SRC_PORT_MASK && |
| 199 | ntohs(udp->source) == htons(policy->srcPort)) { |
| 200 | score++; |
| 201 | tempMask |= SRC_PORT_MASK; |
| 202 | } |
| 203 | if ((policy->mask & DST_PORT_MASK) == DST_PORT_MASK && |
| 204 | ntohs(udp->dest) >= htons(policy->dstPortStart) && |
| 205 | ntohs(udp->dest) <= htons(policy->dstPortEnd)) { |
| 206 | score++; |
| 207 | tempMask |= DST_PORT_MASK; |
| 208 | } |
| 209 | if ((policy->mask & PROTO_MASK) == PROTO_MASK && |
| 210 | iph->protocol == policy->proto) { |
| 211 | score++; |
| 212 | tempMask |= PROTO_MASK; |
| 213 | } |
| 214 | |
| 215 | if (score > bestScore && tempMask == policy->mask) { |
| 216 | bestMatch = i; |
| 217 | bestScore = score; |
| 218 | } |
| 219 | } |
| 220 | } |
| 221 | |
| 222 | uint8_t newDscpVal = 0; // Can 0 be used as default forwarding value? |
| 223 | uint8_t curDscp = iph->tos & 252; |
| 224 | if (bestScore > 0) { |
| 225 | Ipv4Policy* policy = bpf_ipv4_dscp_policies_map_lookup_elem(&bestMatch); |
| 226 | if (policy) { |
| 227 | // TODO: if DSCP value is already set ignore? |
| 228 | // TODO: update checksum, for testing increment counter... |
| 229 | int ecn = iph->tos & 3; |
| 230 | newDscpVal = (policy->dscpVal << 2) + ecn; |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | Ipv4RuleEntry value = { |
| 235 | .srcIp.s_addr = iph->saddr, |
| 236 | .dstIp.s_addr = iph->daddr, |
| 237 | .srcPort = udp->source, |
| 238 | .dstPort = udp->dest, |
| 239 | .proto = iph->protocol, |
| 240 | .dscpVal = newDscpVal, |
| 241 | }; |
| 242 | |
| 243 | if (!cookie) |
| 244 | return TC_ACT_PIPE; |
| 245 | |
| 246 | // Update map |
| 247 | if (*selectedMap == MAP_A) { |
| 248 | bpf_ipv4_socket_to_policies_map_A_update_elem(&cookie, &value, BPF_ANY); |
| 249 | } else { |
| 250 | bpf_ipv4_socket_to_policies_map_B_update_elem(&cookie, &value, BPF_ANY); |
| 251 | } |
| 252 | |
| 253 | // Need to store bytes after updating map or program will not load. |
| 254 | if (newDscpVal != curDscp) { |
| 255 | // 1 is the offset (Version/Header length) |
| 256 | int oldDscpVal = iph->tos >> 2; |
| 257 | bpf_l3_csum_replace(skb, 1, oldDscpVal, newDscpVal, sizeof(uint8_t)); |
| 258 | bpf_skb_store_bytes(skb, 1, &newDscpVal, sizeof(uint8_t), 0); |
| 259 | } |
| 260 | |
| 261 | } else if (skb->protocol == htons(ETH_P_IPV6)) { //maybe bpf_htons() |
| 262 | Ipv6RuleEntry* v6Policy; |
| 263 | if (*selectedMap == MAP_A) { |
| 264 | v6Policy = bpf_ipv6_socket_to_policies_map_A_lookup_elem(&cookie); |
| 265 | } else { |
| 266 | v6Policy = bpf_ipv6_socket_to_policies_map_B_lookup_elem(&cookie); |
| 267 | } |
| 268 | |
| 269 | if (!v6Policy) |
| 270 | return TC_ACT_PIPE; |
| 271 | |
| 272 | // TODO: Add code to process IPv6 packet. |
| 273 | } |
| 274 | |
| 275 | // Always return TC_ACT_PIPE |
| 276 | return TC_ACT_PIPE; |
| 277 | } |
| 278 | |
| 279 | LICENSE("Apache 2.0"); |
| 280 | CRITICAL("Connectivity"); |