1 /*
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <linux/bpf.h>
18 #include <linux/if_ether.h>
19 #include <linux/if_packet.h>
20 #include <linux/ip.h>
21 #include <linux/ipv6.h>
22 #include <linux/pkt_cls.h>
23 #include <linux/tcp.h>
24 #include <linux/types.h>
25 #include <netinet/in.h>
26 #include <netinet/udp.h>
27 #include <stdint.h>
28 #include <string.h>
29 
30 // The resulting .o needs to load on Android T+
31 #define BPFLOADER_MIN_VER BPFLOADER_T_VERSION
32 
33 #include "bpf_helpers.h"
34 #include "dscpPolicy.h"
35 
// Mask selecting the two low-order bits of the TOS byte; UPDATE_TOS keeps these bits unchanged.
#define ECN_MASK 3
// Byte offset of `field` inside struct iphdr, added to `header` (the offset at which the IPv4
// header starts).
#define IP4_OFFSET(field, header) ((header) + offsetof(struct iphdr, field))
// Builds a TOS byte value: `dscp` shifted into the bits above the ECN bits, combined with the
// ECN bits taken from the existing `tos`. The whole expansion is parenthesised so the macro can
// be used safely as an operand of a larger expression (e.g. `UPDATE_TOS(d, t) & mask`).
#define UPDATE_TOS(dscp, tos) (((dscp) << 2) | ((tos) & ECN_MASK))
39 
// Cache of the last match result per socket: match_policy() looks entries up and stores them
// using the socket cookie (uint64_t) as the key.
DEFINE_BPF_MAP_GRW(socket_policy_cache_map, HASH, uint64_t, RuleEntry, CACHE_MAP_SIZE, AID_SYSTEM)

// Policy tables scanned by match_policy(), one per IP family, indexed by 0..MAX_POLICIES-1.
DEFINE_BPF_MAP_GRW(ipv4_dscp_policies_map, ARRAY, uint32_t, DscpPolicy, MAX_POLICIES, AID_SYSTEM)
DEFINE_BPF_MAP_GRW(ipv6_dscp_policies_map, ARRAY, uint32_t, DscpPolicy, MAX_POLICIES, AID_SYSTEM)
44 
45 static inline __always_inline void match_policy(struct __sk_buff* skb, bool ipv4) {
46     void* data = (void*)(long)skb->data;
47     const void* data_end = (void*)(long)skb->data_end;
48 
49     const int l2_header_size = sizeof(struct ethhdr);
50     struct ethhdr* eth = data;
51 
52     if (data + l2_header_size > data_end) return;
53 
54     int hdr_size = 0;
55 
56     // used for map lookup
57     uint64_t cookie = bpf_get_socket_cookie(skb);
58     if (!cookie) return;
59 
60     __be16 sport = 0;
61     uint16_t dport = 0;
62     uint8_t protocol = 0;  // TODO: Use are reserved value? Or int (-1) and cast to uint below?
63     struct in6_addr src_ip = {};
64     struct in6_addr dst_ip = {};
65     uint8_t tos = 0;            // Only used for IPv4
66     __be32 old_first_be32 = 0;  // Only used for IPv6
67     if (ipv4) {
68         const struct iphdr* const iph = (void*)(eth + 1);
69         hdr_size = l2_header_size + sizeof(struct iphdr);
70         // Must have ipv4 header
71         if (data + hdr_size > data_end) return;
72 
73         // IP version must be 4
74         if (iph->version != 4) return;
75 
76         // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
77         if (iph->ihl != 5) return;
78 
79         // V4 mapped address in in6_addr sets 10/11 position to 0xff.
80         src_ip.s6_addr32[2] = htonl(0x0000ffff);
81         dst_ip.s6_addr32[2] = htonl(0x0000ffff);
82 
83         // Copy IPv4 address into in6_addr for easy comparison below.
84         src_ip.s6_addr32[3] = iph->saddr;
85         dst_ip.s6_addr32[3] = iph->daddr;
86         protocol = iph->protocol;
87         tos = iph->tos;
88     } else {
89         struct ipv6hdr* ip6h = (void*)(eth + 1);
90         hdr_size = l2_header_size + sizeof(struct ipv6hdr);
91         // Must have ipv6 header
92         if (data + hdr_size > data_end) return;
93 
94         if (ip6h->version != 6) return;
95 
96         src_ip = ip6h->saddr;
97         dst_ip = ip6h->daddr;
98         protocol = ip6h->nexthdr;
99         old_first_be32 = *(__be32*)ip6h;
100     }
101 
102     switch (protocol) {
103         case IPPROTO_UDP:
104         case IPPROTO_UDPLITE: {
105             struct udphdr* udp;
106             udp = data + hdr_size;
107             if ((void*)(udp + 1) > data_end) return;
108             sport = udp->source;
109             dport = ntohs(udp->dest);
110         } break;
111         case IPPROTO_TCP: {
112             struct tcphdr* tcp;
113             tcp = data + hdr_size;
114             if ((void*)(tcp + 1) > data_end) return;
115             sport = tcp->source;
116             dport = ntohs(tcp->dest);
117         } break;
118         default:
119             return;
120     }
121 
122     RuleEntry* existing_rule = bpf_socket_policy_cache_map_lookup_elem(&cookie);
123 
124     if (existing_rule &&
125         v6_equal(src_ip, existing_rule->src_ip) &&
126         v6_equal(dst_ip, existing_rule->dst_ip) &&
127         skb->ifindex == existing_rule->ifindex &&
128         sport == existing_rule->src_port &&
129         dport == existing_rule->dst_port &&
130         protocol == existing_rule->proto) {
131         if (existing_rule->dscp_val < 0) return;
132         if (ipv4) {
133             uint8_t newTos = UPDATE_TOS(existing_rule->dscp_val, tos);
134             bpf_l3_csum_replace(skb, IP4_OFFSET(check, l2_header_size), htons(tos), htons(newTos),
135                                 sizeof(uint16_t));
136             bpf_skb_store_bytes(skb, IP4_OFFSET(tos, l2_header_size), &newTos, sizeof(newTos), 0);
137         } else {
138             __be32 new_first_be32 =
139                 htonl(ntohl(old_first_be32) & 0xF03FFFFF | (existing_rule->dscp_val << 22));
140             bpf_skb_store_bytes(skb, l2_header_size, &new_first_be32, sizeof(__be32),
141                 BPF_F_RECOMPUTE_CSUM);
142         }
143         return;
144     }
145 
146     // Linear scan ipv4_dscp_policies_map since no stored params match skb.
147     int best_score = 0;
148     int8_t new_dscp = -1;
149 
150     for (register uint64_t i = 0; i < MAX_POLICIES; i++) {
151         // Using a uint64 in for loop prevents infinite loop during BPF load,
152         // but the key is uint32, so convert back.
153         uint32_t key = i;
154 
155         DscpPolicy* policy;
156         if (ipv4) {
157             policy = bpf_ipv4_dscp_policies_map_lookup_elem(&key);
158         } else {
159             policy = bpf_ipv6_dscp_policies_map_lookup_elem(&key);
160         }
161 
162         // If the policy lookup failed, just continue (this should not ever happen)
163         if (!policy) continue;
164 
165         // If policy iface index does not match skb, then skip to next policy.
166         if (policy->ifindex != skb->ifindex) continue;
167 
168         int score = 0;
169 
170         if (policy->present_fields & PROTO_MASK_FLAG) {
171             if (protocol != policy->proto) continue;
172             score += 0xFFFF;
173         }
174         if (policy->present_fields & SRC_IP_MASK_FLAG) {
175             if (v6_not_equal(src_ip, policy->src_ip)) continue;
176             score += 0xFFFF;
177         }
178         if (policy->present_fields & DST_IP_MASK_FLAG) {
179             if (v6_not_equal(dst_ip, policy->dst_ip)) continue;
180             score += 0xFFFF;
181         }
182         if (policy->present_fields & SRC_PORT_MASK_FLAG) {
183             if (sport != policy->src_port) continue;
184             score += 0xFFFF;
185         }
186         if (dport < policy->dst_port_start) continue;
187         if (dport > policy->dst_port_end) continue;
188         score += 0xFFFF + policy->dst_port_start - policy->dst_port_end;
189 
190         if (score > best_score) {
191             best_score = score;
192             new_dscp = policy->dscp_val;
193         }
194     }
195 
196     RuleEntry value = {
197         .src_ip = src_ip,
198         .dst_ip = dst_ip,
199         .ifindex = skb->ifindex,
200         .src_port = sport,
201         .dst_port = dport,
202         .proto = protocol,
203         .dscp_val = new_dscp,
204     };
205 
206     // Update cache with found policy.
207     bpf_socket_policy_cache_map_update_elem(&cookie, &value, BPF_ANY);
208 
209     if (new_dscp < 0) return;
210 
211     // Need to store bytes after updating map or program will not load.
212     if (ipv4) {
213         uint8_t new_tos = UPDATE_TOS(new_dscp, tos);
214         bpf_l3_csum_replace(skb, IP4_OFFSET(check, l2_header_size), htons(tos), htons(new_tos), 2);
215         bpf_skb_store_bytes(skb, IP4_OFFSET(tos, l2_header_size), &new_tos, sizeof(new_tos), 0);
216     } else {
217         __be32 new_first_be32 = htonl(ntohl(old_first_be32) & 0xF03FFFFF | (new_dscp << 22));
218         bpf_skb_store_bytes(skb, l2_header_size, &new_first_be32, sizeof(__be32),
219             BPF_F_RECOMPUTE_CSUM);
220     }
221     return;
222 }
223 
224 DEFINE_BPF_PROG_KVER("schedcls/set_dscp_ether", AID_ROOT, AID_SYSTEM, schedcls_set_dscp_ether,
225                      KVER_5_15)
226 (struct __sk_buff* skb) {
227     if (skb->pkt_type != PACKET_HOST) return TC_ACT_PIPE;
228 
229     if (skb->protocol == htons(ETH_P_IP)) {
230         match_policy(skb, true);
231     } else if (skb->protocol == htons(ETH_P_IPV6)) {
232         match_policy(skb, false);
233     }
234 
235     // Always return TC_ACT_PIPE
236     return TC_ACT_PIPE;
237 }
238 
// Object-level declarations. The macros below are defined outside this file.
// Declares the license string for the programs in this object.
LICENSE("Apache 2.0");
// NOTE(review): presumably marks the object as critical for the "Connectivity" component, so a
// load failure is treated as fatal - confirm against the macro definition.
CRITICAL("Connectivity");
// NOTE(review): the two names suggest build-type and release gating of this object; the exact
// conditions are not visible here - confirm against the macro definitions.
DISABLE_BTF_ON_USER_BUILDS();
DISABLE_ON_MAINLINE_BEFORE_U_QPR3();
243