1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.net.ip;
18 
19 import static android.net.metrics.IpReachabilityEvent.NUD_FAILED;
20 import static android.net.metrics.IpReachabilityEvent.NUD_FAILED_ORGANIC;
21 import static android.net.metrics.IpReachabilityEvent.PROVISIONING_LOST;
22 import static android.net.metrics.IpReachabilityEvent.PROVISIONING_LOST_ORGANIC;
23 
24 import static com.android.networkstack.util.NetworkStackUtils.IP_REACHABILITY_IGNORE_INCOMPLETE_IPV6_DEFAULT_ROUTER_VERSION;
25 import static com.android.networkstack.util.NetworkStackUtils.IP_REACHABILITY_IGNORE_INCOMPLETE_IPV6_DNS_SERVER_VERSION;
26 import static com.android.networkstack.util.NetworkStackUtils.IP_REACHABILITY_IGNORE_NEVER_REACHABLE_NEIGHBOR_VERSION;
27 import static com.android.networkstack.util.NetworkStackUtils.IP_REACHABILITY_IGNORE_ORGANIC_NUD_FAILURE_VERSION;
28 import static com.android.networkstack.util.NetworkStackUtils.IP_REACHABILITY_MCAST_RESOLICIT_VERSION;
29 import static com.android.networkstack.util.NetworkStackUtils.IP_REACHABILITY_ROUTER_MAC_CHANGE_FAILURE_ONLY_AFTER_ROAM_VERSION;
30 
31 import android.content.Context;
32 import android.net.ConnectivityManager;
33 import android.net.INetd;
34 import android.net.LinkProperties;
35 import android.net.RouteInfo;
36 import android.net.metrics.IpConnectivityLog;
37 import android.net.metrics.IpReachabilityEvent;
38 import android.net.networkstack.aidl.ip.ReachabilityLossReason;
39 import android.os.ConditionVariable;
40 import android.os.Handler;
41 import android.os.Looper;
42 import android.os.PowerManager;
43 import android.os.PowerManager.WakeLock;
44 import android.os.RemoteException;
45 import android.os.SystemClock;
46 import android.stats.connectivity.IpType;
47 import android.stats.connectivity.NudEventType;
48 import android.stats.connectivity.NudNeighborType;
49 import android.text.TextUtils;
50 import android.util.ArraySet;
51 import android.util.Log;
52 
53 import androidx.annotation.NonNull;
54 import androidx.annotation.Nullable;
55 
56 import com.android.internal.annotations.VisibleForTesting;
57 import com.android.internal.util.Preconditions;
58 import com.android.net.module.util.DeviceConfigUtils;
59 import com.android.net.module.util.InterfaceParams;
60 import com.android.net.module.util.SharedLog;
61 import com.android.net.module.util.ip.IpNeighborMonitor;
62 import com.android.net.module.util.ip.IpNeighborMonitor.NeighborEvent;
63 import com.android.net.module.util.ip.IpNeighborMonitor.NeighborEventConsumer;
64 import com.android.net.module.util.netlink.StructNdMsg;
65 import com.android.networkstack.R;
66 import com.android.networkstack.metrics.IpReachabilityMonitorMetrics;
67 
68 import java.io.PrintWriter;
69 import java.net.Inet6Address;
70 import java.net.InetAddress;
71 import java.util.ArrayList;
72 import java.util.HashMap;
73 import java.util.List;
74 import java.util.Map;
75 import java.util.Set;
76 
77 
78 /**
79  * IpReachabilityMonitor.
80  *
81  * Monitors on-link IP reachability and notifies callers whenever any on-link
82  * addresses of interest appear to have become unresponsive.
83  *
84  * This code does not concern itself with "why" a neighbour might have become
85  * unreachable. Instead, it primarily reacts to the kernel's notion of IP
86  * reachability for each of the neighbours we know to be critically important
87  * to normal network connectivity. As such, it is often "just the messenger":
88  * the neighbours about which it warns are already deemed by the kernel to have
89  * become unreachable.
90  *
91  *
92  * How it works:
93  *
94  *   1. The "on-link neighbours of interest" found in a given LinkProperties
95  *      instance are added to a "watch list" via #updateLinkProperties().
96  *      This usually means all default gateways and any on-link DNS servers.
97  *
98  *   2. We listen continuously for netlink neighbour messages (RTM_NEWNEIGH,
99  *      RTM_DELNEIGH), watching only for neighbours in the watch list.
100  *
101  *        - A neighbour going into NUD_REACHABLE, NUD_STALE, NUD_DELAY, and
102  *          even NUD_PROBE is perfectly normal; we merely record the new state.
103  *
104  *        - A neighbour's entry may be deleted (RTM_DELNEIGH), for example due
105  *          to garbage collection.  This is not necessarily of immediate
106  *          concern; we record the neighbour as moving to NUD_NONE.
107  *
108  *        - A neighbour transitioning to NUD_FAILED (for any reason) is
109  *          critically important and is handled as described below in #4.
110  *
111  *   3. All on-link neighbours in the watch list can be forcibly "probed" by
112  *      calling #probeAll(). This should be called whenever it is important to
113  *      verify that critical neighbours on the link are still reachable, e.g.
114  *      when roaming between BSSIDs.
115  *
116  *        - The kernel will send unicast ARP requests for IPv4 neighbours and
117  *          unicast NS packets for IPv6 neighbours.  The expected replies will
118  *          likely be unicast.
119  *
120  *        - The forced probing is done holding a wakelock. The kernel may,
121  *          however, initiate probing of a neighbor on its own, i.e. whenever
122  *          a neighbour has expired from NUD_DELAY.
123  *
124  *        - The kernel sends:
125  *
126  *              /proc/sys/net/ipv{4,6}/neigh/<ifname>/ucast_solicit
127  *
128  *          number of probes (usually 3) every:
129  *
130  *              /proc/sys/net/ipv{4,6}/neigh/<ifname>/retrans_time_ms
131  *
132  *          number of milliseconds (usually 1000ms). This normally results in
133  *          3 unicast packets, 1 per second.
134  *
135  *        - If no response is received to any of the probe packets, the kernel
136  *          marks the neighbour as being in state NUD_FAILED, and the listening
137  *          process in #2 will learn of it.
138  *
139  *   4. We call the supplied Callback#notifyLost() function if the loss of a
140  *      neighbour in NUD_FAILED would cause IPv4 or IPv6 configuration to
141  *      become incomplete (a loss of provisioning).
142  *
143  *        - For example, losing all our IPv4 on-link DNS servers (or losing
144  *          our only IPv6 default gateway) constitutes a loss of IPv4 (IPv6)
145  *          provisioning; Callback#notifyLost() would be called.
146  *
147  *        - Since it can be non-trivial to reacquire certain IP provisioning
148  *          state it may be best for the link to disconnect completely and
149  *          reconnect afresh.
150  *
151  * Accessing an instance of this class from multiple threads is NOT safe.
152  *
153  * @hide
154  */
155 public class IpReachabilityMonitor {
    // Log tag. Also used as the wakelock name prefix and as the SharedLog sub-component name.
    private static final String TAG = "IpReachabilityMonitor";
    // Logging switches, evaluated once at class initialisation from the log level set for TAG.
    private static final boolean DBG = Log.isLoggable(TAG, Log.DEBUG);
    private static final boolean VDBG = Log.isLoggable(TAG, Log.VERBOSE);

    // Upper and lower bound for NUD probe parameters.
    // The MIN_* values are the hardcoded defaults that the constructor applies before switching
    // to the steady-state parameters.
    protected static final int MAX_NUD_SOLICIT_NUM = 15;
    protected static final int MIN_NUD_SOLICIT_NUM = 5;
    protected static final int MAX_NUD_SOLICIT_INTERVAL_MS = 1000;
    protected static final int MIN_NUD_SOLICIT_INTERVAL_MS = 750;
    // Multicast resolicit count passed to setNeighborParameters() when the multicast resolicit
    // feature is enabled; INVALID_NUD_MCAST_RESOLICIT_NUM is passed instead when it is disabled.
    protected static final int NUD_MCAST_RESOLICIT_NUM = 3;
    private static final int INVALID_NUD_MCAST_RESOLICIT_NUM = -1;

    // NOTE(review): the two sentinels below are not referenced in this part of the file;
    // presumably they mark "no applicable type" when mapping NUD event types - confirm at use site.
    private static final int INVALID_LEGACY_NUD_FAILURE_TYPE = -1;
    public static final int INVALID_REACHABILITY_LOSS_TYPE = -1;
170 
171     public interface Callback {
172         /**
173          * This callback function must execute as quickly as possible as it is
174          * run on the same thread that listens to kernel neighbor updates.
175          *
176          * TODO: refactor to something like notifyProvisioningLost(String msg).
177          */
notifyLost(String logMsg, NudEventType type)178         void notifyLost(String logMsg, NudEventType type);
179     }
180 
181     /**
182      * Encapsulates IpReachabilityMonitor dependencies on systems that hinder unit testing.
183      * TODO: consider also wrapping MultinetworkPolicyTracker in this interface.
184      */
185     @VisibleForTesting(visibility = VisibleForTesting.Visibility.PACKAGE)
186     public interface Dependencies {
acquireWakeLock(long durationMs)187         void acquireWakeLock(long durationMs);
makeIpNeighborMonitor(Handler h, SharedLog log, NeighborEventConsumer cb)188         IpNeighborMonitor makeIpNeighborMonitor(Handler h, SharedLog log, NeighborEventConsumer cb);
isFeatureEnabled(Context context, String name)189         boolean isFeatureEnabled(Context context, String name);
isFeatureNotChickenedOut(Context context, String name)190         boolean isFeatureNotChickenedOut(Context context, String name);
getIpReachabilityMonitorMetrics()191         IpReachabilityMonitorMetrics getIpReachabilityMonitorMetrics();
192 
makeDefault(Context context, String iface)193         static Dependencies makeDefault(Context context, String iface) {
194             final String lockName = TAG + "." + iface;
195             final PowerManager pm = (PowerManager) context.getSystemService(Context.POWER_SERVICE);
196             final WakeLock lock = pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK, lockName);
197 
198             return new Dependencies() {
199                 public void acquireWakeLock(long durationMs) {
200                     lock.acquire(durationMs);
201                 }
202 
203                 public IpNeighborMonitor makeIpNeighborMonitor(Handler h, SharedLog log,
204                         NeighborEventConsumer cb) {
205                     return new IpNeighborMonitor(h, log, cb);
206                 }
207 
208                 public boolean isFeatureEnabled(final Context context, final String name) {
209                     return DeviceConfigUtils.isNetworkStackFeatureEnabled(context, name);
210                 }
211 
212                 public boolean isFeatureNotChickenedOut(final Context context, final String name) {
213                     return DeviceConfigUtils.isNetworkStackFeatureNotChickenedOut(context, name);
214                 }
215 
216                 public IpReachabilityMonitorMetrics getIpReachabilityMonitorMetrics() {
217                     return new IpReachabilityMonitorMetrics();
218                 }
219             };
220         }
221     }
222 
    // Parameters of the monitored interface; its index filters neighbour events and its name is
    // checked against the LinkProperties passed to updateLinkProperties().
    private final InterfaceParams mInterfaceParams;
    // Source of kernel neighbour events; started in the constructor and stopped in stop().
    private final IpNeighborMonitor mIpNeighborMonitor;
    private final SharedLog mLog;
    private final Dependencies mDependencies;
    // Consulted by avoidingBadLinks(): when false, bad links are always treated as avoided.
    private final boolean mUsingMultinetworkPolicyTracker;
    private final ConnectivityManager mCm;
    private final IpConnectivityLog mMetricsLog;
    private final Context mContext;
    private final INetd mNetd;
    private final IpReachabilityMonitorMetrics mIpReachabilityMetrics;
    // Private copy of the LinkProperties most recently passed to updateLinkProperties().
    private LinkProperties mLinkProperties = new LinkProperties();
    // Watched neighbours mapped to the last event recorded for each; the value stays null until
    // the first event for that neighbour arrives.
    private Map<InetAddress, NeighborEvent> mNeighborWatchList = new HashMap<>();
    // Time in milliseconds of the last forced probe request.
    private volatile long mLastProbeTimeMs;
    // Time in milliseconds of the last forced probe request due to roam or CMD_CONFIRM.
    private long mLastProbeDueToRoamMs;
    private long mLastProbeDueToConfirmMs;
    // NOTE(review): not assigned in this part of the file; presumably the NUD parameters
    // currently applied to the interface - confirm where they are set.
    private int mNumSolicits;
    private int mInterSolicitIntervalMs;
    @NonNull
    private final Callback mCallback;
    // Feature flags, read once in the constructor through Dependencies.
    private final boolean mMulticastResolicitEnabled;
    private final boolean mIgnoreIncompleteIpv6DnsServerEnabled;
    private final boolean mIgnoreIncompleteIpv6DefaultRouterEnabled;
    private final boolean mMacChangeFailureOnlyAfterRoam;
    private final boolean mIgnoreOrganicNudFailure;
    private final boolean mIgnoreNeverReachableNeighbor;
    // A set to track whether a neighbor has ever entered NUD_REACHABLE state before.
    private final Set<InetAddress> mEverReachableNeighbors = new ArraySet<>();
252 
253     public IpReachabilityMonitor(
254             Context context, InterfaceParams ifParams, Handler h, SharedLog log, Callback callback,
255             boolean usingMultinetworkPolicyTracker, Dependencies dependencies, final INetd netd) {
256         this(context, ifParams, h, log, callback, usingMultinetworkPolicyTracker, dependencies,
257                 new IpConnectivityLog(), netd);
258     }
259 
260     @VisibleForTesting
261     public IpReachabilityMonitor(Context context, InterfaceParams ifParams, Handler h,
262             SharedLog log, Callback callback, boolean usingMultinetworkPolicyTracker,
263             Dependencies dependencies, final IpConnectivityLog metricsLog, final INetd netd) {
264         if (ifParams == null) throw new IllegalArgumentException("null InterfaceParams");
265 
266         mContext = context;
267         mInterfaceParams = ifParams;
268         mLog = log.forSubComponent(TAG);
269         mCallback = callback;
270         mUsingMultinetworkPolicyTracker = usingMultinetworkPolicyTracker;
271         mCm = context.getSystemService(ConnectivityManager.class);
272         mDependencies = dependencies;
273         mMulticastResolicitEnabled = dependencies.isFeatureNotChickenedOut(context,
274                 IP_REACHABILITY_MCAST_RESOLICIT_VERSION);
275         mIgnoreIncompleteIpv6DnsServerEnabled = dependencies.isFeatureNotChickenedOut(context,
276                 IP_REACHABILITY_IGNORE_INCOMPLETE_IPV6_DNS_SERVER_VERSION);
277         mIgnoreIncompleteIpv6DefaultRouterEnabled = dependencies.isFeatureEnabled(context,
278                 IP_REACHABILITY_IGNORE_INCOMPLETE_IPV6_DEFAULT_ROUTER_VERSION);
279         mMacChangeFailureOnlyAfterRoam = dependencies.isFeatureNotChickenedOut(context,
280                 IP_REACHABILITY_ROUTER_MAC_CHANGE_FAILURE_ONLY_AFTER_ROAM_VERSION);
281         mIgnoreOrganicNudFailure = dependencies.isFeatureEnabled(context,
282                 IP_REACHABILITY_IGNORE_ORGANIC_NUD_FAILURE_VERSION);
283         mIgnoreNeverReachableNeighbor = dependencies.isFeatureEnabled(context,
284                 IP_REACHABILITY_IGNORE_NEVER_REACHABLE_NEIGHBOR_VERSION);
285         mMetricsLog = metricsLog;
286         mNetd = netd;
287         Preconditions.checkNotNull(mNetd);
288         Preconditions.checkArgument(!TextUtils.isEmpty(mInterfaceParams.name));
289 
290         // In case the overylaid parameters specify an invalid configuration, set the parameters
291         // to the hardcoded defaults first, then set them to the values used in the steady state.
292         try {
293             int numResolicits = mMulticastResolicitEnabled
294                     ? NUD_MCAST_RESOLICIT_NUM
295                     : INVALID_NUD_MCAST_RESOLICIT_NUM;
296             setNeighborParameters(MIN_NUD_SOLICIT_NUM, MIN_NUD_SOLICIT_INTERVAL_MS, numResolicits);
297         } catch (Exception e) {
298             Log.e(TAG, "Failed to adjust neighbor parameters with hardcoded defaults");
299         }
300         setNeighbourParametersForSteadyState();
301 
302         mIpNeighborMonitor = dependencies.makeIpNeighborMonitor(h, mLog,
303                 (NeighborEvent event) -> {
304                     if (mInterfaceParams.index != event.ifindex) return;
305                     if (!mNeighborWatchList.containsKey(event.ip)) return;
306 
307                     final NeighborEvent prev = mNeighborWatchList.put(event.ip, event);
308                     if (DBG) {
309                         Log.d(TAG, "neighbor went from: " + prev + " to: " + event);
310                     }
311 
312                     // TODO: Consider what to do with other states that are not within
313                     // NeighborEvent#isValid() (i.e. NUD_NONE, NUD_INCOMPLETE).
314                     if (event.nudState == StructNdMsg.NUD_FAILED) {
315                         // After both unicast probe and multicast probe(if mcast_resolicit is not 0)
316                         // attempts fail, trigger the neighbor lost event and disconnect.
317                         mLog.w("ALERT neighbor went from: " + prev + " to: " + event);
318                         handleNeighborLost(prev, event);
319                     } else if (event.nudState == StructNdMsg.NUD_REACHABLE) {
320                         // TODO: do not ignore NUD_REACHABLE events before neighbors are added to
321                         // the watchlist.
322                         // If the NUD_REACHABLE event is received before the neighbor is put in the
323                         // watchlist via updateLinkProperties, it is not recorded in
324                         // mEverReachableNeighbors. This means that if a NUD_FAILURE occurs before
325                         // another NUD_REACHABLE, it is ignored. This race already exists today for
326                         // incomplete IPv6 neighbors.
327                         mEverReachableNeighbors.add(event.ip);
328                         handleNeighborReachable(prev, event);
329                     }
330                 });
331         mIpNeighborMonitor.start();
332         mIpReachabilityMetrics = dependencies.getIpReachabilityMonitorMetrics();
333     }
334 
    /**
     * Stops listening for neighbour events, then clears the link properties, the watch list and
     * the set of neighbours that have ever been reachable.
     */
    public void stop() {
        mIpNeighborMonitor.stop();
        clearLinkProperties();
    }
339 
340     public void dump(PrintWriter pw) {
341         if (Looper.myLooper() == mIpNeighborMonitor.getHandler().getLooper()) {
342             pw.println(describeWatchList("\n"));
343             return;
344         }
345 
346         final ConditionVariable cv = new ConditionVariable(false);
347         mIpNeighborMonitor.getHandler().post(() -> {
348             pw.println(describeWatchList("\n"));
349             cv.open();
350         });
351 
352         if (!cv.block(1000)) {
353             pw.println("Timed out waiting for IpReachabilityMonitor dump");
354         }
355     }
356 
357     private String describeWatchList() { return describeWatchList(" "); }
358 
359     private String describeWatchList(String sep) {
360         final StringBuilder sb = new StringBuilder();
361         sb.append("iface{" + mInterfaceParams + "}," + sep);
362         sb.append("ntable=[" + sep);
363         String delimiter = "";
364         for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) {
365             sb.append(delimiter).append(entry.getKey().getHostAddress() + "/" + entry.getValue());
366             delimiter = "," + sep;
367         }
368         sb.append("]");
369         return sb.toString();
370     }
371 
372     @VisibleForTesting
373     static boolean isOnLink(List<RouteInfo> routes, InetAddress ip) {
374         for (RouteInfo route : routes) {
375             if (!route.hasGateway() && route.matches(ip)
376                     && route.getType() == RouteInfo.RTN_UNICAST) {
377                 return true;
378             }
379         }
380         return false;
381     }
382 
383     private boolean hasDefaultRouterNeighborMacAddressChanged(
384             @Nullable final NeighborEvent prev, @NonNull final NeighborEvent event) {
385         // TODO: once this rolls out safely, merge something like aosp/2908139 and remove this code.
386         if (mMacChangeFailureOnlyAfterRoam) {
387             if (!isNeighborDefaultRouter(event)) return false;
388             if (prev == null || prev.nudState != StructNdMsg.NUD_PROBE) return false;
389             if (!isNudFailureDueToRoam()) return false;
390         } else {
391             // Previous, incorrect, behaviour: MAC address changes are a failure at all times.
392             if (prev == null || !isNeighborDefaultRouter(event)) return false;
393         }
394         return !event.macAddr.equals(prev.macAddr);
395     }
396 
397     private boolean isNeighborDefaultRouter(@NonNull final NeighborEvent event) {
398         // For the IPv6 link-local scoped address, equals() works because the NeighborEvent.ip
399         // doesn't have a scope id and Inet6Address#equals doesn't consider scope id neither.
400         for (RouteInfo route : mLinkProperties.getRoutes()) {
401             if (route.isDefaultRoute() && event.ip.equals(route.getGateway())) return true;
402         }
403         return false;
404     }
405 
406     private boolean isNeighborDnsServer(@NonNull final NeighborEvent event) {
407         for (InetAddress dns : mLinkProperties.getDnsServers()) {
408             if (event.ip.equals(dns)) return true;
409         }
410         return false;
411     }
412 
413     public void updateLinkProperties(LinkProperties lp) {
414         if (!mInterfaceParams.name.equals(lp.getInterfaceName())) {
415             // TODO: figure out whether / how to cope with interface changes.
416             Log.wtf(TAG, "requested LinkProperties interface '" + lp.getInterfaceName() +
417                     "' does not match: " + mInterfaceParams.name);
418             return;
419         }
420 
421         mLinkProperties = new LinkProperties(lp);
422         Map<InetAddress, NeighborEvent> newNeighborWatchList = new HashMap<>();
423 
424         final List<RouteInfo> routes = mLinkProperties.getRoutes();
425         for (RouteInfo route : routes) {
426             if (route.hasGateway()) {
427                 InetAddress gw = route.getGateway();
428                 if (isOnLink(routes, gw)) {
429                     newNeighborWatchList.put(gw, mNeighborWatchList.getOrDefault(gw, null));
430                 }
431             }
432         }
433 
434         for (InetAddress dns : lp.getDnsServers()) {
435             if (isOnLink(routes, dns)) {
436                 newNeighborWatchList.put(dns, mNeighborWatchList.getOrDefault(dns, null));
437             }
438         }
439 
440         mNeighborWatchList = newNeighborWatchList;
441         // Remove the corresponding neighbor from mEverReachableNeighbors if it was removed from the
442         // watchlist.
443         mEverReachableNeighbors.removeIf(addr -> !newNeighborWatchList.containsKey(addr));
444         if (DBG) { Log.d(TAG, "watch: " + describeWatchList()); }
445     }
446 
447     public void clearLinkProperties() {
448         mLinkProperties.clear();
449         mNeighborWatchList.clear();
450         mEverReachableNeighbors.clear();
451         if (DBG) { Log.d(TAG, "clear: " + describeWatchList()); }
452     }
453 
454     private void handleNeighborReachable(@Nullable final NeighborEvent prev,
455             @NonNull final NeighborEvent event) {
456         if (mMulticastResolicitEnabled
457                 && hasDefaultRouterNeighborMacAddressChanged(prev, event)) {
458             // This implies device has confirmed the neighbor's reachability from
459             // other states(e.g., NUD_PROBE or NUD_STALE), checking if the mac
460             // address hasn't changed is required. If Mac address does change, then
461             // trigger a new neighbor lost event and disconnect.
462             final String logMsg = "ALERT neighbor: " + event.ip
463                     + " MAC address changed from: " + prev.macAddr
464                     + " to: " + event.macAddr;
465             final NudEventType type =
466                     getMacAddressChangedEventType(isFromProbe(), isNudFailureDueToRoam());
467             mLog.w(logMsg);
468             mCallback.notifyLost(logMsg, type);
469             logNudFailed(event, type);
470             return;
471         }
472         maybeRestoreNeighborParameters();
473     }
474 
475     private boolean shouldIgnoreIncompleteNeighbor(@Nullable final NeighborEvent prev,
476             @NonNull final NeighborEvent event) {
477         // mIgnoreNeverReachableNeighbor already takes care of incomplete IPv6 neighbors, so do not
478         // apply this logic.
479         if (mIgnoreNeverReachableNeighbor) return false;
480 
481         // For on-link IPv4/v6 DNS server or default router that never ever responds to
482         // address resolution(e.g. ARP or NS), kernel will send RTM_NEWNEIGH with NUD_FAILED
483         // to user space directly, and there is no netlink neighbor events related to this
484         // neighbor received before.
485         return (prev == null && event.nudState == StructNdMsg.NUD_FAILED);
486     }
487 
    /**
     * Handles a watched neighbour entering NUD_FAILED.
     *
     * Builds a "what if" copy of the current LinkProperties with the routes and DNS servers of
     * all failed neighbours removed, and compares its provisioning state with the current one.
     * If provisioning would be lost, the callback is notified, unless the failure is an organic
     * critical failure and ignoring those is enabled. The NUD failure is logged to metrics in
     * every case.
     *
     * @param prev the event previously recorded for this neighbour, or null if there was none.
     * @param event the NUD_FAILED event being handled.
     */
    private void handleNeighborLost(@Nullable final NeighborEvent prev,
            @NonNull final NeighborEvent event) {
        final LinkProperties whatIfLp = new LinkProperties(mLinkProperties);

        for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) {
            // TODO: Consider using NeighborEvent#isValid() here; it's more
            // strict but may interact badly if other entries are somehow in
            // NUD_INCOMPLETE (say, during network attach).
            final NeighborEvent val = entry.getValue();
            final InetAddress ip = entry.getKey();

            // Find all the neighbors that have gone into FAILED state.
            // Ignore entries for which we have never received an event. If there are neighbors
            // that never respond to ARP/ND, the kernel will send several FAILED events, then
            // an INCOMPLETE event, and then more FAILED events. The INCOMPLETE event will
            // populate the map and the subsequent FAILED event will be processed.
            if (val == null || val.nudState != StructNdMsg.NUD_FAILED) continue;

            // Also ignore entry for any neighbor which is never ever reachable.
            // Pretend neighbors that have never been reachable are still there. Leaving them
            // inside whatIfLp has the benefit that the logic that compares provisioning loss
            // below works consistently independent of the current event being processed.
            if (mIgnoreNeverReachableNeighbor && !mEverReachableNeighbors.contains(ip)) continue;

            // Drop every route that goes through the failed neighbour.
            for (RouteInfo route : mLinkProperties.getRoutes()) {
                if (ip.equals(route.getGateway())) {
                    whatIfLp.removeRoute(route);
                }
            }

            if (avoidingBadLinks() || !(ip instanceof Inet6Address)) {
                // We should do this unconditionally, but alas we cannot: b/31827713.
                whatIfLp.removeDnsServer(ip);
            }
        }

        // TODO: cleanup below code(checking if the incomplete IPv6 neighbor should be ignored)
        // once the feature of ignoring the neighbor was never ever reachable rolls out.
        final boolean ignoreIncompleteIpv6DnsServer =
                mIgnoreIncompleteIpv6DnsServerEnabled
                        && isNeighborDnsServer(event)
                        && shouldIgnoreIncompleteNeighbor(prev, event);

        // Generally Router Advertisement should take SLLA option, then device won't do address
        // resolution for default router's IPv6 link-local address automatically. But sometimes
        // it may miss SLLA option, also add a flag to check these cases.
        final boolean ignoreIncompleteIpv6DefaultRouter =
                mIgnoreIncompleteIpv6DefaultRouterEnabled
                        && isNeighborDefaultRouter(event)
                        && shouldIgnoreIncompleteNeighbor(prev, event);

        // Only ignore the incomplete IPv6 neighbor iff IPv4 is still provisioned. For IPv6-only
        // networks, we MUST not ignore any incomplete IPv6 neighbor.
        final boolean ignoreIncompleteIpv6Neighbor =
                (ignoreIncompleteIpv6DnsServer || ignoreIncompleteIpv6DefaultRouter)
                        && whatIfLp.isIpv4Provisioned();

        // It's better to remove the incompleted on-link IPv6 DNS server or default router from
        // watch list, otherwise, when wifi invokes probeAll later (e.g. post roam) to send probe
        // to an incompleted on-link DNS server or default router, it should fail to send netlink
        // message to kernel as there is no neighbor cache entry for it at all.
        if (ignoreIncompleteIpv6Neighbor) {
            Log.d(TAG, "remove incomplete IPv6 neighbor " + event.ip
                    + " which fails to respond to address resolution from watch list.");
            mNeighborWatchList.remove(event.ip);
        }

        // Provisioning is lost for a family if it is provisioned now but would not be once the
        // failed neighbours are removed.
        final boolean lostIpv4Provisioning =
                mLinkProperties.isIpv4Provisioned() && !whatIfLp.isIpv4Provisioned();
        final boolean lostIpv6Provisioning =
                mLinkProperties.isIpv6Provisioned() && !whatIfLp.isIpv6Provisioned()
                        && !ignoreIncompleteIpv6Neighbor;
        final boolean lostProvisioning = lostIpv4Provisioning || lostIpv6Provisioning;
        final NudEventType type = getNudFailureEventType(isFromProbe(),
                isNudFailureDueToRoam(), lostProvisioning);

        if (lostProvisioning) {
            final boolean isOrganicNudFailureAndToBeIgnored =
                    ((type == NudEventType.NUD_ORGANIC_FAILED_CRITICAL)
                            && mIgnoreOrganicNudFailure);
            final String logMsg = "FAILURE: LOST_PROVISIONING, " + event
                    + ", NUD event type: " + type.name()
                    + (isOrganicNudFailureAndToBeIgnored ? ", to be ignored" : "");
            Log.w(TAG, logMsg);
            // Notify critical neighbor lost as long as the NUD failures
            // are not from kernel organic or the NUD failure event type is
            // NUD_ORGANIC_FAILED_CRITICAL but the experiment flag is not
            // enabled. Regardless, the event metrics are still recorded.
            if (!isOrganicNudFailureAndToBeIgnored) {
                mCallback.notifyLost(logMsg, type);
            }
        }
        logNudFailed(event, type);
    }
582 
583     private void maybeRestoreNeighborParameters() {
584         for (Map.Entry<InetAddress, NeighborEvent> entry : mNeighborWatchList.entrySet()) {
585             final NeighborEvent val = entry.getValue();
586             final InetAddress ip = entry.getKey();
587             if (DBG) {
588                 Log.d(TAG, "neighbour IPv4(v6): " + ip + " neighbour state: "
589                         + (val != null ? StructNdMsg.stringForNudState(val.nudState) : "null"));
590             }
591 
592             // Skip the neighbor which is never ever reachable, we ignore the NUD failure for it,
593             // pretend neighbor that has never been reachable is still there no matter of neighbor
594             // event state.
595             if (mIgnoreNeverReachableNeighbor && !mEverReachableNeighbors.contains(ip)) continue;
596 
597             // If an entry is null, consider that probing for that neighbour has completed.
598             if (val == null || val.nudState != StructNdMsg.NUD_REACHABLE) return;
599         }
600 
601         // Probing for all neighbours in the watchlist is complete and the connection is stable,
602         // restore NUD probe parameters to steadystate value. In the case where neighbours
603         // are responsive, this code will run before the wakelock expires.
604         setNeighbourParametersForSteadyState();
605     }
606 
607     private boolean avoidingBadLinks() {
608         return !mUsingMultinetworkPolicyTracker || mCm.shouldAvoidBadWifi();
609     }
610 
611     /**
612      * Force probe to verify whether or not the critical on-link neighbours are still reachable.
613      *
614      * @param dueToRoam indicate on which situation forced probe has been sent, e.g., on post
615      *                  roaming or receiving CMD_CONFIRM from IpClient.
616      */
617     public void probeAll(boolean dueToRoam) {
618         setNeighbourParametersPostRoaming();
619 
620         final List<InetAddress> ipProbeList = new ArrayList<>(mNeighborWatchList.keySet());
621         if (!ipProbeList.isEmpty()) {
622             // Keep the CPU awake long enough to allow all ARP/ND
623             // probes a reasonable chance at success. See b/23197666.
624             //
625             // The wakelock we use is (by default) refcounted, and this version
626             // of acquire(timeout) queues a release message to keep acquisitions
627             // and releases balanced.
628             mDependencies.acquireWakeLock(getProbeWakeLockDuration());
629         }
630 
631         for (InetAddress ip : ipProbeList) {
632             final int rval = IpNeighborMonitor.startKernelNeighborProbe(mInterfaceParams.index, ip);
633             mLog.log(String.format("put neighbor %s into NUD_PROBE state (rval=%d)",
634                      ip.getHostAddress(), rval));
635             logEvent(IpReachabilityEvent.PROBE, rval);
636         }
637         mLastProbeTimeMs = SystemClock.elapsedRealtime();
638         if (dueToRoam) {
639             mLastProbeDueToRoamMs = mLastProbeTimeMs;
640         } else {
641             mLastProbeDueToConfirmMs = mLastProbeTimeMs;
642         }
643     }
644 
645     private long getProbeWakeLockDuration() {
646         final long gracePeriodMs = 500;
647         final int numSolicits =
648                 mNumSolicits + (mMulticastResolicitEnabled ? NUD_MCAST_RESOLICIT_NUM : 0);
649         return (long) (numSolicits * mInterSolicitIntervalMs) + gracePeriodMs;
650     }
651 
652     private void setNeighbourParametersPostRoaming() {
653         setNeighborParametersFromResources(R.integer.config_nud_postroaming_solicit_num,
654                 R.integer.config_nud_postroaming_solicit_interval);
655     }
656 
657     private void setNeighbourParametersForSteadyState() {
658         setNeighborParametersFromResources(R.integer.config_nud_steadystate_solicit_num,
659                 R.integer.config_nud_steadystate_solicit_interval);
660     }
661 
662     private void setNeighborParametersFromResources(final int numResId, final int intervalResId) {
663         try {
664             final int numSolicits = mContext.getResources().getInteger(numResId);
665             final int interSolicitIntervalMs = mContext.getResources().getInteger(intervalResId);
666             setNeighborParameters(numSolicits, interSolicitIntervalMs);
667         } catch (Exception e) {
668             Log.e(TAG, "Failed to adjust neighbor parameters");
669         }
670     }
671 
672     private void setNeighborParameters(int numSolicits, int interSolicitIntervalMs)
673             throws RemoteException, IllegalArgumentException {
674         // Do not set mcast_resolicit param by default.
675         setNeighborParameters(numSolicits, interSolicitIntervalMs, INVALID_NUD_MCAST_RESOLICIT_NUM);
676     }
677 
678     private void setNeighborParameters(int numSolicits, int interSolicitIntervalMs,
679             int numResolicits) throws RemoteException, IllegalArgumentException {
680         Preconditions.checkArgument(numSolicits >= MIN_NUD_SOLICIT_NUM,
681                 "numSolicits must be at least " + MIN_NUD_SOLICIT_NUM);
682         Preconditions.checkArgument(numSolicits <= MAX_NUD_SOLICIT_NUM,
683                 "numSolicits must be at most " + MAX_NUD_SOLICIT_NUM);
684         Preconditions.checkArgument(interSolicitIntervalMs >= MIN_NUD_SOLICIT_INTERVAL_MS,
685                 "interSolicitIntervalMs must be at least " + MIN_NUD_SOLICIT_INTERVAL_MS);
686         Preconditions.checkArgument(interSolicitIntervalMs <= MAX_NUD_SOLICIT_INTERVAL_MS,
687                 "interSolicitIntervalMs must be at most " + MAX_NUD_SOLICIT_INTERVAL_MS);
688 
689         for (int family : new Integer[]{INetd.IPV4, INetd.IPV6}) {
690             mNetd.setProcSysNet(family, INetd.NEIGH, mInterfaceParams.name, "retrans_time_ms",
691                     Integer.toString(interSolicitIntervalMs));
692             mNetd.setProcSysNet(family, INetd.NEIGH, mInterfaceParams.name, "ucast_solicit",
693                     Integer.toString(numSolicits));
694             if (numResolicits != INVALID_NUD_MCAST_RESOLICIT_NUM) {
695                 mNetd.setProcSysNet(family, INetd.NEIGH, mInterfaceParams.name, "mcast_resolicit",
696                         Integer.toString(numResolicits));
697             }
698         }
699 
700         mNumSolicits = numSolicits;
701         mInterSolicitIntervalMs = interSolicitIntervalMs;
702     }
703 
704     private boolean isFromProbe() {
705         final long duration = SystemClock.elapsedRealtime() - mLastProbeTimeMs;
706         return duration < getProbeWakeLockDuration();
707     }
708 
709     private boolean isNudFailureDueToRoam() {
710         if (!isFromProbe()) return false;
711 
712         // Check to which probe expiry the curren timestamp gets close when NUD failure event
713         // happens, theoretically that indicates which probe event(due to roam or CMD_CONFIRM)
714         // was triggered eariler.
715         //
716         // Note that this would be incorrect if the probe or confirm was so long ago that the
717         // probe duration has already expired. That cannot happen because isFromProbe would return
718         // false.
719         final long probeExpiryAfterRoam = mLastProbeDueToRoamMs + getProbeWakeLockDuration();
720         final long probeExpiryAfterConfirm =
721                 mLastProbeDueToConfirmMs + getProbeWakeLockDuration();
722         final long currentTime = SystemClock.elapsedRealtime();
723         return Math.abs(probeExpiryAfterRoam - currentTime)
724                 < Math.abs(probeExpiryAfterConfirm - currentTime);
725     }
726 
727     private void logEvent(int probeType, int errorCode) {
728         int eventType = probeType | (errorCode & 0xff);
729         mMetricsLog.log(mInterfaceParams.name, new IpReachabilityEvent(eventType));
730     }
731 
732     private void logNudFailed(final NeighborEvent event, final NudEventType type) {
733         logNeighborLostEvent(event, type);
734 
735         // The legacy metrics only record whether the failure came from a probe and whether
736         // the network is still provisioned. They do not record provisioning failures due to
737         // multicast resolicits finding that the MAC address has changed.
738         final int eventType = legacyNudFailureType(type);
739         if (eventType == INVALID_LEGACY_NUD_FAILURE_TYPE) return;
740         mMetricsLog.log(mInterfaceParams.name, new IpReachabilityEvent(eventType));
741     }
742 
743     /**
744      * Returns the neighbor type code corresponding to the given conditions.
745      */
746     private NudNeighborType getNeighborType(final NeighborEvent event) {
747         final boolean isGateway = isNeighborDefaultRouter(event);
748         final boolean isDnsServer = isNeighborDnsServer(event);
749 
750         if (isGateway && isDnsServer) return NudNeighborType.NUD_NEIGHBOR_BOTH;
751         if (isGateway && !isDnsServer) return NudNeighborType.NUD_NEIGHBOR_GATEWAY;
752         if (!isGateway && isDnsServer) return NudNeighborType.NUD_NEIGHBOR_DNS;
753         return NudNeighborType.NUD_NEIGHBOR_UNKNOWN;
754     }
755 
756     /**
757      * Returns the NUD failure event type code corresponding to the given conditions.
758      */
759     private static NudEventType getNudFailureEventType(boolean isFromProbe, boolean isDueToRoam,
760             boolean isProvisioningLost) {
761         if (!isFromProbe) {
762             return isProvisioningLost
763                     ? NudEventType.NUD_ORGANIC_FAILED_CRITICAL
764                     : NudEventType.NUD_ORGANIC_FAILED;
765         }
766         return isProvisioningLost
767                 ? isDueToRoam
768                         ? NudEventType.NUD_POST_ROAMING_FAILED_CRITICAL
769                         : NudEventType.NUD_CONFIRM_FAILED_CRITICAL
770                 : isDueToRoam
771                         ? NudEventType.NUD_POST_ROAMING_FAILED
772                         : NudEventType.NUD_CONFIRM_FAILED;
773     }
774 
775     /**
776      * Returns the NUD failure event type code due to neighbor's MAC address has changed
777      * corresponding to the given conditions.
778      */
779     private static NudEventType getMacAddressChangedEventType(boolean isFromProbe,
780             boolean isDueToRoam) {
781         return isFromProbe
782                 ? isDueToRoam
783                         ? NudEventType.NUD_POST_ROAMING_MAC_ADDRESS_CHANGED
784                         : NudEventType.NUD_CONFIRM_MAC_ADDRESS_CHANGED
785                 : NudEventType.NUD_ORGANIC_MAC_ADDRESS_CHANGED;
786     }
787 
788     /**
789      * Log NUD failure metrics with new statsd APIs while the function using mMetricsLog API
790      * still sends the legacy metrics, @see #logNudFailed.
791      */
792     private void logNeighborLostEvent(final NeighborEvent event, final NudEventType type) {
793         final IpType ipType = (event.ip instanceof Inet6Address) ? IpType.IPV6 : IpType.IPV4;
794         mIpReachabilityMetrics.setNudIpType(ipType);
795         mIpReachabilityMetrics.setNudNeighborType(getNeighborType(event));
796         mIpReachabilityMetrics.setNudEventType(type);
797         mIpReachabilityMetrics.statsWrite();
798     }
799 
800     /**
801      * Returns the NUD failure event type code corresponding to the given conditions.
802      */
803     private static int legacyNudFailureType(final NudEventType type) {
804         switch (type) {
805             case NUD_POST_ROAMING_FAILED:
806             case NUD_CONFIRM_FAILED:
807                 return NUD_FAILED;
808             case NUD_POST_ROAMING_FAILED_CRITICAL:
809             case NUD_CONFIRM_FAILED_CRITICAL:
810                 return PROVISIONING_LOST;
811             case NUD_ORGANIC_FAILED:
812                 return NUD_FAILED_ORGANIC;
813             case NUD_ORGANIC_FAILED_CRITICAL:
814                 return PROVISIONING_LOST_ORGANIC;
815             default:
816                 // Do not log legacy event
817                 return INVALID_LEGACY_NUD_FAILURE_TYPE;
818         }
819     }
820 
821     /**
822      * Convert the NUD critical failure event type to a int constant defined in IIpClientCallbacks.
823      */
824     public static int nudEventTypeToInt(final NudEventType type) {
825         switch (type) {
826             case NUD_POST_ROAMING_FAILED_CRITICAL:
827             case NUD_POST_ROAMING_MAC_ADDRESS_CHANGED:
828                 return ReachabilityLossReason.ROAM;
829             case NUD_CONFIRM_FAILED_CRITICAL:
830             case NUD_CONFIRM_MAC_ADDRESS_CHANGED:
831                 return ReachabilityLossReason.CONFIRM;
832             case NUD_ORGANIC_FAILED_CRITICAL:
833             case NUD_ORGANIC_MAC_ADDRESS_CHANGED:
834                 return ReachabilityLossReason.ORGANIC;
835             // For other NudEventType which won't trigger notifyLost, just ignore these events.
836             default:
837                 return INVALID_REACHABILITY_LOSS_TYPE;
838         }
839     }
840 }
841