Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2016 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
Bernie Innocenti | e9ba09c | 2018-09-12 23:20:10 +0900 | [diff] [blame] | 16 | #define LOG_TAG "res_stats" |
| 17 | |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 18 | #include <arpa/nameser.h> |
Bernie Innocenti | f12d5bb | 2018-08-31 14:09:46 +0900 | [diff] [blame] | 19 | #include <stdbool.h> |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 20 | #include <string.h> |
| 21 | |
Bernie Innocenti | e9ba09c | 2018-09-12 23:20:10 +0900 | [diff] [blame] | 22 | #include <android-base/logging.h> |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 23 | |
Bernie Innocenti | 189eb50 | 2018-10-01 23:10:18 +0900 | [diff] [blame] | 24 | #include "netd_resolv/stats.h" |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 25 | |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 26 | |
// Calculate the round-trip time in milliseconds from start time t0 to end time t1.
//
// Each timestamp is truncated to whole milliseconds before the subtraction, so the result is
// the difference between the two truncated values (negative if t1 is earlier than t0).
// The result is narrowed to int, so differences that do not fit in an int are not representable.
int _res_stats_calculate_rtt(const timespec* t1, const timespec* t0) {
    // Subtract the seconds before scaling to milliseconds. Scaling each tv_sec by 1000 on its
    // own overflows when long is 32 bits wide and tv_sec is larger than roughly 2.1 million
    // seconds, which is undefined behavior for signed arithmetic.
    const long long sec_delta = (long long) t1->tv_sec - (long long) t0->tv_sec;
    // Divide ns by one million to get ms; truncate each side separately so that the result
    // matches subtracting two whole-millisecond timestamps.
    const long long ms_delta = (long long) (t1->tv_nsec / 1000000) - (t0->tv_nsec / 1000000);
    return (int) (sec_delta * 1000 + ms_delta);
}
| 34 | |
Bernie Innocenti | ee1b85b | 2018-09-25 14:23:19 +0900 | [diff] [blame] | 35 | // Create a sample for calculating server reachability statistics. |
Bernie Innocenti | 189eb50 | 2018-10-01 23:10:18 +0900 | [diff] [blame] | 36 | void _res_stats_set_sample(res_sample* sample, time_t now, int rcode, int rtt) { |
chenbruce | 16adee4 | 2019-02-20 19:45:50 +0800 | [diff] [blame] | 37 | LOG(INFO) << __func__ << ": rcode = " << rcode << ", sec = " << rtt; |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 38 | sample->at = now; |
| 39 | sample->rcode = rcode; |
| 40 | sample->rtt = rtt; |
| 41 | } |
| 42 | |
| 43 | /* Clears all stored samples for the given server. */ |
Bernie Innocenti | 189eb50 | 2018-10-01 23:10:18 +0900 | [diff] [blame] | 44 | void _res_stats_clear_samples(res_stats* stats) { |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 45 | stats->sample_count = stats->sample_next = 0; |
| 46 | } |
| 47 | |
| 48 | /* Aggregates the reachability statistics for the given server based on on the stored samples. */ |
Bernie Innocenti | 189eb50 | 2018-10-01 23:10:18 +0900 | [diff] [blame] | 49 | void android_net_res_stats_aggregate(res_stats* stats, int* successes, int* errors, int* timeouts, |
| 50 | int* internal_errors, int* rtt_avg, time_t* last_sample_time) { |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 51 | int s = 0; // successes |
| 52 | int e = 0; // errors |
| 53 | int t = 0; // timouts |
| 54 | int ie = 0; // internal errors |
| 55 | long rtt_sum = 0; |
| 56 | time_t last = 0; |
| 57 | int rtt_count = 0; |
Bernie Innocenti | f12d5bb | 2018-08-31 14:09:46 +0900 | [diff] [blame] | 58 | for (int i = 0; i < stats->sample_count; ++i) { |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 59 | // Treat everything as an error that the code in send_dg() already considers a |
| 60 | // rejection by the server, i.e. SERVFAIL, NOTIMP and REFUSED. Assume that NXDOMAIN |
| 61 | // and NOTAUTH can actually occur for user queries. NOERROR with empty answer section |
| 62 | // is not treated as an error here either. FORMERR seems to sometimes be returned by |
| 63 | // some versions of BIND in response to DNSSEC or EDNS0. Whether to treat such responses |
| 64 | // as an indication of a broken server is unclear, though. For now treat such responses, |
| 65 | // as well as unknown codes as errors. |
| 66 | switch (stats->samples[i].rcode) { |
Bernie Innocenti | f12d5bb | 2018-08-31 14:09:46 +0900 | [diff] [blame] | 67 | case NOERROR: |
| 68 | case NOTAUTH: |
| 69 | case NXDOMAIN: |
| 70 | ++s; |
| 71 | rtt_sum += stats->samples[i].rtt; |
| 72 | ++rtt_count; |
| 73 | break; |
| 74 | case RCODE_TIMEOUT: |
| 75 | ++t; |
| 76 | break; |
| 77 | case RCODE_INTERNAL_ERROR: |
| 78 | ++ie; |
| 79 | break; |
| 80 | case SERVFAIL: |
| 81 | case NOTIMP: |
| 82 | case REFUSED: |
| 83 | default: |
| 84 | ++e; |
| 85 | break; |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 86 | } |
| 87 | } |
| 88 | *successes = s; |
| 89 | *errors = e; |
| 90 | *timeouts = t; |
| 91 | *internal_errors = ie; |
| 92 | /* If there was at least one successful sample, calculate average RTT. */ |
| 93 | if (rtt_count) { |
| 94 | *rtt_avg = rtt_sum / rtt_count; |
| 95 | } else { |
| 96 | *rtt_avg = -1; |
| 97 | } |
| 98 | /* If we had at least one sample, populate last sample time. */ |
| 99 | if (stats->sample_count > 0) { |
| 100 | if (stats->sample_next > 0) { |
| 101 | last = stats->samples[stats->sample_next - 1].at; |
| 102 | } else { |
| 103 | last = stats->samples[stats->sample_count - 1].at; |
| 104 | } |
| 105 | } |
| 106 | *last_sample_time = last; |
| 107 | } |
| 108 | |
waynema | 85f22d6 | 2019-04-17 07:48:31 -0700 | [diff] [blame] | 109 | // Returns true if the server is considered usable, i.e. if the success rate is not lower than the |
Bernie Innocenti | ee1b85b | 2018-09-25 14:23:19 +0900 | [diff] [blame] | 110 | // threshold for the stored stored samples. If not enough samples are stored, the server is |
| 111 | // considered usable. |
Bernie Innocenti | 34de3ba | 2019-02-19 18:08:36 +0900 | [diff] [blame] | 112 | static bool res_stats_usable_server(const res_params* params, res_stats* stats) { |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 113 | int successes = -1; |
| 114 | int errors = -1; |
| 115 | int timeouts = -1; |
| 116 | int internal_errors = -1; |
| 117 | int rtt_avg = -1; |
| 118 | time_t last_sample_time = 0; |
| 119 | android_net_res_stats_aggregate(stats, &successes, &errors, &timeouts, &internal_errors, |
Bernie Innocenti | f12d5bb | 2018-08-31 14:09:46 +0900 | [diff] [blame] | 120 | &rtt_avg, &last_sample_time); |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 121 | if (successes >= 0 && errors >= 0 && timeouts >= 0) { |
| 122 | int total = successes + errors + timeouts; |
Ken Chen | bab5014 | 2019-03-19 17:41:28 +0800 | [diff] [blame] | 123 | LOG(INFO) << __func__ << ": NS stats: S " << successes << " + E " << errors << " + T " |
| 124 | << timeouts << " + I " << internal_errors << " = " << total |
| 125 | << ", rtt = " << rtt_avg << ", min_samples = " << unsigned(params->min_samples); |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 126 | if (total >= params->min_samples && (errors > 0 || timeouts > 0)) { |
| 127 | int success_rate = successes * 100 / total; |
Ken Chen | bab5014 | 2019-03-19 17:41:28 +0800 | [diff] [blame] | 128 | LOG(INFO) << __func__ << ": success rate " << success_rate; |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 129 | if (success_rate < params->success_threshold) { |
Bernie Innocenti | f89b351 | 2018-08-30 07:34:37 +0900 | [diff] [blame] | 130 | time_t now = time(NULL); |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 131 | if (now - last_sample_time > params->sample_validity) { |
| 132 | // Note: It might be worth considering to expire old servers after their expiry |
| 133 | // date has been reached, however the code for returning the ring buffer to its |
| 134 | // previous non-circular state would induce additional complexity. |
Ken Chen | bab5014 | 2019-03-19 17:41:28 +0800 | [diff] [blame] | 135 | LOG(INFO) << __func__ << ": samples stale, retrying server"; |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 136 | _res_stats_clear_samples(stats); |
| 137 | } else { |
Ken Chen | bab5014 | 2019-03-19 17:41:28 +0800 | [diff] [blame] | 138 | LOG(INFO) << __func__ << ": too many resolution errors, ignoring server"; |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 139 | return 0; |
| 140 | } |
| 141 | } |
| 142 | } |
| 143 | } |
| 144 | return 1; |
| 145 | } |
| 146 | |
Luke Huang | 92915e5 | 2019-01-31 11:57:41 +0800 | [diff] [blame] | 147 | int android_net_res_stats_get_usable_servers(const res_params* params, res_stats stats[], |
| 148 | int nscount, bool usable_servers[]) { |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 149 | unsigned usable_servers_found = 0; |
| 150 | for (int ns = 0; ns < nscount; ns++) { |
Bernie Innocenti | ee1b85b | 2018-09-25 14:23:19 +0900 | [diff] [blame] | 151 | bool usable = res_stats_usable_server(params, &stats[ns]); |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 152 | if (usable) { |
| 153 | ++usable_servers_found; |
| 154 | } |
| 155 | usable_servers[ns] = usable; |
| 156 | } |
| 157 | // If there are no usable servers, consider all of them usable. |
| 158 | // TODO: Explore other possibilities, such as enabling only the best N servers, etc. |
| 159 | if (usable_servers_found == 0) { |
| 160 | for (int ns = 0; ns < nscount; ns++) { |
| 161 | usable_servers[ns] = true; |
| 162 | } |
| 163 | } |
Luke Huang | 92915e5 | 2019-01-31 11:57:41 +0800 | [diff] [blame] | 164 | return (usable_servers_found == 0) ? nscount : usable_servers_found; |
Bernie Innocenti | 5586419 | 2018-08-30 04:05:20 +0900 | [diff] [blame] | 165 | } |