update_engine: call res_init and retry one extra time on unresolved host
libcurl error

Based on https://curl.haxx.se/docs/todo.html#updated_DNS_server_while_running:

"If /etc/resolv.conf gets updated while a program using libcurl is running, it
may cause name resolves to fail unless res_init() is called. We should
consider calling res_init() + retry once unconditionally on all name resolve
failures to mitigate against this."

This CL added following behavior:
On libcurl returns CURLE_COULDNT_RESOLVE_HOST error code:
1. we increase the max retry count by 1 for the first time it happens in the
lifetime of an LibcurlHttpFetcher object.
2. we call res_init unconditionally.

We also add UMA metrics to measure whether calling res_init helps
mitigate the unresolved host problem. WIP CL: https://chromium-review.googlesource.com/c/chromium/src/+/1698722

BUG=chromium:982813
TEST=FEATURES="test" emerge-kefka update_engine, tested on a device

Change-Id: Ia894eae93b3a0adbac1a831e657b75cba835dfa0
diff --git a/libcurl_http_fetcher.cc b/libcurl_http_fetcher.cc
index 06722fd..247327a 100644
--- a/libcurl_http_fetcher.cc
+++ b/libcurl_http_fetcher.cc
@@ -16,6 +16,8 @@
 
 #include "update_engine/libcurl_http_fetcher.h"
 
+#include <netinet/in.h>
+#include <resolv.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -480,14 +482,45 @@
   if (http_response_code_) {
     LOG(INFO) << "HTTP response code: " << http_response_code_;
     no_network_retry_count_ = 0;
+    unresolved_host_state_machine_.UpdateState(false);
   } else {
     LOG(ERROR) << "Unable to get http response code.";
-    LogCurlHandleInfo();
+    CURLcode curl_code = GetCurlCode();
+    LOG(ERROR) << "Return code for the transfer: " << curl_code;
+    if (curl_code == CURLE_COULDNT_RESOLVE_HOST) {
+      LOG(ERROR) << "libcurl can not resolve host.";
+      unresolved_host_state_machine_.UpdateState(true);
+      if (delegate_) {
+        delegate_->ReportUpdateCheckMetrics(
+            metrics::CheckResult::kUnset,
+            metrics::CheckReaction::kUnset,
+            metrics::DownloadErrorCode::kUnresolvedHost);
+      }
+    }
   }
 
   // we're done!
   CleanUp();
 
+  if (unresolved_host_state_machine_.getState() ==
+      UnresolvedHostStateMachine::State::kRetry) {
+    // Based on
+    // https://curl.haxx.se/docs/todo.html#updated_DNS_server_while_running,
+    // update_engine process should call res_init() and unconditionally retry.
+    res_init();
+    no_network_max_retries_++;
+    LOG(INFO) << "Will retry after reloading resolv.conf because last attempt "
+                 "failed to resolve host.";
+  } else if (unresolved_host_state_machine_.getState() ==
+             UnresolvedHostStateMachine::State::kRetriedSuccess) {
+    if (delegate_) {
+      delegate_->ReportUpdateCheckMetrics(
+          metrics::CheckResult::kUnset,
+          metrics::CheckReaction::kUnset,
+          metrics::DownloadErrorCode::kUnresolvedHostRecovered);
+    }
+  }
+
   // TODO(petkov): This temporary code tries to deal with the case where the
   // update engine performs an update check while the network is not ready
   // (e.g., right after resume). Longer term, we should check if the network
@@ -813,7 +846,8 @@
   }
 }
 
-void LibcurlHttpFetcher::LogCurlHandleInfo() {
+CURLcode LibcurlHttpFetcher::GetCurlCode() {
+  CURLcode curl_code = CURLE_OK;
   while (true) {
     // Repeated calls to |curl_multi_info_read| will return a new struct each
     // time, until a NULL is returned as a signal that there is no more to get
@@ -831,7 +865,7 @@
       CHECK_EQ(curl_handle_, curl_msg->easy_handle);
       // Transfer return code reference:
       // https://curl.haxx.se/libcurl/c/libcurl-errors.html
-      LOG(ERROR) << "Return code for the transfer: " << curl_msg->data.result;
+      curl_code = curl_msg->data.result;
     }
   }
 
@@ -842,6 +876,32 @@
   if (res == CURLE_OK && connect_error) {
     LOG(ERROR) << "Connect error code from the OS: " << connect_error;
   }
+
+  return curl_code;
+}
+
+void UnresolvedHostStateMachine::UpdateState(bool failed_to_resolve_host) {
+  switch (state_) {
+    case State::kInit:
+      if (failed_to_resolve_host) {
+        state_ = State::kRetry;
+      }
+      break;
+    case State::kRetry:
+      if (failed_to_resolve_host) {
+        state_ = State::kNotRetry;
+      } else {
+        state_ = State::kRetriedSuccess;
+      }
+      break;
+    case State::kNotRetry:
+      break;
+    case State::kRetriedSuccess:
+      break;
+    default:
+      NOTREACHED();
+      break;
+  }
 }
 
 }  // namespace chromeos_update_engine
diff --git a/libcurl_http_fetcher.h b/libcurl_http_fetcher.h
index 3978b70..cdd489d 100644
--- a/libcurl_http_fetcher.h
+++ b/libcurl_http_fetcher.h
@@ -37,6 +37,48 @@
 
 namespace chromeos_update_engine {
 
+// |UnresolvedHostStateMachine| is a representation of internal state machine of
+// |LibcurlHttpFetcher|.
+class UnresolvedHostStateMachine {
+ public:
+  UnresolvedHostStateMachine() = default;
+  enum class State {
+    kInit = 0,
+    kRetry = 1,
+    kRetriedSuccess = 2,
+    kNotRetry = 3,
+  };
+
+  State getState() { return state_; }
+
+  // Updates the following internal state machine:
+  //
+  // |kInit|
+  //   |
+  //   |
+  //   \/
+  // (Try, host Unresolved)
+  //   |
+  //   |
+  //   \/
+  // |kRetry| --> (Retry, host resolved)
+  //   |                                  |
+  //   |                                  |
+  //   \/                                 \/
+  // (Retry, host Unresolved)    |kRetriedSuccess|
+  //   |
+  //   |
+  //   \/
+  // |kNotRetry|
+  //
+  void UpdateState(bool failed_to_resolve_host);
+
+ private:
+  State state_ = {State::kInit};
+
+  DISALLOW_COPY_AND_ASSIGN(UnresolvedHostStateMachine);
+};
+
 class LibcurlHttpFetcher : public HttpFetcher {
  public:
   LibcurlHttpFetcher(ProxyResolver* proxy_resolver,
@@ -88,6 +130,8 @@
     no_network_max_retries_ = retries;
   }
 
+  int get_no_network_max_retries() { return no_network_max_retries_; }
+
   void set_server_to_check(ServerToCheck server_to_check) {
     server_to_check_ = server_to_check;
   }
@@ -125,10 +169,8 @@
   // Asks libcurl for the http response code and stores it in the object.
   void GetHttpResponseCode();
 
-  // Logs curl handle info.
-  // This can be called only when an http request failed to avoid spamming the
-  // logs. This must be called after |ResumeTransfer| and before |CleanUp|.
-  void LogCurlHandleInfo();
+  // Returns the last |CURLcode|.
+  CURLcode GetCurlCode();
 
   // Checks whether stored HTTP response is within the success range.
   inline bool IsHttpResponseSuccess() {
@@ -280,6 +322,9 @@
   // True if this object is for update check.
   bool is_update_check_{false};
 
+  // Internal state machine.
+  UnresolvedHostStateMachine unresolved_host_state_machine_;
+
   int low_speed_limit_bps_{kDownloadLowSpeedLimitBps};
   int low_speed_time_seconds_{kDownloadLowSpeedTimeSeconds};
   int connect_timeout_seconds_{kDownloadConnectTimeoutSeconds};
diff --git a/libcurl_http_fetcher_unittest.cc b/libcurl_http_fetcher_unittest.cc
index 88e48fa..7f00dae 100644
--- a/libcurl_http_fetcher_unittest.cc
+++ b/libcurl_http_fetcher_unittest.cc
@@ -43,6 +43,7 @@
   brillo::FakeMessageLoop loop_{nullptr};
   FakeHardware fake_hardware_;
   LibcurlHttpFetcher libcurl_fetcher_{nullptr, &fake_hardware_};
+  UnresolvedHostStateMachine state_machine_;
 };
 
 TEST_F(LibcurlHttpFetcherTest, GetEmptyHeaderValueTest) {
@@ -78,4 +79,60 @@
   EXPECT_EQ(header_value, actual_header_value);
 }
 
+TEST_F(LibcurlHttpFetcherTest, InvalidURLTest) {
+  int no_network_max_retries = 1;
+  libcurl_fetcher_.set_no_network_max_retries(no_network_max_retries);
+
+  libcurl_fetcher_.BeginTransfer("not-an-URL");
+  while (loop_.PendingTasks()) {
+    loop_.RunOnce(true);
+  }
+
+  EXPECT_EQ(libcurl_fetcher_.get_no_network_max_retries(),
+            no_network_max_retries);
+}
+
+TEST_F(LibcurlHttpFetcherTest, CouldntResolveHostTest) {
+  int no_network_max_retries = 1;
+  libcurl_fetcher_.set_no_network_max_retries(no_network_max_retries);
+
+  // This test actually sends request to internet but according to
+  // https://tools.ietf.org/html/rfc2606#section-2, .invalid domain names are
+  // reserved and sure to be invalid. Ideally we should mock libcurl or
+  // reorganize LibcurlHttpFetcher so the part that sends request can be mocked
+  // easily.
+  // TODO(xiaochu) Refactor LibcurlHttpFetcher (and its relates) so it's
+  // easier to mock the part that depends on internet connectivity.
+  libcurl_fetcher_.BeginTransfer("https://An-uNres0lvable-uRl.invalid");
+  while (loop_.PendingTasks()) {
+    loop_.RunOnce(true);
+  }
+
+  // If libcurl fails to resolve the name, we call res_init() to reload
+  // resolv.conf and retry exactly once more. See crbug.com/982813 for details.
+  EXPECT_EQ(libcurl_fetcher_.get_no_network_max_retries(),
+            no_network_max_retries + 1);
+}
+
+TEST_F(LibcurlHttpFetcherTest, HttpFetcherStateMachineRetryFailedTest) {
+  state_machine_.UpdateState(true);
+  state_machine_.UpdateState(true);
+  EXPECT_EQ(state_machine_.getState(),
+            UnresolvedHostStateMachine::State::kNotRetry);
+}
+
+TEST_F(LibcurlHttpFetcherTest, HttpFetcherStateMachineRetrySucceedTest) {
+  state_machine_.UpdateState(true);
+  state_machine_.UpdateState(false);
+  EXPECT_EQ(state_machine_.getState(),
+            UnresolvedHostStateMachine::State::kRetriedSuccess);
+}
+
+TEST_F(LibcurlHttpFetcherTest, HttpFetcherStateMachineNoRetryTest) {
+  state_machine_.UpdateState(false);
+  state_machine_.UpdateState(false);
+  EXPECT_EQ(state_machine_.getState(),
+            UnresolvedHostStateMachine::State::kInit);
+}
+
 }  // namespace chromeos_update_engine
diff --git a/metrics_constants.h b/metrics_constants.h
index b3833a3..161d585 100644
--- a/metrics_constants.h
+++ b/metrics_constants.h
@@ -60,6 +60,11 @@
   // above block and before the kInputMalformed field. This
   // is to ensure that error codes are not reordered.
 
+  // This error is reported when libcurl returns CURLE_COULDNT_RESOLVE_HOST and
+  // calling res_init() can recover.
+  kUnresolvedHostRecovered = 97,
+  // This error is reported when libcurl returns CURLE_COULDNT_RESOLVE_HOST.
+  kUnresolvedHost = 98,
   // This error is reported when libcurl has an internal error that
   // update_engine can't recover from.
   kInternalError = 99,