Merge "init start time tracking." am: 601bf9e7c9
am: eefaa1f250

Change-Id: I48d8ec69d0f6cad1ffbd152e253ae4ff4d0eaec9
diff --git a/init/builtins.cpp b/init/builtins.cpp
index 9d1bb80..8524234 100644
--- a/init/builtins.cpp
+++ b/init/builtins.cpp
@@ -68,7 +68,7 @@
 #define UNMOUNT_CHECK_MS 5000
 #define UNMOUNT_CHECK_TIMES 10
 
-static const int kTerminateServiceDelayMicroSeconds = 50000;
+static constexpr std::chrono::nanoseconds kCommandRetryTimeout = 5s;
 
 static int insmod(const char *filename, const char *options, int flags) {
     int fd = open(filename, O_RDONLY | O_NOFOLLOW | O_CLOEXEC);
@@ -449,7 +449,7 @@
         return -1;
     } else {
         if (wait)
-            wait_for_file(source, COMMAND_RETRY_TIMEOUT);
+            wait_for_file(source, kCommandRetryTimeout);
         if (mount(source, target, system, flags, options) < 0) {
             return -1;
         }
@@ -754,7 +754,7 @@
             }
 
             // Wait a bit before recounting the number or running services.
-            usleep(kTerminateServiceDelayMicroSeconds);
+            usleep(50000 /*us*/);
         }
         LOG(VERBOSE) << "Terminating running services took " << t.duration() << " seconds";
     }
@@ -965,11 +965,11 @@
 
 static int do_wait(const std::vector<std::string>& args) {
     if (args.size() == 2) {
-        return wait_for_file(args[1].c_str(), COMMAND_RETRY_TIMEOUT);
+        return wait_for_file(args[1].c_str(), kCommandRetryTimeout);
     } else if (args.size() == 3) {
         int timeout;
         if (android::base::ParseInt(args[2], &timeout)) {
-            return wait_for_file(args[1].c_str(), timeout);
+            return wait_for_file(args[1].c_str(), std::chrono::seconds(timeout));
         }
     }
     return -1;
diff --git a/init/init.cpp b/init/init.cpp
index 7c37d28..cbd46bf 100644
--- a/init/init.cpp
+++ b/init/init.cpp
@@ -18,6 +18,7 @@
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <inttypes.h>
 #include <libgen.h>
 #include <paths.h>
 #include <signal.h>
@@ -67,6 +68,8 @@
 #include "util.h"
 #include "watchdogd.h"
 
+using android::base::StringPrintf;
+
 struct selabel_handle *sehandle;
 struct selabel_handle *sehandle_prop;
 
@@ -75,7 +78,7 @@
 static char qemu[32];
 
 std::string default_console = "/dev/console";
-static time_t process_needs_restart;
+static time_t process_needs_restart_at;
 
 const char *ENV[32];
 
@@ -132,11 +135,10 @@
 
 static void restart_processes()
 {
-    process_needs_restart = 0;
-    ServiceManager::GetInstance().
-        ForEachServiceWithFlags(SVC_RESTARTING, [] (Service* s) {
-                s->RestartIfNeeded(process_needs_restart);
-            });
+    process_needs_restart_at = 0;
+    ServiceManager::GetInstance().ForEachServiceWithFlags(SVC_RESTARTING, [](Service* s) {
+        s->RestartIfNeeded(&process_needs_restart_at);
+    });
 }
 
 void handle_control_message(const std::string& msg, const std::string& name) {
@@ -164,7 +166,7 @@
     // Any longer than 1s is an unreasonable length of time to delay booting.
     // If you're hitting this timeout, check that you didn't make your
     // sepolicy regular expressions too expensive (http://b/19899875).
-    if (wait_for_file(COLDBOOT_DONE, 1)) {
+    if (wait_for_file(COLDBOOT_DONE, 1s)) {
         LOG(ERROR) << "Timed out waiting for " COLDBOOT_DONE;
     }
 
@@ -268,15 +270,14 @@
 
     if (for_emulator) {
         // In the emulator, export any kernel option with the "ro.kernel." prefix.
-        property_set(android::base::StringPrintf("ro.kernel.%s", key.c_str()).c_str(), value.c_str());
+        property_set(StringPrintf("ro.kernel.%s", key.c_str()).c_str(), value.c_str());
         return;
     }
 
     if (key == "qemu") {
         strlcpy(qemu, value.c_str(), sizeof(qemu));
     } else if (android::base::StartsWith(key, "androidboot.")) {
-        property_set(android::base::StringPrintf("ro.boot.%s", key.c_str() + 12).c_str(),
-                     value.c_str());
+        property_set(StringPrintf("ro.boot.%s", key.c_str() + 12).c_str(), value.c_str());
     }
 }
 
@@ -314,7 +315,7 @@
 static void process_kernel_dt() {
     static const char android_dir[] = "/proc/device-tree/firmware/android";
 
-    std::string file_name = android::base::StringPrintf("%s/compatible", android_dir);
+    std::string file_name = StringPrintf("%s/compatible", android_dir);
 
     std::string dt_file;
     android::base::ReadFileToString(file_name, &dt_file);
@@ -332,12 +333,12 @@
             continue;
         }
 
-        file_name = android::base::StringPrintf("%s/%s", android_dir, dp->d_name);
+        file_name = StringPrintf("%s/%s", android_dir, dp->d_name);
 
         android::base::ReadFileToString(file_name, &dt_file);
         std::replace(dt_file.begin(), dt_file.end(), ',', '.');
 
-        std::string property_name = android::base::StringPrintf("ro.boot.%s", dp->d_name);
+        std::string property_name = StringPrintf("ro.boot.%s", dp->d_name);
         property_set(property_name.c_str(), dt_file.c_str());
     }
 }
@@ -566,12 +567,14 @@
         return watchdogd_main(argc, argv);
     }
 
+    boot_clock::time_point start_time = boot_clock::now();
+
     // Clear the umask.
     umask(0);
 
     add_environment("PATH", _PATH_DEFPATH);
 
-    bool is_first_stage = (argc == 1) || (strcmp(argv[1], "--second-stage") != 0);
+    bool is_first_stage = (getenv("INIT_SECOND_STAGE") == nullptr);
 
     // Don't expose the raw commandline to unprivileged processes.
     chmod("/proc/cmdline", 0440);
@@ -596,32 +599,34 @@
     // talk to the outside world...
     InitKernelLogging(argv);
 
-    if (is_first_stage) {
-        LOG(INFO) << "init first stage started!";
+    LOG(INFO) << "init " << (is_first_stage ? "first" : "second") << " stage started!";
 
+    if (is_first_stage) {
         // Mount devices defined in android.early.* kernel commandline
         early_mount();
 
-        // Set up SELinux, including loading the SELinux policy if we're in the kernel domain.
+        // Set up SELinux, loading the SELinux policy.
         selinux_initialize(true);
 
-        // If we're in the kernel domain, re-exec init to transition to the init domain now
+        // We're in the kernel domain, so re-exec init to transition to the init domain now
         // that the SELinux policy has been loaded.
-
         if (restorecon("/init") == -1) {
             PLOG(ERROR) << "restorecon failed";
             security_failure();
         }
+
+        setenv("INIT_SECOND_STAGE", "true", 1);
+
+        uint64_t start_ns = start_time.time_since_epoch().count();
+        setenv("INIT_STARTED_AT", StringPrintf("%" PRIu64, start_ns).c_str(), 1);
+
         char* path = argv[0];
-        char* args[] = { path, const_cast<char*>("--second-stage"), nullptr };
+        char* args[] = { path, nullptr };
         if (execv(path, args) == -1) {
             PLOG(ERROR) << "execv(\"" << path << "\") failed";
             security_failure();
         }
-
     } else {
-        LOG(INFO) << "init second stage started!";
-
         // Indicate that booting is in progress to background fw loaders, etc.
         close(open("/dev/.booting", O_WRONLY | O_CREAT | O_CLOEXEC, 0000));
 
@@ -636,7 +641,10 @@
         // used by init as well as the current required properties.
         export_kernel_boot_props();
 
-        // Now set up SELinux for second stage
+        // Make the time that init started available for bootstat to log.
+        property_set("init.start", getenv("INIT_STARTED_AT"));
+
+        // Now set up SELinux for second stage.
         selinux_initialize(false);
     }
 
@@ -710,21 +718,22 @@
             restart_processes();
         }
 
-        int timeout = -1;
-        if (process_needs_restart) {
-            timeout = (process_needs_restart - gettime()) * 1000;
-            if (timeout < 0)
-                timeout = 0;
+        // By default, sleep until something happens.
+        int epoll_timeout_ms = -1;
+
+        // If there's more work to do, wake up again immediately.
+        if (am.HasMoreCommands()) epoll_timeout_ms = 0;
+
+        // If there's a process that needs restarting, wake up in time for that.
+        if (process_needs_restart_at != 0) {
+            epoll_timeout_ms = (process_needs_restart_at - time(nullptr)) * 1000;
+            if (epoll_timeout_ms < 0) epoll_timeout_ms = 0;
         }
 
-        if (am.HasMoreCommands()) {
-            timeout = 0;
-        }
-
-        bootchart_sample(&timeout);
+        bootchart_sample(&epoll_timeout_ms);
 
         epoll_event ev;
-        int nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd, &ev, 1, timeout));
+        int nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd, &ev, 1, epoll_timeout_ms));
         if (nr == -1) {
             PLOG(ERROR) << "epoll_wait failed";
         } else if (nr == 1) {
diff --git a/init/init.h b/init/init.h
index 0019337..cfb3139 100644
--- a/init/init.h
+++ b/init/init.h
@@ -22,8 +22,6 @@
 class Action;
 class Service;
 
-#define COMMAND_RETRY_TIMEOUT 5
-
 extern const char *ENV[32];
 extern bool waiting_for_exec;
 extern std::string default_console;
diff --git a/init/readme.txt b/init/readme.txt
index 5173ca6..3cfdc4e 100644
--- a/init/readme.txt
+++ b/init/readme.txt
@@ -443,8 +443,16 @@
 Init provides information about the services that it is responsible
 for via the below properties.
 
+init.start
+  Time after boot in ns (via the CLOCK_BOOTTIME clock) at which the first
+  stage of init started.
+
 init.svc.<name>
-   State of a named service ("stopped", "stopping", "running", "restarting")
+  State of a named service ("stopped", "stopping", "running", "restarting")
+
+init.svc.<name>.start
+  Time after boot in ns (via the CLOCK_BOOTTIME clock) that the service was
+  most recently started.
 
 
 Bootcharting
@@ -540,10 +548,10 @@
 
 For quicker turnaround when working on init itself, use:
 
-  mm -j
-  m ramdisk-nodeps
-  m bootimage-nodeps
-  adb reboot bootloader
+  mm -j &&
+  m ramdisk-nodeps &&
+  m bootimage-nodeps &&
+  adb reboot bootloader &&
   fastboot boot $ANDROID_PRODUCT_OUT/boot.img
 
 Alternatively, use the emulator:
diff --git a/init/service.cpp b/init/service.cpp
index f093dd9..1f53a1b 100644
--- a/init/service.cpp
+++ b/init/service.cpp
@@ -17,6 +17,7 @@
 #include "service.h"
 
 #include <fcntl.h>
+#include <inttypes.h>
 #include <linux/securebits.h>
 #include <sched.h>
 #include <sys/mount.h>
@@ -51,9 +52,6 @@
 using android::base::StringPrintf;
 using android::base::WriteStringToFile;
 
-#define CRITICAL_CRASH_THRESHOLD    4       // if we crash >4 times ...
-#define CRITICAL_CRASH_WINDOW       (4*60)  // ... in 4 minutes, goto recovery
-
 static std::string ComputeContextFromExecutable(std::string& service_name,
                                                 const std::string& service_path) {
     std::string computed_context;
@@ -154,8 +152,8 @@
 
 Service::Service(const std::string& name, const std::string& classname,
                  const std::vector<std::string>& args)
-    : name_(name), classname_(classname), flags_(0), pid_(0), time_started_(0),
-      time_crashed_(0), nr_crashed_(0), uid_(0), gid_(0), namespace_flags_(0),
+    : name_(name), classname_(classname), flags_(0), pid_(0),
+      crash_count_(0), uid_(0), gid_(0), namespace_flags_(0),
       seclabel_(""), ioprio_class_(IoSchedClass_NONE), ioprio_pri_(0),
       priority_(0), oom_score_adjust_(-1000), args_(args) {
     onrestart_.InitSingleTrigger("onrestart");
@@ -168,7 +166,7 @@
                  const std::string& seclabel,
                  const std::vector<std::string>& args)
     : name_(name), classname_(classname), flags_(flags), pid_(0),
-      time_started_(0), time_crashed_(0), nr_crashed_(0), uid_(uid), gid_(gid),
+      crash_count_(0), uid_(uid), gid_(gid),
       supp_gids_(supp_gids), capabilities_(capabilities),
       namespace_flags_(namespace_flags), seclabel_(seclabel),
       ioprio_class_(IoSchedClass_NONE), ioprio_pri_(0), priority_(0),
@@ -190,6 +188,12 @@
     }
 
     property_set(prop_name.c_str(), new_state.c_str());
+
+    if (new_state == "running") {
+        prop_name += ".start";
+        uint64_t start_ns = time_started_.time_since_epoch().count();
+        property_set(prop_name.c_str(), StringPrintf("%" PRIu64, start_ns).c_str());
+    }
 }
 
 void Service::KillProcessGroup(int signal) {
@@ -274,20 +278,19 @@
         return false;
     }
 
-    time_t now = gettime();
+    // If we crash > 4 times in 4 minutes, reboot into recovery.
+    boot_clock::time_point now = boot_clock::now();
     if ((flags_ & SVC_CRITICAL) && !(flags_ & SVC_RESTART)) {
-        if (time_crashed_ + CRITICAL_CRASH_WINDOW >= now) {
-            if (++nr_crashed_ > CRITICAL_CRASH_THRESHOLD) {
-                LOG(ERROR) << "critical process '" << name_ << "' exited "
-                           << CRITICAL_CRASH_THRESHOLD << " times in "
-                           << (CRITICAL_CRASH_WINDOW / 60) << " minutes; "
+        if (now < time_crashed_ + 4min) {
+            if (++crash_count_ > 4) {
+                LOG(ERROR) << "critical process '" << name_ << "' exited 4 times in 4 minutes; "
                            << "rebooting into recovery mode";
                 android_reboot(ANDROID_RB_RESTART2, 0, "recovery");
                 return false;
             }
         } else {
             time_crashed_ = now;
-            nr_crashed_ = 1;
+            crash_count_ = 1;
         }
     }
 
@@ -553,7 +556,6 @@
     // Starting a service removes it from the disabled or reset state and
     // immediately takes it out of the restarting state if it was in there.
     flags_ &= (~(SVC_DISABLED|SVC_RESTARTING|SVC_RESET|SVC_RESTART|SVC_DISABLED_START));
-    time_started_ = 0;
 
     // Running processes require no additional work --- if they're in the
     // process of exiting, we've ensured that they will immediately restart
@@ -667,7 +669,7 @@
         }
     }
 
-    time_started_ = gettime();
+    time_started_ = boot_clock::now();
     pid_ = pid;
     flags_ |= SVC_RUNNING;
 
@@ -731,18 +733,19 @@
     } /* else: Service is restarting anyways. */
 }
 
-void Service::RestartIfNeeded(time_t& process_needs_restart) {
-    time_t next_start_time = time_started_ + 5;
-
-    if (next_start_time <= gettime()) {
+void Service::RestartIfNeeded(time_t* process_needs_restart_at) {
+    boot_clock::time_point now = boot_clock::now();
+    boot_clock::time_point next_start = time_started_ + 5s;
+    if (now > next_start) {
         flags_ &= (~SVC_RESTARTING);
         Start();
         return;
     }
 
-    if ((next_start_time < process_needs_restart) ||
-        (process_needs_restart == 0)) {
-        process_needs_restart = next_start_time;
+    time_t next_start_time_t = time(nullptr) +
+        time_t(std::chrono::duration_cast<std::chrono::seconds>(next_start - now).count());
+    if (next_start_time_t < *process_needs_restart_at || *process_needs_restart_at == 0) {
+        *process_needs_restart_at = next_start_time_t;
     }
 }
 
diff --git a/init/service.h b/init/service.h
index d9e8f57..013e65f 100644
--- a/init/service.h
+++ b/init/service.h
@@ -30,6 +30,7 @@
 #include "descriptors.h"
 #include "init_parser.h"
 #include "keyword_map.h"
+#include "util.h"
 
 #define SVC_DISABLED       0x001  // do not autostart with class
 #define SVC_ONESHOT        0x002  // do not restart on exit
@@ -75,7 +76,7 @@
     void Stop();
     void Terminate();
     void Restart();
-    void RestartIfNeeded(time_t& process_needs_restart);
+    void RestartIfNeeded(time_t* process_needs_restart_at);
     bool Reap();
     void DumpState() const;
 
@@ -134,9 +135,9 @@
 
     unsigned flags_;
     pid_t pid_;
-    time_t time_started_;    // time of last start
-    time_t time_crashed_;    // first crash within inspection window
-    int nr_crashed_;         // number of times crashed within window
+    boot_clock::time_point time_started_; // time of last start
+    boot_clock::time_point time_crashed_; // first crash within inspection window
+    int crash_count_;                     // number of times crashed within window
 
     uid_t uid_;
     gid_t gid_;
diff --git a/init/util.cpp b/init/util.cpp
index 705fbb4..5288a05 100644
--- a/init/util.cpp
+++ b/init/util.cpp
@@ -258,16 +258,11 @@
     return result;
 }
 
-time_t gettime() {
-    timespec now;
-    clock_gettime(CLOCK_MONOTONIC, &now);
-    return now.tv_sec;
-}
-
-uint64_t gettime_ns() {
-    timespec now;
-    clock_gettime(CLOCK_MONOTONIC, &now);
-    return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000000) + now.tv_nsec;
+boot_clock::time_point boot_clock::now() {
+  timespec ts;
+  clock_gettime(CLOCK_BOOTTIME, &ts);
+  return boot_clock::time_point(std::chrono::seconds(ts.tv_sec) +
+                                std::chrono::nanoseconds(ts.tv_nsec));
 }
 
 int mkdir_recursive(const char *pathname, mode_t mode)
@@ -325,16 +320,15 @@
     }
 }
 
-int wait_for_file(const char *filename, int timeout)
-{
-    struct stat info;
-    uint64_t timeout_time_ns = gettime_ns() + timeout * UINT64_C(1000000000);
-    int ret = -1;
+int wait_for_file(const char* filename, std::chrono::nanoseconds timeout) {
+    boot_clock::time_point timeout_time = boot_clock::now() + timeout;
+    while (boot_clock::now() < timeout_time) {
+        struct stat sb;
+        if (stat(filename, &sb) != -1) return 0;
 
-    while (gettime_ns() < timeout_time_ns && ((ret = stat(filename, &info)) < 0))
         usleep(10000);
-
-    return ret;
+    }
+    return -1;
 }
 
 void import_kernel_cmdline(bool in_qemu,
diff --git a/init/util.h b/init/util.h
index b7531cc..ef40748 100644
--- a/init/util.h
+++ b/init/util.h
@@ -20,11 +20,14 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
+#include <chrono>
 #include <string>
 #include <functional>
 
 #define COLDBOOT_DONE "/dev/.coldboot_done"
 
+using namespace std::chrono_literals;
+
 int create_socket(const char *name, int type, mode_t perm,
                   uid_t uid, gid_t gid, const char *socketcon);
 int create_file(const char *path, int mode, mode_t perm,
@@ -33,27 +36,35 @@
 bool read_file(const char* path, std::string* content);
 int write_file(const char* path, const char* content);
 
-time_t gettime();
-uint64_t gettime_ns();
+// A std::chrono clock based on CLOCK_BOOTTIME.
+class boot_clock {
+ public:
+  typedef std::chrono::nanoseconds duration;
+  typedef std::chrono::time_point<boot_clock, duration> time_point;
+  static constexpr bool is_steady = true;
+
+  static time_point now();
+};
 
 class Timer {
  public:
-  Timer() : t0(gettime_ns()) {
+  Timer() : start_(boot_clock::now()) {
   }
 
   double duration() {
-    return static_cast<double>(gettime_ns() - t0) / 1000000000.0;
+    typedef std::chrono::duration<double> double_duration;
+    return std::chrono::duration_cast<double_duration>(boot_clock::now() - start_).count();
   }
 
  private:
-  uint64_t t0;
+  boot_clock::time_point start_;
 };
 
 unsigned int decode_uid(const char *s);
 
 int mkdir_recursive(const char *pathname, mode_t mode);
 void sanitize(char *p);
-int wait_for_file(const char *filename, int timeout);
+int wait_for_file(const char *filename, std::chrono::nanoseconds timeout);
 void import_kernel_cmdline(bool in_qemu,
                            const std::function<void(const std::string&, const std::string&, bool)>&);
 int make_dir(const char *path, mode_t mode);