crash_reporter: Fix crash_sender

- Remove all the ChromeOS specific logic.
- Fix paths to correct Android paths.
- Add periodic_scheduler, and add crash_sender to init.

Bug: 23231196
Bug: 23233267

Change-Id: I12de28bfbe5d5b08831eda9b28c6d7a669c22290
diff --git a/crash_reporter/Android.mk b/crash_reporter/Android.mk
index 6b98af4..467432a 100644
--- a/crash_reporter/Android.mk
+++ b/crash_reporter/Android.mk
@@ -81,6 +81,7 @@
 LOCAL_MODULE := crash_sender
 LOCAL_MODULE_CLASS := EXECUTABLES
 LOCAL_MODULE_PATH := $(TARGET_OUT_EXECUTABLES)
+LOCAL_REQUIRED_MODULES := curl periodic_scheduler
 LOCAL_SRC_FILES := crash_sender
 include $(BUILD_PREBUILT)
 
@@ -113,6 +114,15 @@
 LOCAL_SRC_FILES := crash_reporter_logs.conf
 include $(BUILD_PREBUILT)
 
+# Periodic scheduler: prebuilt script that init uses to launch crash_sender.
+# ========================================================
+include $(CLEAR_VARS)
+LOCAL_MODULE := periodic_scheduler
+LOCAL_MODULE_CLASS := EXECUTABLES
+LOCAL_MODULE_PATH := $(TARGET_OUT_EXECUTABLES)
+LOCAL_SRC_FILES := periodic_scheduler
+include $(BUILD_PREBUILT)
+
 # Crash reporter tests.
 # ========================================================
 include $(CLEAR_VARS)
diff --git a/crash_reporter/crash_collector.cc b/crash_reporter/crash_collector.cc
index 77755f4..b81a936 100644
--- a/crash_reporter/crash_collector.cc
+++ b/crash_reporter/crash_collector.cc
@@ -42,12 +42,13 @@
 
 const char kCollectChromeFile[] =
     "/mnt/stateful_partition/etc/collect_chrome_crashes";
-const char kCrashTestInProgressPath[] = "/tmp/crash-test-in-progress";
+const char kCrashTestInProgressPath[] =
+    "/data/misc/crash_reporter/tmp/crash-test-in-progress";
 const char kDefaultLogConfig[] = "/etc/crash_reporter_logs.conf";
 const char kDefaultUserName[] = "chronos";
-const char kLeaveCoreFile[] = "/root/.leave_core";
+const char kLeaveCoreFile[] = "/data/misc/crash_reporter/.leave_core";
 const char kLsbRelease[] = "/etc/lsb-release";
-const char kShellPath[] = "/bin/sh";
+const char kShellPath[] = "/system/bin/sh";
 const char kSystemCrashPath[] = "/data/misc/crash_reporter/crash";
 const char kUploadVarPrefix[] = "upload_var_";
 const char kUploadFilePrefix[] = "upload_file_";
diff --git a/crash_reporter/crash_sender b/crash_reporter/crash_sender
index fa2f8fc..7f9062a 100755
--- a/crash_reporter/crash_sender
+++ b/crash_reporter/crash_sender
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/system/bin/sh
 
 # Copyright (C) 2010 The Android Open Source Project
 #
@@ -17,20 +17,20 @@
 set -e
 
 # Default product ID in crash report (used if GOOGLE_CRASH_* is undefined).
-CHROMEOS_PRODUCT=ChromeOS
+BRILLO_PRODUCT=Brillo
+
+# Base directory that contains any crash reporter state files.
+CRASH_STATE_DIR="/data/misc/crash_reporter"
 
 # File whose existence implies crash reports may be sent, and whose
 # contents includes our machine's anonymized guid.
-CONSENT_ID="/home/chronos/Consent To Send Stats"
+CONSENT_ID="/data/misc/metrics/enabled"
 
 # Crash sender lock in case the sender is already running.
-CRASH_SENDER_LOCK="/var/lock/crash_sender"
+CRASH_SENDER_LOCK="${CRASH_STATE_DIR}/lock/crash_sender"
 
 # Path to file that indicates a crash test is currently running.
-CRASH_TEST_IN_PROGRESS_FILE="/tmp/crash-test-in-progress"
-
-# Path to find which is required for computing the crash rate.
-FIND="/usr/bin/find"
+CRASH_TEST_IN_PROGRESS_FILE="${CRASH_STATE_DIR}/tmp/crash-test-in-progress"
 
 # Set this to 1 in the environment to allow uploading crash reports
 # for unofficial versions.
@@ -40,20 +40,17 @@
 HWCLASS_PATH="/sys/devices/platform/chromeos_acpi/HWID"
 
 # Path to file that indicates this is a developer image.
-LEAVE_CORE_FILE="/root/.leave_core"
+LEAVE_CORE_FILE="${CRASH_STATE_DIR}/.leave_core"
 
 # Path to list_proxies.
-LIST_PROXIES="/usr/bin/list_proxies"
+LIST_PROXIES="list_proxies"
 
 # Maximum crashes to send per day.
 MAX_CRASH_RATE=${MAX_CRASH_RATE:-32}
 
-# Path to metrics_client.
-METRICS_CLIENT="/usr/bin/metrics_client"
-
 # File whose existence mocks crash sending.  If empty we pretend the
 # crash sending was successful, otherwise unsuccessful.
-MOCK_CRASH_SENDING="/tmp/mock-crash-sending"
+MOCK_CRASH_SENDING="${CRASH_STATE_DIR}/tmp/mock-crash-sending"
 
 # Set this to 1 in the environment to pretend to have booted in developer
 # mode.  This is used by autotests.
@@ -64,40 +61,39 @@
 
 # File whose existence causes crash sending to be delayed (for testing).
 # Must be stateful to enable testing kernel crashes.
-PAUSE_CRASH_SENDING="/var/lib/crash_sender_paused"
+PAUSE_CRASH_SENDING="${CRASH_STATE_DIR}/lock/crash_sender_paused"
 
 # URL to send official build crash reports to.
 REPORT_UPLOAD_PROD_URL="https://clients2.google.com/cr/report"
 
 # Path to a directory of restricted certificates which includes
 # a certificate for ${REPORT_UPLOAD_PROD_URL}.
-RESTRICTED_CERTIFICATES_PATH="/usr/share/chromeos-ca-certificates"
+RESTRICTED_CERTIFICATES_PATH="/system/etc/security/cacerts"
 
 # File whose existence implies we're running and not to start again.
-RUN_FILE="/var/run/crash_sender.pid"
+RUN_FILE="${CRASH_STATE_DIR}/run/crash_sender.pid"
 
 # Maximum time to sleep between sends.
 SECONDS_SEND_SPREAD=${SECONDS_SEND_SPREAD:-600}
 
 # Set this to 1 to allow uploading of device coredumps.
-DEVCOREDUMP_UPLOAD_FLAG_FILE=\
-"/var/lib/crash_reporter/device_coredump_upload_allowed"
+DEVCOREDUMP_UPLOAD_FLAG_FILE="${CRASH_STATE_DIR}/device_coredump_upload_allowed"
 
 # The syslog tag for all logging we emit.
 TAG="$(basename $0)[$$]"
 
 # Directory to store timestamp files indicating the uploads in the past 24
 # hours.
-TIMESTAMPS_DIR="/var/lib/crash_sender"
+TIMESTAMPS_DIR="${CRASH_STATE_DIR}/crash_sender"
 
 # Temp directory for this process.
 TMP_DIR=""
 
-# Chrome's crash report log file.
-CHROME_CRASH_LOG="/var/log/chrome/Crash Reports/uploads.log"
+# Crash report log file.
+CRASH_LOG="${CRASH_STATE_DIR}/log/uploads.log"
 
 lecho() {
-  logger -t "${TAG}" "$@"
+  log -t "${TAG}" "$@"
 }
 
 # Returns true if mock is enabled.
@@ -117,6 +113,9 @@
     rm -rf "${TMP_DIR}"
   fi
   rm -f "${RUN_FILE}"
+  if [ -n "${CRASH_SENDER_LOCK}" ]; then
+    rm -rf "${CRASH_SENDER_LOCK}"
+  fi
   crash_done
 }
 
@@ -130,7 +129,7 @@
 
 is_official_image() {
   [ ${FORCE_OFFICIAL} -ne 0 ] && return 0
-  grep ^CHROMEOS_RELEASE_DESCRIPTION /etc/lsb-release | grep -q Official
+  getprop ro.product.description | grep -q Official
 }
 
 # Returns 0 if the a crash test is currently running.  NOTE: Mirrors
@@ -167,7 +166,11 @@
   # If we're testing crash reporter itself, we don't want to special-case
   # for developer mode.
   is_crash_test_in_progress && return 1
-  crossystem "devsw_boot?1"  # exit status will be accurate
+  if [ "$(getprop ro.build.type)" = "eng" ]; then
+    return 0
+  else
+    return 1
+  fi
 }
 
 # Return 0 if the uploading of device coredumps is allowed.
@@ -188,7 +191,7 @@
 check_rate() {
   mkdir -p ${TIMESTAMPS_DIR}
   # Only consider minidumps written in the past 24 hours by removing all older.
-  ${FIND} "${TIMESTAMPS_DIR}" -mindepth 1 -mmin +$((24 * 60)) \
+  find "${TIMESTAMPS_DIR}" -mindepth 1 -mtime +1 \
       -exec rm -- '{}' ';'
   local sends_in_24hrs=$(echo "${TIMESTAMPS_DIR}"/* | wc -w)
   lecho "Current send rate: ${sends_in_24hrs}sends/24hrs"
@@ -198,7 +201,7 @@
           "max ${MAX_CRASH_RATE}send/24hrs"
     return 1
   fi
-  mktemp "${TIMESTAMPS_DIR}"/XXXX > /dev/null
+  mktemp "${TIMESTAMPS_DIR}"/XXXXXX > /dev/null
   return 0
 }
 
@@ -252,27 +255,18 @@
 get_keys() {
   local file="$1" regex="$2"
 
-  awk -F'[[:space:]=]' -vregex="${regex}" \
-      'match($1, regex) { print $1 }' "${file}"
-}
-
-# Return the board name.
-get_board() {
-  get_key_value "/etc/lsb-release" "CHROMEOS_RELEASE_BOARD"
+  cut -d '=' -f1 "${file}" | grep --color=never "${regex}"
 }
 
 # Return the channel name (sans "-channel" suffix).
 get_channel() {
-  get_key_value "/etc/lsb-release" "CHROMEOS_RELEASE_TRACK" |
-    sed 's:-channel$::'
+  getprop ro.product.channel | sed 's:-channel$::'
 }
 
 # Return the hardware class or "undefined".
 get_hardware_class() {
   if [ -r "${HWCLASS_PATH}" ]; then
     cat "${HWCLASS_PATH}"
-  elif crossystem hwid > /dev/null 2>&1; then
-    echo "$(crossystem hwid)"
   else
     echo "undefined"
   fi
@@ -284,13 +278,12 @@
   local kind="$(get_kind "${meta_path}")"
   local exec_name="$(get_key_value "${meta_path}" "exec_name")"
   local url="${REPORT_UPLOAD_PROD_URL}"
-  local chromeos_version="$(get_key_value "${meta_path}" "ver")"
-  local board="$(get_board)"
+  local brillo_version="$(get_key_value "${meta_path}" "ver")"
   local hwclass="$(get_hardware_class)"
   local write_payload_size="$(get_key_value "${meta_path}" "payload_size")"
   local log="$(get_key_value "${meta_path}" "log")"
   local sig="$(get_key_value "${meta_path}" "sig")"
-  local send_payload_size="$(stat --printf=%s "${report_payload}" 2>/dev/null)"
+  local send_payload_size="$(stat -c "%s" "${report_payload}" 2>/dev/null)"
   local product="$(get_key_value "${meta_path}" "upload_var_prod")"
   local version="$(get_key_value "${meta_path}" "upload_var_ver")"
   local upload_prefix="$(get_key_value "${meta_path}" "upload_prefix")"
@@ -358,10 +351,10 @@
   # If ID or VERSION_ID is undefined, we use the default product name
   # and CHROMEOS_RELEASE_VERSION from /etc/lsb-release.
   if [ "${product}" = "undefined" ]; then
-    product="${CHROMEOS_PRODUCT}"
+    product="${BRILLO_PRODUCT}"
   fi
   if [ "${version}" = "undefined" ]; then
-    version="${chromeos_version}"
+    version="${brillo_version}"
   fi
 
   local image_type
@@ -376,11 +369,7 @@
   fi
 
   local boot_mode
-  if ! crossystem "cros_debug" > /dev/null 2>&1; then
-    # Sanity-check failed that makes sure crossystem exists.
-    lecho "Cannot determine boot mode due to error running crossystem command"
-    boot_mode="missing-crossystem"
-  elif is_developer_mode; then
+  if is_developer_mode; then
     boot_mode="dev"
   fi
 
@@ -392,7 +381,7 @@
   [ "${error_type}" = "undefined" ] && error_type=
 
   lecho "Sending crash:"
-  if [ "${product}" != "${CHROMEOS_PRODUCT}" ]; then
+  if [ "${product}" != "${BRILLO_PRODUCT}" ]; then
     lecho "  Sending crash report on behalf of ${product}"
   fi
   lecho "  Metadata: ${meta_path} (${kind})"
@@ -403,7 +392,6 @@
   if is_mock; then
     lecho "  Product: ${product}"
     lecho "  URL: ${url}"
-    lecho "  Board: ${board}"
     lecho "  HWClass: ${hwclass}"
     lecho "  write_payload_size: ${write_payload_size}"
     lecho "  send_payload_size: ${send_payload_size}"
@@ -451,7 +439,6 @@
     --capath "${RESTRICTED_CERTIFICATES_PATH}" --ciphers HIGH \
     -F "prod=${product}" \
     -F "ver=${version}" \
-    -F "board=${board}" \
     -F "hwclass=${hwclass}" \
     -F "exec_name=${exec_name}" \
     ${image_type:+-F "image_type=${image_type}"} \
@@ -477,15 +464,11 @@
       fi
       ;;
     *)
-      if is_official_image; then
-        product_name="ChromeOS"
-      else
-        product_name="ChromiumOS"
-      fi
+      product_name="Brillo"
       ;;
     esac
     printf '%s,%s,%s\n' \
-      "${timestamp}" "${id}" "${product_name}" >> "${CHROME_CRASH_LOG}"
+      "${timestamp}" "${id}" "${product_name}" >> "${CRASH_LOG}"
     lecho "Crash report receipt ID ${id}"
   else
     lecho "Crash sending failed with exit code ${curl_result}: " \
@@ -512,6 +495,7 @@
 # 3G connection (see crosbug.com/3304 for discussion).
 send_crashes() {
   local dir="$1"
+  lecho "Sending crashes for ${dir}"
 
   if [ ! -d "${dir}" ]; then
     return
@@ -519,8 +503,8 @@
 
   # Consider any old files which still have no corresponding meta file
   # as orphaned, and remove them.
-  for old_file in $(${FIND} "${dir}" -mindepth 1 \
-                    -mmin +$((24 * 60)) -type f); do
+  for old_file in $(find "${dir}" -mindepth 1 \
+                    -mtime +1 -type f); do
     if [ ! -e "$(get_base "${old_file}").meta" ]; then
       lecho "Removing old orphaned file: ${old_file}."
       rm -f -- "${old_file}"
@@ -548,8 +532,8 @@
 
     if ! is_complete_metadata "${meta_path}"; then
       # This report is incomplete, so if it's old, just remove it.
-      local old_meta=$(${FIND} "${dir}" -mindepth 1 -name \
-        $(basename "${meta_path}") -mmin +$((24 * 60)) -type f)
+      local old_meta=$(find "${dir}" -mindepth 1 -name \
+        $(basename "${meta_path}") -mtime +1 -type f)
       if [ -n "${old_meta}" ]; then
         lecho "Removing old incomplete metadata."
         remove_report "${meta_path}"
@@ -571,19 +555,10 @@
       continue
     fi
 
-    # Don't send crash reports from previous sessions while we're in guest mode
-    # to avoid the impression that crash reporting was enabled, which it isn't.
-    # (Don't exit right now because subsequent reports may be candidates for
-    # deletion.)
-    if ${METRICS_CLIENT} -g; then
-      lecho "Guest mode has been entered.  Delaying crash sending until exited."
-      continue
-    fi
-
     # Remove existing crashes in case user consent has not (yet) been given or
-    # has been revoked.  This must come after the guest mode check because
-    # ${METRICS_CLIENT} always returns "not consented" in guest mode.
-    if ! ${METRICS_CLIENT} -c; then
+    # has been revoked.  A failing "metrics_client -c" is treated as consent
+    # not being in place.
+    if ! metrics_client -c; then
       lecho "Crash reporting is disabled.  Removing crash."
       remove_report "${meta_path}"
       continue
@@ -602,7 +577,7 @@
     # reports is spread out randomly by up to SECONDS_SEND_SPREAD.  Thus, for
     # the sleep call the greater of the two delays is used.
     local now=$(date +%s)
-    local holdoff_time=$(($(stat --format=%Y "${meta_path}") + 30 - ${now}))
+    local holdoff_time=$(($(stat -c "%Y" "${meta_path}") + 30 - ${now}))
     local spread_time=$(generate_uniform_random "${SECONDS_SEND_SPREAD}")
     local sleep_time
     if [ ${spread_time} -gt ${holdoff_time} ]; then
@@ -673,8 +648,6 @@
 }
 
 main() {
-  trap cleanup EXIT INT TERM
-
   parseargs "$@"
 
   if [ -e "${PAUSE_CRASH_SENDING}" ] && \
@@ -693,31 +666,25 @@
   # (like with autotests) that we're still running.
   echo $$ > "${RUN_FILE}"
 
-  for dependency in "${FIND}" "${METRICS_CLIENT}" \
-                    "${RESTRICTED_CERTIFICATES_PATH}"; do
+  for dependency in "${RESTRICTED_CERTIFICATES_PATH}"; do
     if [ ! -x "${dependency}" ]; then
       lecho "Fatal: Crash sending disabled: ${dependency} not found."
       exit 1
     fi
   done
 
-  TMP_DIR="$(mktemp -d /tmp/crash_sender.XXXXXX)"
+  TMP_DIR="$(mktemp -d "${CRASH_STATE_DIR}/tmp/crash_sender.XXXXXX")"
 
   # Send system-wide crashes
-  send_crashes "/var/spool/crash"
-
-  # Send user-specific crashes
-  local d
-  for d in /home/chronos/crash /home/chronos/u-*/crash; do
-    send_crashes "${d}"
-  done
+  send_crashes "${CRASH_STATE_DIR}/crash"
 }
 
-(
-if ! flock -n 9; then
+#TODO(http://b/23937249): Change the locking logic back to using flock.
+if ! mkdir "${CRASH_SENDER_LOCK}" 2>/dev/null; then
   lecho "Already running; quitting."
   crash_done
   exit 1
 fi
+# Trap only after taking the lock so a loser never deletes the owner's lock.
+trap cleanup EXIT INT TERM
 main "$@"
-) 9>"${CRASH_SENDER_LOCK}"
diff --git a/crash_reporter/init.crash_reporter.rc b/crash_reporter/init.crash_reporter.rc
index 6882b77..db9bb6f 100644
--- a/crash_reporter/init.crash_reporter.rc
+++ b/crash_reporter/init.crash_reporter.rc
@@ -10,9 +10,20 @@
     # number to prevent infinitely recursing on crash handling.
     write /proc/sys/kernel/core_pipe_limit 4
 
+    # Remove any previous orphaned locks.
+    rmdir /data/misc/crash_reporter/lock/crash_sender
+
     # Create crash directories.
     mkdir /data/misc/crash_reporter 0700 root root
+    mkdir /data/misc/crash_reporter/lock 0700 root root
+    mkdir /data/misc/crash_reporter/log 0700 root root
+    mkdir /data/misc/crash_reporter/run 0700 root root
+    mkdir /data/misc/crash_reporter/tmp 0700 root root
 
 service crash_reporter /system/bin/crash_reporter --init
     class late_start
     oneshot
+
+service crash_sender /system/bin/periodic_scheduler 3600 14400 crash_sender \
+    /system/bin/crash_sender
+    class late_start
diff --git a/crash_reporter/periodic_scheduler b/crash_reporter/periodic_scheduler
new file mode 100755
index 0000000..7fdb5c9
--- /dev/null
+++ b/crash_reporter/periodic_scheduler
@@ -0,0 +1,81 @@
+#!/system/bin/sh
+
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run tasks periodically.
+# Usage: $0 <delay_seconds> <timeout_seconds> <task_name> <task_binary>
+#
+# Executes task <task_name> by running <task_binary> every <delay_seconds>.
+
+set -e -u
+
+SCRIPT_NAME="$(basename "$0")"
+# Base polling interval in seconds; main() sleeps CHECK_DELAY + KILL_DELAY.
+CHECK_DELAY=300  # Check every 5 minutes.
+KILL_DELAY=10    # How long to let the job clean up after a timeout.
+# Let the unittests override.
+: ${SPOOL_DIR:=/data/misc/crash_reporter/spool/cron-lite}
+
+loginfo() {  # Log "$@" through log(1), tagged with ${SCRIPT_NAME}.
+  log -p i -t "${SCRIPT_NAME}" "$@"
+}
+
+trap "loginfo 'exiting'" EXIT  # Leave a log line whenever this script exits.
+
+check_and_fix_spool_paths() {  # Make SPOOL_DIR a directory we own, or exit 1.
+  # Best effort: rm -f (no -r) only clears non-directories sitting in the way.
+  rm -f "$(dirname "${SPOOL_DIR}")" "${SPOOL_DIR}" 2>/dev/null || :
+  mkdir -p "${SPOOL_DIR}"
+  if [ ! -O "${SPOOL_DIR}" -o ! -d "${SPOOL_DIR}" ]; then  # not ours / not a dir
+    loginfo "Spool directory is damaged. Aborting!"
+    exit 1
+  fi
+}
+
+main() {  # Args: <delay_seconds> <timeout_seconds> <task_name> <task command...>
+  local delay="$1"
+  local timeout="$2"
+  local name="$3"
+  local spool_file="${SPOOL_DIR}/${name}"  # its mtime marks the last run
+  shift 3  # "$@" now holds only the task command line
+
+  [ -z "${delay}" ] && exit 1
+  [ -z "${timeout}" ] && exit 1
+  [ -z "${name}" ] && exit 1
+  [ $# -eq 0 ] && exit 1  # no task command was given
+  check_and_fix_spool_paths
+
+  while true; do
+    # Allow the sleep to be killed manually without terminating the handler.
+    # Send stderr to /dev/null to suppress the shell's "Terminated" message.
+    sleep $(( CHECK_DELAY + KILL_DELAY )) 2>/dev/null || true
+
+    [ ! -e "${spool_file}" ] && touch "${spool_file}"  # first pass: start clock
+
+    local last_rotation="$(stat -c "%Y" "${spool_file}" 2>/dev/null || echo 0)"
+    local now="$(date +%s)"
+    local time_diff=$((now - last_rotation))  # seconds since the last run
+
+    if [ ${time_diff} -gt ${delay} ]; then
+      rm "${spool_file}" || true
+      touch "${spool_file}"  # stamp the start of this run
+      loginfo "${name}: running $*"
+      timeout -k ${KILL_DELAY} ${timeout} "$@" || true  # task failure is not fatal
+      loginfo "${name}: job completed"
+    fi
+  done
+}
+
+main "$@"