lmkd: Add optional kill reason description into kill reports

Allow kill report to be appended with the explanation of the reasons
killing has been done. This would help identify kill reasons while
troubleshooting lmkd kills.

Change-Id: Ie5dd7a44e51d04c43c2492be8c1bc964d1b03555
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
diff --git a/lmkd/lmkd.c b/lmkd/lmkd.c
index 1db3d5a..b9a7366 100644
--- a/lmkd/lmkd.c
+++ b/lmkd/lmkd.c
@@ -1743,7 +1743,7 @@
 static int last_killed_pid = -1;
 
 /* Kill one process specified by procp.  Returns the size of the process killed */
-static int kill_one_process(struct proc* procp, int min_oom_score) {
+static int kill_one_process(struct proc* procp, int min_oom_score, const char *reason) {
     int pid = procp->pid;
     uid_t uid = procp->uid;
     int tgid;
@@ -1794,8 +1794,13 @@
     set_process_group_and_prio(pid, SP_FOREGROUND, ANDROID_PRIORITY_HIGHEST);
 
     inc_killcnt(procp->oomadj);
-    ALOGE("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB", taskname, pid, uid, procp->oomadj,
-          tasksize * page_k);
+    if (reason) {
+        ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB; reason: %s", taskname, pid,
+              uid, procp->oomadj, tasksize * page_k, reason);
+    } else {
+        ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB", taskname, pid,
+              uid, procp->oomadj, tasksize * page_k);
+    }
 
     TRACE_KILL_END();
 
@@ -1833,7 +1838,7 @@
  * Find one process to kill at or above the given oom_adj level.
  * Returns size of the killed process.
  */
-static int find_and_kill_process(int min_score_adj) {
+static int find_and_kill_process(int min_score_adj, const char *reason) {
     int i;
     int killed_size = 0;
 
@@ -1851,7 +1856,7 @@
             if (!procp)
                 break;
 
-            killed_size = kill_one_process(procp, min_score_adj);
+            killed_size = kill_one_process(procp, min_score_adj, reason);
             if (killed_size >= 0) {
 #ifdef LMKD_LOG_STATS
                 if (enable_stats_log && !lmk_state_change_start) {
@@ -2040,6 +2045,7 @@
     bool cycle_after_kill = false;
     enum reclaim_state reclaim = NO_RECLAIM;
     enum zone_watermark wmark = WMARK_NONE;
+    char kill_desc[LINE_MAX];
 
     /* Skip while still killing a process */
     if (is_kill_pending()) {
@@ -2135,6 +2141,7 @@
          * free even after a kill. Mostly happens when running memory stress tests.
          */
         kill_reason = PRESSURE_AFTER_KILL;
+        strncpy(kill_desc, "min watermark is breached even after kill", sizeof(kill_desc));
     } else if (level == VMPRESS_LEVEL_CRITICAL && events != 0) {
         /*
          * Device is too busy reclaiming memory which might lead to ANR.
@@ -2142,25 +2149,36 @@
          * of the memory congestion) breaches the configured threshold.
          */
         kill_reason = NOT_RESPONDING;
+        strncpy(kill_desc, "device is not responding", sizeof(kill_desc));
     } else if (swap_is_low && thrashing > thrashing_limit_pct) {
         /* Page cache is thrashing while swap is low */
         kill_reason = LOW_SWAP_AND_THRASHING;
+        snprintf(kill_desc, sizeof(kill_desc), "device is low on swap (%" PRId64
+            "kB < %" PRId64 "kB) and thrashing (%" PRId64 "%%)",
+            mi.field.free_swap * page_k, swap_low_threshold * page_k, thrashing);
     } else if (swap_is_low && wmark < WMARK_HIGH) {
         /* Both free memory and swap are low */
         kill_reason = LOW_MEM_AND_SWAP;
+        snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap is low (%"
+            PRId64 "kB < %" PRId64 "kB)", wmark > WMARK_LOW ? "min" : "low",
+            mi.field.free_swap * page_k, swap_low_threshold * page_k);
     } else if (wmark < WMARK_HIGH && thrashing > thrashing_limit) {
         /* Page cache is thrashing while memory is low */
         thrashing_limit = (thrashing_limit * (100 - thrashing_limit_decay_pct)) / 100;
         kill_reason = LOW_MEM_AND_THRASHING;
+        snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and thrashing (%"
+            PRId64 "%%)", wmark > WMARK_LOW ? "min" : "low", thrashing);
     } else if (reclaim == DIRECT_RECLAIM && thrashing > thrashing_limit) {
         /* Page cache is thrashing while in direct reclaim (mostly happens on lowram devices) */
         thrashing_limit = (thrashing_limit * (100 - thrashing_limit_decay_pct)) / 100;
         kill_reason = DIRECT_RECL_AND_THRASHING;
+        snprintf(kill_desc, sizeof(kill_desc), "device is in direct reclaim and thrashing (%"
+            PRId64 "%%)", thrashing);
     }
 
     /* Kill a process if necessary */
     if (kill_reason != NONE) {
-        int pages_freed = find_and_kill_process(0);
+        int pages_freed = find_and_kill_process(0, kill_desc);
         killing = (pages_freed > 0);
         meminfo_log(&mi);
     }
@@ -2351,7 +2369,7 @@
 do_kill:
     if (low_ram_device) {
         /* For Go devices kill only one task */
-        if (find_and_kill_process(level_oomadj[level]) == 0) {
+        if (find_and_kill_process(level_oomadj[level], NULL) == 0) {
             if (debug_process_killing) {
                 ALOGI("Nothing to kill");
             }
@@ -2376,7 +2394,7 @@
             min_score_adj = level_oomadj[level];
         }
 
-        pages_freed = find_and_kill_process(min_score_adj);
+        pages_freed = find_and_kill_process(min_score_adj, NULL);
 
         if (pages_freed == 0) {
             /* Rate limit kill reports when nothing was reclaimed */