Implement stats logging in LMKD.

It implements logging of the following atoms:
  -- LMK_STATE_CHANGED
  -- LMK_KILL_OCCURRED

We would like to gather memory metrics of each process killed by LMKD.
With this information we can analyze and improve system health,
potentially by reducing the memory footprint of such processes.

This feature would be available on production builds.

To know more about this see: http://go/android-p-memory-metrics

Bug: 65738734
Test: Tested manually
Change-Id: I064e0cdcb47c3b4c95d8b8d5654050c9812008d8
diff --git a/lmkd/Android.bp b/lmkd/Android.bp
index fc31693..5e306ab 100644
--- a/lmkd/Android.bp
+++ b/lmkd/Android.bp
@@ -6,6 +6,9 @@
         "liblog",
         "libcutils",
     ],
+    static_libs: [
+        "libstatslogc",
+    ],
     local_include_dirs: ["include"],
     cflags: ["-Werror"],
 
@@ -20,7 +23,7 @@
     },
 }
 
-cc_library_shared {
+cc_library_static {
     name: "libstatslogc",
     srcs: ["statslog.c"],
     cflags: [
diff --git a/lmkd/lmkd.c b/lmkd/lmkd.c
index 946a68c..c514bc3 100644
--- a/lmkd/lmkd.c
+++ b/lmkd/lmkd.c
@@ -36,6 +36,11 @@
 #include <lmkd.h>
 #include <log/log.h>
 
+#ifdef LMKD_LOG_STATS
+#include <log/log_event_list.h>
+#include <statslog.h>
+#endif
+
 /*
  * Define LMKD_TRACE_KILLS to record lmkd kills in kernel traces
  * to profile and correlate with OOM kills
@@ -62,6 +67,7 @@
 #define MEMCG_SYSFS_PATH "/dev/memcg/"
 #define MEMCG_MEMORY_USAGE "/dev/memcg/memory.usage_in_bytes"
 #define MEMCG_MEMORYSW_USAGE "/dev/memcg/memory.memsw.usage_in_bytes"
+
 #define LINE_MAX 128
 
 #define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
@@ -70,6 +76,18 @@
 #define ARRAY_SIZE(x)   (sizeof(x) / sizeof(*(x)))
 #define EIGHT_MEGA (1 << 23)
 
+#ifdef LMKD_LOG_STATS
+#define MEMCG_PROCESS_MEMORY_STAT_PATH "/dev/memcg/apps/uid_%d/pid_%d/memory.stat"
+/*
+ * These are defined in
+ * http://cs/android/frameworks/base/cmds/statsd/src/atoms.proto
+ */
+#define LMK_KILL_OCCURRED 51
+#define LMK_STATE_CHANGED 54
+#define LMK_STATE_CHANGE_START 1
+#define LMK_STATE_CHANGE_STOP 2
+#endif
+
 /* default to old in-kernel interface if no memory pressure events */
 static int use_inkernel_interface = 1;
 static bool has_inkernel_module;
@@ -163,6 +181,18 @@
     struct proc *pidhash_next;
 };
 
+#ifdef LMKD_LOG_STATS
+struct memory_stat { /* counters parsed from a memcg memory.stat file */
+   int64_t pgfault;        /* value of the "total_pgfault" key */
+   int64_t pgmajfault;     /* value of the "total_pgmajfault" key */
+   int64_t rss_in_bytes;   /* value of the "total_rss" key */
+   int64_t cache_in_bytes; /* value of the "total_cache" key */
+   int64_t swap_in_bytes;  /* value of the "total_swap" key */
+};
+static bool enable_stats_log;
+static android_log_context log_ctx;
+#endif
+
 #define PIDHASH_SZ 1024
 static struct proc *pidhash[PIDHASH_SZ];
 #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
@@ -543,6 +573,51 @@
     maxevents++;
 }
 
+#ifdef LMKD_LOG_STATS
+/* Parses one "<key> <value>" memory.stat line; stores known total_* counters into mem_st. */
+static void memory_stat_parse_line(char *line, struct memory_stat *mem_st) {
+    char key[LINE_MAX];
+    int64_t value;
+
+    /* Cap the key at LINE_MAX - 1 (127) chars; ignore lines without both a key and a value. */
+    if (sscanf(line, "%127s %" SCNd64, key, &value) != 2) {
+        return;
+    }
+
+    if (!strcmp(key, "total_pgfault"))
+        mem_st->pgfault = value;
+    else if (!strcmp(key, "total_pgmajfault"))
+        mem_st->pgmajfault = value;
+    else if (!strcmp(key, "total_rss"))
+        mem_st->rss_in_bytes = value;
+    else if (!strcmp(key, "total_cache"))
+        mem_st->cache_in_bytes = value;
+    else if (!strcmp(key, "total_swap"))
+        mem_st->swap_in_bytes = value;
+}
+
+/* Fills mem_st from the memory.stat file built from uid and pid; returns 0 on success, else -1. */
+static int memory_stat_parse(struct memory_stat *mem_st, int pid, uid_t uid) {
+    FILE *fp;
+    char buf[PATH_MAX];
+
+    snprintf(buf, sizeof(buf), MEMCG_PROCESS_MEMORY_STAT_PATH, uid, pid);
+    fp = fopen(buf, "r");
+    if (fp == NULL) {
+        ALOGE("%s open failed: %s", buf, strerror(errno));
+        return -1;
+    }
+
+    /* Zero the counters so keys missing from the file are reported as 0, not garbage. */
+    memset(mem_st, 0, sizeof(*mem_st));
+    while (fgets(buf, sizeof(buf), fp) != NULL) {
+        memory_stat_parse_line(buf, mem_st);
+    }
+    fclose(fp);
+    return 0;
+}
+#endif
+
 static int get_free_memory(struct mem_size *ms) {
     struct sysinfo si;
 
@@ -639,6 +714,11 @@
     int tasksize;
     int r;
 
+#ifdef LMKD_LOG_STATS
+    struct memory_stat mem_st;
+    int memory_stat_parse_result = -1;
+#endif
+
     taskname = proc_get_name(pid);
     if (!taskname) {
         pid_remove(pid);
@@ -651,6 +731,12 @@
         return -1;
     }
 
+#ifdef LMKD_LOG_STATS
+    if (enable_stats_log) {
+        memory_stat_parse_result = memory_stat_parse(&mem_st, pid, uid);
+    }
+#endif
+
     TRACE_KILL_START(pid);
 
     r = kill(pid, SIGKILL);
@@ -666,6 +752,15 @@
     if (r) {
         ALOGE("kill(%d): errno=%d", pid, errno);
         return -1;
+    } else {
+#ifdef LMKD_LOG_STATS
+        if (memory_stat_parse_result == 0) {
+            stats_write_lmk_kill_occurred(log_ctx, LMK_KILL_OCCURRED, uid, taskname,
+                    procp->oomadj, mem_st.pgfault, mem_st.pgmajfault, mem_st.rss_in_bytes,
+                    mem_st.cache_in_bytes, mem_st.swap_in_bytes);
+        }
+#endif
+        return tasksize;
     }
 
     return tasksize;
@@ -683,6 +778,12 @@
     int pages_freed = 0;
     int min_score_adj = level_oomadj[level];
 
+#ifdef LMKD_LOG_STATS
+    if (enable_stats_log) {
+        stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED, LMK_STATE_CHANGE_START);
+    }
+#endif
+
     for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) {
         struct proc *procp;
 
@@ -699,12 +800,25 @@
             if (killed_size >= 0) {
                 pages_freed += killed_size;
                 if (pages_freed >= pages_to_free) {
+
+#ifdef LMKD_LOG_STATS
+                    if (enable_stats_log) {
+                        stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED,
+                                LMK_STATE_CHANGE_STOP);
+                    }
+#endif
                     return pages_freed;
                 }
             }
         }
     }
 
+#ifdef LMKD_LOG_STATS
+    if (enable_stats_log) {
+        stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED, LMK_STATE_CHANGE_STOP);
+    }
+#endif
+
     return pages_freed;
 }
 
@@ -1106,6 +1220,14 @@
     kill_timeout_ms =
         (unsigned long)property_get_int32("ro.lmk.kill_timeout_ms", 0);
 
+#ifdef LMKD_LOG_STATS
+    enable_stats_log = property_get_bool("ro.lmk.log_stats", false);
+
+    if (enable_stats_log) {
+        log_ctx = create_android_logger(kStatsEventTag);
+    }
+#endif
+
     // MCL_ONFAULT pins pages as they fault instead of loading
     // everything immediately all at once. (Which would be bad,
     // because as of this writing, we have a lot of mapped pages we
@@ -1122,6 +1244,12 @@
     if (!init())
         mainloop();
 
+#ifdef LMKD_LOG_STATS
+    if (log_ctx) {
+        android_log_destroy(&log_ctx);
+    }
+#endif
+
     ALOGI("exiting");
     return 0;
 }
diff --git a/lmkd/statslog.h b/lmkd/statslog.h
index ea05fa6..6a27030 100644
--- a/lmkd/statslog.h
+++ b/lmkd/statslog.h
@@ -19,6 +19,12 @@
 #include <sys/cdefs.h>
 __BEGIN_DECLS
 
+/*
+ * The single event tag id for all stats logs.
+ * Keep this in sync with system/core/logcat/event.logtags
+ */
+static const int kStatsEventTag = 1937006964;
+
 /**
  * Logs the change in LMKD state which is used as start/stop boundaries for logging
  * LMK_KILL_OCCURRED event.