Optimize memory usage in incidentd

EncodedBuffer is used a lot in incidentd. EncodedBuffer uses malloc
internally to acquire memory. Frequently creating and destroying
EncodedBuffer creates memory fragmentation, leading to high memory
usage after taking an incident report.
Also fixes a few other places with lots of malloc/free operations.

This change:
* Creates a pool of EncodedBuffer in incidentd. The saving is
significant. It reduces EncodedBuffer creation from 3 per section to
3 per report.
* Replaces malloc with mmap inside EncodedBuffer. mmap is guaranteed
to be mem page aligned, so there will be no mem fragmentation after
destroying EncodedBuffer.
* Replaces new with mmap inside TombstoneSection
* Forks a process to execute LogSection, because liblog malloc & free
significant amount of memory

Result:
PSS before taking a report: 1295 KB
PSS after taking a report: 1336 KB

Bug: 150311553
Test: heapprofd
Change-Id: I83bd9c969b751c80b2f42747020799bd85d8aae6
diff --git a/cmds/incidentd/src/FdBuffer.cpp b/cmds/incidentd/src/FdBuffer.cpp
index d295b84..78c322e 100644
--- a/cmds/incidentd/src/FdBuffer.cpp
+++ b/cmds/incidentd/src/FdBuffer.cpp
@@ -17,6 +17,7 @@
 #include "Log.h"
 
 #include "FdBuffer.h"
+#include "incidentd_util.h"
 
 #include <log/log.h>
 #include <utils/SystemClock.h>
@@ -31,17 +32,24 @@
 namespace incidentd {
 
 const ssize_t BUFFER_SIZE = 16 * 1024;  // 16 KB
-const ssize_t MAX_BUFFER_COUNT = 6144;   // 96 MB max
+const ssize_t MAX_BUFFER_SIZE = 96 * 1024 * 1024;  // 96 MB
 
-FdBuffer::FdBuffer()
-        :mBuffer(new EncodedBuffer(BUFFER_SIZE)),
+FdBuffer::FdBuffer(): FdBuffer(get_buffer_from_pool(), /* isBufferPooled= */ true)  {
+}
+
+FdBuffer::FdBuffer(sp<EncodedBuffer> buffer, bool isBufferPooled)
+        :mBuffer(buffer),
          mStartTime(-1),
          mFinishTime(-1),
          mTimedOut(false),
-         mTruncated(false) {
+         mTruncated(false),
+         mIsBufferPooled(isBufferPooled) {
 }
 
 FdBuffer::~FdBuffer() {
+    if (mIsBufferPooled) {
+        return_buffer_to_pool(mBuffer);
+    }
 }
 
 status_t FdBuffer::read(int fd, int64_t timeout) {
@@ -51,7 +59,7 @@
     fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);
 
     while (true) {
-        if (mBuffer->size() >= MAX_BUFFER_COUNT * BUFFER_SIZE) {
+        if (mBuffer->size() >= MAX_BUFFER_SIZE) {
             mTruncated = true;
             VLOG("Truncating data");
             break;
@@ -106,7 +114,7 @@
     mStartTime = uptimeMillis();
 
     while (true) {
-        if (mBuffer->size() >= MAX_BUFFER_COUNT * BUFFER_SIZE) {
+        if (mBuffer->size() >= MAX_BUFFER_SIZE) {
             // Don't let it get too big.
             mTruncated = true;
             VLOG("Truncating data");
@@ -156,7 +164,7 @@
 
     // This is the buffer used to store processed data
     while (true) {
-        if (mBuffer->size() >= MAX_BUFFER_COUNT * BUFFER_SIZE) {
+        if (mBuffer->size() >= MAX_BUFFER_SIZE) {
             VLOG("Truncating data");
             mTruncated = true;
             break;
diff --git a/cmds/incidentd/src/FdBuffer.h b/cmds/incidentd/src/FdBuffer.h
index a349360..9b2794d 100644
--- a/cmds/incidentd/src/FdBuffer.h
+++ b/cmds/incidentd/src/FdBuffer.h
@@ -35,6 +35,7 @@
 class FdBuffer {
 public:
     FdBuffer();
+    FdBuffer(sp<EncodedBuffer> buffer, bool isBufferPooled = false);
     ~FdBuffer();
 
     /**
@@ -114,6 +115,7 @@
     int64_t mFinishTime;
     bool mTimedOut;
     bool mTruncated;
+    bool mIsBufferPooled;
 };
 
 }  // namespace incidentd
diff --git a/cmds/incidentd/src/PrivacyFilter.cpp b/cmds/incidentd/src/PrivacyFilter.cpp
index d00ecdd..0d427d1 100644
--- a/cmds/incidentd/src/PrivacyFilter.cpp
+++ b/cmds/incidentd/src/PrivacyFilter.cpp
@@ -19,9 +19,6 @@
 #include "incidentd_util.h"
 #include "PrivacyFilter.h"
 #include "proto_util.h"
-
-#include "incidentd_util.h"
-#include "proto_util.h"
 #include "Section.h"
 
 #include <android-base/file.h>
@@ -129,6 +126,8 @@
     FieldStripper(const Privacy* restrictions, const sp<ProtoReader>& data,
             uint8_t bufferLevel);
 
+    ~FieldStripper();
+
     /**
      * Take the data that we have, and filter it down so that no fields
      * are more sensitive than the given privacy policy.
@@ -167,6 +166,7 @@
      */
     uint8_t mCurrentLevel;
 
+    sp<EncodedBuffer> mEncodedBuffer;
 };
 
 FieldStripper::FieldStripper(const Privacy* restrictions, const sp<ProtoReader>& data,
@@ -174,19 +174,25 @@
         :mRestrictions(restrictions),
          mData(data),
          mSize(data->size()),
-         mCurrentLevel(bufferLevel) {
+         mCurrentLevel(bufferLevel),
+         mEncodedBuffer(get_buffer_from_pool()) {
     if (mSize < 0) {
         ALOGW("FieldStripper constructed with a ProtoReader that doesn't support size."
                 " Data will be missing.");
     }
 }
 
+FieldStripper::~FieldStripper() {
+    return_buffer_to_pool(mEncodedBuffer);
+}
+
 status_t FieldStripper::strip(const uint8_t privacyPolicy) {
     // If the current strip level is less (fewer fields retained) than what's already in the
     // buffer, then we can skip it.
     if (mCurrentLevel < privacyPolicy) {
         PrivacySpec spec(privacyPolicy);
-        ProtoOutputStream proto;
+        mEncodedBuffer->clear();
+        ProtoOutputStream proto(mEncodedBuffer);
 
         // Optimization when no strip happens.
         if (mRestrictions == NULL || spec.RequireAll()) {
@@ -267,7 +273,7 @@
     // Order the writes by privacy filter, with increasing levels of filtration,k
     // so we can do the filter once, and then write many times.
     sort(mOutputs.begin(), mOutputs.end(),
-        [](const sp<FilterFd>& a, const sp<FilterFd>& b) -> bool { 
+        [](const sp<FilterFd>& a, const sp<FilterFd>& b) -> bool {
             return a->getPrivacyPolicy() < b->getPrivacyPolicy();
         });
 
@@ -370,7 +376,7 @@
             write_field_or_skip(NULL, reader, fieldTag, true);
         }
     }
-
+    clear_buffer_pool();
     err = reader->getError();
     if (err != NO_ERROR) {
         ALOGW("filter_and_write_report reader had an error: %s", strerror(-err));
diff --git a/cmds/incidentd/src/Reporter.cpp b/cmds/incidentd/src/Reporter.cpp
index ad25342..86a78f09 100644
--- a/cmds/incidentd/src/Reporter.cpp
+++ b/cmds/incidentd/src/Reporter.cpp
@@ -698,7 +698,7 @@
             listener->onReportFailed();
         });
     }
-
+    clear_buffer_pool();
     ALOGI("Done taking incident report err=%s", strerror(-err));
 }
 
diff --git a/cmds/incidentd/src/Section.cpp b/cmds/incidentd/src/Section.cpp
index c703c3c..dec9cb0 100644
--- a/cmds/incidentd/src/Section.cpp
+++ b/cmds/incidentd/src/Section.cpp
@@ -36,6 +36,7 @@
 #include <log/log_read.h>
 #include <log/logprint.h>
 #include <private/android_logger.h>
+#include <sys/mman.h>
 
 #include "FdBuffer.h"
 #include "Privacy.h"
@@ -106,7 +107,6 @@
         return NO_ERROR;
     }
 
-    FdBuffer buffer;
     Fpipe p2cPipe;
     Fpipe c2pPipe;
     // initiate pipes to pass data to/from incident_helper
@@ -122,6 +122,7 @@
     }
 
     // parent process
+    FdBuffer buffer;
     status_t readStatus = buffer.readProcessedDataInStream(fd.get(), std::move(p2cPipe.writeFd()),
                                                            std::move(c2pPipe.readFd()),
                                                            this->timeoutMs, mIsSysfs);
@@ -356,7 +357,6 @@
 CommandSection::~CommandSection() { free(mCommand); }
 
 status_t CommandSection::Execute(ReportWriter* writer) const {
-    FdBuffer buffer;
     Fpipe cmdPipe;
     Fpipe ihPipe;
 
@@ -377,6 +377,7 @@
     }
 
     cmdPipe.writeFd().reset();
+    FdBuffer buffer;
     status_t readStatus = buffer.read(ihPipe.readFd().get(), this->timeoutMs);
     writer->setSectionStats(buffer);
     if (readStatus != NO_ERROR || buffer.timedOut()) {
@@ -574,6 +575,16 @@
 }
 
 status_t LogSection::BlockingCall(unique_fd& pipeWriteFd) const {
+    // heap profile shows that liblog malloc & free significant amount of memory in this process.
+    // Hence forking a new process to prevent memory fragmentation.
+    pid_t pid = fork();
+    if (pid < 0) {
+        ALOGW("[%s] failed to fork", this->name.string());
+        return errno;
+    }
+    if (pid > 0) {
+        return wait_child(pid, this->timeoutMs);
+    }
     // Open log buffer and getting logs since last retrieved time if any.
     unique_ptr<logger_list, void (*)(logger_list*)> loggers(
             gLastLogsRetrieved.find(mLogID) == gLastLogsRetrieved.end()
@@ -583,31 +594,31 @@
 
     if (android_logger_open(loggers.get(), mLogID) == NULL) {
         ALOGE("[%s] Can't get logger.", this->name.string());
-        return -1;
+        _exit(EXIT_FAILURE);
     }
 
     log_msg msg;
     log_time lastTimestamp(0);
 
     ProtoOutputStream proto;
+    status_t err = OK;
     while (true) {  // keeps reading until logd buffer is fully read.
-        status_t err = android_logger_list_read(loggers.get(), &msg);
-        // err = 0 - no content, unexpected connection drop or EOF.
-        // err = +ive number - size of retrieved data from logger
-        // err = -ive number, OS supplied error _except_ for -EAGAIN
-        // err = -EAGAIN, graceful indication for ANDRODI_LOG_NONBLOCK that this is the end of data.
-        if (err <= 0) {
-            if (err != -EAGAIN) {
+        status_t status = android_logger_list_read(loggers.get(), &msg);
+        // status = 0 - no content, unexpected connection drop or EOF.
+        // status = +ive number - size of retrieved data from logger
+        // status = -ive number, OS supplied error _except_ for -EAGAIN
+        // status = -EAGAIN, graceful indication for ANDRODI_LOG_NONBLOCK that this is the end.
+        if (status <= 0) {
+            if (status != -EAGAIN) {
                 ALOGW("[%s] fails to read a log_msg.\n", this->name.string());
+                err = -status;
             }
-            // dump previous logs and don't consider this error a failure.
             break;
         }
         if (mBinary) {
             // remove the first uint32 which is tag's index in event log tags
             android_log_context context = create_android_log_parser(msg.msg() + sizeof(uint32_t),
                                                                     msg.len() - sizeof(uint32_t));
-            ;
             android_log_list_element elem;
 
             lastTimestamp.tv_sec = msg.entry.sec;
@@ -667,9 +678,10 @@
             }
         } else {
             AndroidLogEntry entry;
-            err = android_log_processLogBuffer(&msg.entry, &entry);
-            if (err != NO_ERROR) {
+            status = android_log_processLogBuffer(&msg.entry, &entry);
+            if (status != OK) {
                 ALOGW("[%s] fails to process to an entry.\n", this->name.string());
+                err = status;
                 break;
             }
             lastTimestamp.tv_sec = entry.tv_sec;
@@ -688,17 +700,24 @@
                         trimTail(entry.message, entry.messageLen));
             proto.end(token);
         }
+        if (!proto.flush(pipeWriteFd.get())) {
+            if (errno == EPIPE) {
+                ALOGW("[%s] wrote to a broken pipe\n", this->name.string());
+            }
+            err = errno;
+            break;
+        }
+        proto.clear();
     }
     gLastLogsRetrieved[mLogID] = lastTimestamp;
-    if (!proto.flush(pipeWriteFd.get()) && errno == EPIPE) {
-        ALOGE("[%s] wrote to a broken pipe\n", this->name.string());
-        return EPIPE;
-    }
-    return NO_ERROR;
+    _exit(err);
 }
 
 // ================================================================================
 
+const int LINK_NAME_LEN = 64;
+const int EXE_NAME_LEN = 1024;
+
 TombstoneSection::TombstoneSection(int id, const char* type, const int64_t timeoutMs)
     : WorkerThreadSection(id, timeoutMs), mType(type) {
     name = "tombstone ";
@@ -716,25 +735,37 @@
 
     const std::set<int> hal_pids = get_interesting_hal_pids();
 
-    ProtoOutputStream proto;
+    auto pooledBuffer = get_buffer_from_pool();
+    ProtoOutputStream proto(pooledBuffer);
+    // dumpBufferSize should be a multiple of page size (4 KB) to reduce memory fragmentation
+    size_t dumpBufferSize = 64 * 1024; // 64 KB is enough for most tombstone dump
+    char* dumpBuffer = (char*)mmap(NULL, dumpBufferSize, PROT_READ | PROT_WRITE,
+                MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
     struct dirent* d;
+    char link_name[LINK_NAME_LEN];
+    char exe_name[EXE_NAME_LEN];
     status_t err = NO_ERROR;
     while ((d = readdir(proc.get()))) {
         int pid = atoi(d->d_name);
         if (pid <= 0) {
             continue;
         }
-
-        const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid);
-        std::string exe;
-        if (!android::base::Readlink(link_name, &exe)) {
-            ALOGE("Section %s: Can't read '%s': %s\n", name.string(),
-                    link_name.c_str(), strerror(errno));
+        snprintf(link_name, LINK_NAME_LEN, "/proc/%d/exe", pid);
+        struct stat fileStat;
+        if (stat(link_name, &fileStat) != OK) {
             continue;
         }
+        size_t exe_name_len = readlink(link_name, exe_name, EXE_NAME_LEN);
+        if (exe_name_len < 0 || exe_name_len >= EXE_NAME_LEN) {
+            ALOGE("[%s] Can't read '%s': %s", name.string(), link_name, strerror(errno));
+            continue;
+        }
+        // readlink(2) does not put a null terminator at the end
+        exe_name[exe_name_len] = '\0';
 
         bool is_java_process;
-        if (exe == "/system/bin/app_process32" || exe == "/system/bin/app_process64") {
+        if (strncmp(exe_name, "/system/bin/app_process32", LINK_NAME_LEN) == 0 ||
+                strncmp(exe_name, "/system/bin/app_process64", LINK_NAME_LEN) == 0) {
             if (mType != "java") continue;
             // Don't bother dumping backtraces for the zygote.
             if (IsZygote(pid)) {
@@ -743,7 +774,7 @@
             }
 
             is_java_process = true;
-        } else if (should_dump_native_traces(exe.c_str())) {
+        } else if (should_dump_native_traces(exe_name)) {
             if (mType != "native") continue;
             is_java_process = false;
         } else if (hal_pids.find(pid) != hal_pids.end()) {
@@ -799,29 +830,37 @@
             ALOGE("[%s] child had an issue: %s\n", this->name.string(), strerror(-cStatus));
         }
 
-        auto dump = std::make_unique<char[]>(buffer.size());
+        // Resize dump buffer
+        if (dumpBufferSize < buffer.size()) {
+            munmap(dumpBuffer, dumpBufferSize);
+            while(dumpBufferSize < buffer.size()) dumpBufferSize = dumpBufferSize << 1;
+            dumpBuffer = (char*)mmap(NULL, dumpBufferSize, PROT_READ | PROT_WRITE,
+                    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+        }
         sp<ProtoReader> reader = buffer.data()->read();
         int i = 0;
         while (reader->hasNext()) {
-            dump[i] = reader->next();
+            dumpBuffer[i] = reader->next();
             i++;
         }
         uint64_t token = proto.start(android::os::BackTraceProto::TRACES);
         proto.write(android::os::BackTraceProto::Stack::PID, pid);
-        proto.write(android::os::BackTraceProto::Stack::DUMP, dump.get(), i);
+        proto.write(android::os::BackTraceProto::Stack::DUMP, dumpBuffer, i);
         proto.write(android::os::BackTraceProto::Stack::DUMP_DURATION_NS,
                     static_cast<long long>(Nanotime() - start));
         proto.end(token);
         dumpPipe.readFd().reset();
-    }
-
-    if (!proto.flush(pipeWriteFd.get()) && errno == EPIPE) {
-        ALOGE("[%s] wrote to a broken pipe\n", this->name.string());
-        if (err != NO_ERROR) {
-            return EPIPE;
+        if (!proto.flush(pipeWriteFd.get())) {
+            if (errno == EPIPE) {
+                ALOGE("[%s] wrote to a broken pipe\n", this->name.string());
+            }
+            err = errno;
+            break;
         }
+        proto.clear();
     }
-
+    munmap(dumpBuffer, dumpBufferSize);
+    return_buffer_to_pool(pooledBuffer);
     return err;
 }
 
diff --git a/cmds/incidentd/src/incidentd_util.cpp b/cmds/incidentd/src/incidentd_util.cpp
index 2649fb9..150ab99 100644
--- a/cmds/incidentd/src/incidentd_util.cpp
+++ b/cmds/incidentd/src/incidentd_util.cpp
@@ -18,6 +18,7 @@
 
 #include "incidentd_util.h"
 
+#include <android/util/EncodedBuffer.h>
 #include <fcntl.h>
 #include <sys/prctl.h>
 #include <wait.h>
@@ -28,8 +29,6 @@
 namespace os {
 namespace incidentd {
 
-using namespace android::base;
-
 const Privacy* get_privacy_of_section(int id) {
     int l = 0;
     int r = PRIVACY_POLICY_COUNT - 1;
@@ -48,6 +47,30 @@
     return NULL;
 }
 
+std::vector<sp<EncodedBuffer>> gBufferPool;
+std::mutex gBufferPoolLock;
+
+sp<EncodedBuffer> get_buffer_from_pool() {
+    std::scoped_lock<std::mutex> lock(gBufferPoolLock);
+    if (gBufferPool.size() == 0) {
+        return new EncodedBuffer();
+    }
+    sp<EncodedBuffer> buffer = gBufferPool.back();
+    gBufferPool.pop_back();
+    return buffer;
+}
+
+void return_buffer_to_pool(sp<EncodedBuffer> buffer) {
+    buffer->clear();
+    std::scoped_lock<std::mutex> lock(gBufferPoolLock);
+    gBufferPool.push_back(buffer);
+}
+
+void clear_buffer_pool() {
+    std::scoped_lock<std::mutex> lock(gBufferPoolLock);
+    gBufferPool.clear();
+}
+
 // ================================================================================
 Fpipe::Fpipe() : mRead(), mWrite() {}
 
@@ -84,7 +107,7 @@
         status = &dummy_status;
     }
     *status = 0;
-    pid_t pid = fork();
+    pid_t pid = vfork();
     if (pid < 0) {
         *status = -errno;
         return -1;
diff --git a/cmds/incidentd/src/incidentd_util.h b/cmds/incidentd/src/incidentd_util.h
index a54993fe..8499889 100644
--- a/cmds/incidentd/src/incidentd_util.h
+++ b/cmds/incidentd/src/incidentd_util.h
@@ -19,18 +19,21 @@
 #define INCIDENTD_UTIL_H
 
 #include <stdarg.h>
-#include <unistd.h>
-
-#include <android-base/unique_fd.h>
 #include <utils/Errors.h>
 
 #include "Privacy.h"
 
 namespace android {
+
+namespace util {
+class EncodedBuffer;
+}
+
 namespace os {
 namespace incidentd {
 
-using namespace android::base;
+using android::base::unique_fd;
+using android::util::EncodedBuffer;
 
 /**
  * Looks up Privacy of a section in the auto-gen PRIVACY_POLICY_LIST;
@@ -38,6 +41,25 @@
 const Privacy* get_privacy_of_section(int id);
 
 /**
+ * Get an EncodedBuffer from an internal pool, or create and return a new one if the pool is empty.
+ * The EncodedBuffer should be returned after use.
+ * Thread safe.
+ */
+sp<EncodedBuffer> get_buffer_from_pool();
+
+/**
+ * Return the EncodedBuffer back to the pool for reuse.
+ * Thread safe.
+ */
+void return_buffer_to_pool(sp<EncodedBuffer> buffer);
+
+/**
+ * Clear the buffer pool to free memory, after taking an incident report.
+ * Thread safe.
+ */
+void clear_buffer_pool();
+
+/**
  * This class wraps android::base::Pipe.
  */
 class Fpipe {