Implement PII stripper in incidentd, part 1

1. automatically parse privacy options and generate lookup table
2. create FdBuffer iterator API in order to remove dependency on Reporter.h

Bug: 64687253
Test: Unit test for iterator API, and manually tested lookup table
Change-Id: I1ea376a4481fc4afc7bdf447936f767b63690fd3
diff --git a/cmds/incidentd/src/FdBuffer.cpp b/cmds/incidentd/src/FdBuffer.cpp
index 7743301..4d6a36c 100644
--- a/cmds/incidentd/src/FdBuffer.cpp
+++ b/cmds/incidentd/src/FdBuffer.cpp
@@ -174,7 +174,7 @@
             if (rpos >= wpos) {
                 amt = ::read(fd, cirBuf + rpos, BUFFER_SIZE - rpos);
             } else {
-                amt = :: read(fd, cirBuf + rpos, wpos - rpos);
+                amt = ::read(fd, cirBuf + rpos, wpos - rpos);
             }
             if (amt < 0) {
                 if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
@@ -241,6 +241,7 @@
 size_t
 FdBuffer::size()
 {
+    if (mBuffers.empty()) return 0;
     return ((mBuffers.size() - 1) * BUFFER_SIZE) + mCurrentWritten;
 }
 
@@ -255,4 +256,30 @@
     return NO_ERROR;
 }
 
+FdBuffer::iterator
+FdBuffer::end()
+{
+    const bool nothingStored = mBuffers.empty() || mCurrentWritten < 0;
+    if (nothingStored) return begin();  // end() coincides with begin() when no data is held
+    // A completely filled last buffer has no successor allocated: end() is slot size(), offset 0.
+    return (mCurrentWritten == BUFFER_SIZE) ? iterator(*this, mBuffers.size(), 0)
+                                            : iterator(*this, mBuffers.size() - 1, mCurrentWritten);
+}
 
+FdBuffer::iterator&
+FdBuffer::iterator::operator+(size_t offset)
+{
+    // Advances *this in place (no copy is made): fold the requested distance into
+    // the in-buffer offset, then carry every whole BUFFER_SIZE into the buffer index.
+    const size_t total = mOffset + offset;
+    const size_t wholeBuffers = total / BUFFER_SIZE;
+    mIndex += wholeBuffers;
+    mOffset = total % BUFFER_SIZE;
+    return *this;
+}
+
+size_t
+FdBuffer::iterator::bytesRead()
+{   // distance from begin(): every buffer before mIndex counts as BUFFER_SIZE bytes
+    return mOffset + BUFFER_SIZE * mIndex;
+}
diff --git a/cmds/incidentd/src/FdBuffer.h b/cmds/incidentd/src/FdBuffer.h
index 03a6d18..e9a53ff 100644
--- a/cmds/incidentd/src/FdBuffer.h
+++ b/cmds/incidentd/src/FdBuffer.h
@@ -21,7 +21,6 @@
 
 #include <utils/Errors.h>
 
-#include <set>
 #include <vector>
 
 using namespace android;
@@ -74,7 +73,8 @@
     size_t size();
 
     /**
-     * Write the data that we recorded to the fd given.
+     * [Deprecated] Write the data that we recorded to the fd given.
+     * TODO: remove it once the iterator api is working
      */
     status_t write(ReportRequestSet* requests);
 
@@ -83,6 +83,37 @@
      */
     int64_t durationMs() { return mFinishTime - mStartTime; }
 
+    /**
+     * Read data stored in FdBuffer. A position is a (buffer index, offset within that buffer) pair.
+     */
+    class iterator;
+    friend class iterator;
+    class iterator : public std::iterator<std::random_access_iterator_tag, uint8_t> {
+    private:
+        FdBuffer& mFdBuffer; // the FdBuffer being read
+        size_t mIndex;       // which entry of mFdBuffer.mBuffers
+        size_t mOffset;      // byte offset inside that entry
+    public:
+        explicit iterator(FdBuffer& buffer, ssize_t index, ssize_t offset)
+                : mFdBuffer(buffer), mIndex(index), mOffset(offset) {}
+        // Copies the position only: mFdBuffer is a reference, so both sides must read the same FdBuffer.
+        iterator& operator=(const iterator& other)
+                { mIndex = other.mIndex; mOffset = other.mOffset; return *this; }
+        iterator& operator+(size_t offset); // advances *this in place; implemented in .cpp
+        iterator& operator+=(size_t offset) { return *this + offset; }
+        iterator& operator++() { return *this + 1; }
+        // Post-increment: advance, then hand back a copy of the position held before the step.
+        iterator operator++(int) { iterator previous(*this); ++(*this); return previous; }
+        bool operator==(iterator other) const { return mIndex == other.mIndex && mOffset == other.mOffset; }
+        bool operator!=(iterator other) const { return !(*this == other); }
+        reference operator*() const { return mFdBuffer.mBuffers[mIndex][mOffset]; }
+        // random access could move the iterator out of bounds; outOfBound() detects that
+        size_t bytesRead();
+        bool outOfBound() { return bytesRead() > mFdBuffer.size(); };
+    };
+    iterator begin() { return iterator(*this, 0, 0); }
+    iterator end();
+
 private:
     vector<uint8_t*> mBuffers;
     int64_t mStartTime;
diff --git a/cmds/incidentd/src/Section.cpp b/cmds/incidentd/src/Section.cpp
index 0f6f38e..ac87fe3 100644
--- a/cmds/incidentd/src/Section.cpp
+++ b/cmds/incidentd/src/Section.cpp
@@ -22,9 +22,6 @@
 #include <private/android_filesystem_config.h>
 #include <binder/IServiceManager.h>
 #include <mutex>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
 #include <wait.h>
 #include <unistd.h>
 
@@ -37,7 +34,7 @@
 static pid_t
 forkAndExecuteIncidentHelper(const int id, const char* name, Fpipe& p2cPipe, Fpipe& c2pPipe)
 {
-    const char* ihArgs[] { INCIDENT_HELPER, "-s", to_string(id).c_str(), NULL };
+    const char* ihArgs[] { INCIDENT_HELPER, "-s", String8::format("%d", id).string(), NULL };
 
     // fork used in multithreaded environment, avoid adding unnecessary code in child process
     pid_t pid = fork();
diff --git a/cmds/incidentd/src/protobuf.cpp b/cmds/incidentd/src/protobuf.cpp
index a703ef9..b865339 100644
--- a/cmds/incidentd/src/protobuf.cpp
+++ b/cmds/incidentd/src/protobuf.cpp
@@ -16,6 +16,7 @@
 
 #include "protobuf.h"
 
+
 uint8_t* 
 write_raw_varint(uint8_t* buf, uint32_t val)
 {
diff --git a/cmds/incidentd/src/section_list.h b/cmds/incidentd/src/section_list.h
index c977519..1abdb52 100644
--- a/cmds/incidentd/src/section_list.h
+++ b/cmds/incidentd/src/section_list.h
@@ -21,8 +21,46 @@
 
 /**
  * This is the mapping of section IDs to the commands that are run to get those commands.
+ * The section IDs are guaranteed in ascending order
  */
 extern const Section* SECTION_LIST[];
 
+/*
+ * In order not to use libprotobuf-cpp-full or libplatformprotos in incidentd,
+ * the privacy options' data structures are explicitly redefined in this file.
+ */
+
+// DESTINATION enum
+extern const uint8_t DEST_LOCAL;
+extern const uint8_t DEST_EXPLICIT;
+extern const uint8_t DEST_AUTOMATIC;
+
+// This is the default value of the DEST enum.
+// A field with this value doesn't generate a Privacy entry, to avoid too much generated code.
+extern const uint8_t DEST_DEFAULT_VALUE;
+
+// type of the field, identical to the protobuf definition
+extern const uint8_t TYPE_STRING;
+extern const uint8_t TYPE_MESSAGE;
+
+struct Privacy {  // one privacy-policy entry for a proto field; instances come from generated code
+    int field_id;  // proto field number
+    uint8_t type;  // protobuf field type (compare with TYPE_STRING / TYPE_MESSAGE)
+
+    // the following two fields are identical to
+    // frameworks/base/libs/incident/proto/android/privacy.proto
+    uint8_t dest;
+    const char** patterns;  // NULL, or a NULL-terminated array of pattern strings
+
+    // ignore parent's privacy flags if children are set, NULL-terminated
+    const Privacy** children;
+};
+
+/**
+ * This is the mapping of section IDs to each section's privacy policy.
+ * The section IDs are guaranteed in ascending order
+ */
+extern const Privacy* PRIVACY_POLICY_LIST[];
+
 #endif // SECTION_LIST_H
 
diff --git a/cmds/incidentd/tests/FdBuffer_test.cpp b/cmds/incidentd/tests/FdBuffer_test.cpp
index ba8b77a..403a2ab 100644
--- a/cmds/incidentd/tests/FdBuffer_test.cpp
+++ b/cmds/incidentd/tests/FdBuffer_test.cpp
@@ -92,6 +92,30 @@
     AssertBufferContent(testdata.c_str());
 }
 
+TEST_F(FdBufferTest, IterateEmpty) {
+    // Nothing has been read into the buffer, so begin() and end() must coincide.
+    FdBuffer::iterator cursor = buffer.begin();
+    EXPECT_EQ(cursor, buffer.end());
+    EXPECT_TRUE((cursor += 1).outOfBound());  // one step into an empty buffer is out of bounds
+}
+
+TEST_F(FdBufferTest, ReadAndIterate) {
+    std::string testdata = "FdBuffer test string";
+    ASSERT_TRUE(WriteStringToFile(testdata, tf.path, false));
+    ASSERT_EQ(NO_ERROR, buffer.read(tf.fd, READ_TIMEOUT));
+
+    FdBuffer::iterator walker = buffer.begin();
+    for (int pos = 0; walker != buffer.end(); ++walker, ++pos) {
+        EXPECT_EQ(*walker, (uint8_t)testdata[pos]);
+    }
+    // Jumping by size() in a single step must land exactly on end().
+    FdBuffer::iterator jumped = buffer.begin();
+    jumped += buffer.size();
+    EXPECT_EQ(jumped, buffer.end());
+    EXPECT_EQ(jumped.bytesRead(), testdata.size());
+    EXPECT_FALSE(jumped.outOfBound());
+}
+
 TEST_F(FdBufferTest, ReadTimeout) {
     int pid = fork();
     ASSERT_TRUE(pid != -1);
@@ -202,6 +226,7 @@
 
 TEST_F(FdBufferTest, ReadInStreamMoreThan4MB) {
     const std::string testFile = kTestDataPath + "morethan4MB.txt";
+    size_t fourMB = (size_t) 4 * 1024 * 1024;
     int fd = open(testFile.c_str(), O_RDONLY);
     ASSERT_NE(fd, -1);
     int pid = fork();
@@ -220,10 +245,18 @@
 
         ASSERT_EQ(NO_ERROR, buffer.readProcessedDataInStream(fd,
             p2cPipe.writeFd(), c2pPipe.readFd(), READ_TIMEOUT));
-        EXPECT_EQ(buffer.size(), (size_t) (4 * 1024 * 1024));
+        EXPECT_EQ(buffer.size(), fourMB);
         EXPECT_FALSE(buffer.timedOut());
         EXPECT_TRUE(buffer.truncated());
         wait(&pid);
+        FdBuffer::iterator it = buffer.begin();
+        it += fourMB;
+        EXPECT_EQ(it.bytesRead(), fourMB);
+        EXPECT_EQ(it, buffer.end());
+        for (FdBuffer::iterator it = buffer.begin(); it != buffer.end(); it++) {
+            char c = 'A' + (it.bytesRead() % 64 / 8);
+            ASSERT_TRUE(*it == c);
+        }
     }
 }
 
diff --git a/core/proto/android/os/incident.proto b/core/proto/android/os/incident.proto
index bef5e75..5dfcd2a 100644
--- a/core/proto/android/os/incident.proto
+++ b/core/proto/android/os/incident.proto
@@ -47,6 +47,9 @@
     Cause cause = 1;
 }
 
+// privacy field options must not be set at this level because all
+// the sections are able to be controlled and configured by section ids.
+// Instead privacy field options need to be configured in each section proto message.
 message IncidentProto {
     // Incident header
     repeated IncidentHeaderProto header = 1;
diff --git a/libs/incident/proto/android/privacy.proto b/libs/incident/proto/android/privacy.proto
index ae5af0e..5fd75d6 100644
--- a/libs/incident/proto/android/privacy.proto
+++ b/libs/incident/proto/android/privacy.proto
@@ -36,7 +36,7 @@
     // off the device with an explicit user action.
     DEST_EXPLICIT = 1;
 
-    // Fields or messages annotated with DEST_LOCAL can be sent by
+    // Fields or messages annotated with DEST_AUTOMATIC can be sent by
     // automatic means, without per-sending user consent. The user
     // still must have previously accepted a consent to share this
     // information.
@@ -47,8 +47,11 @@
 
 message PrivacyFlags {
   optional Destination dest = 1  [
-      default = DEST_LOCAL
+      default = DEST_EXPLICIT
   ];
+
+  // regex to filter pii sensitive info from a string field type
+  repeated string patterns = 2;
 }
 
 extend google.protobuf.FieldOptions {
diff --git a/tools/incident_section_gen/main.cpp b/tools/incident_section_gen/main.cpp
index 8e5c4f9..e76fef5 100644
--- a/tools/incident_section_gen/main.cpp
+++ b/tools/incident_section_gen/main.cpp
@@ -27,21 +27,24 @@
 using namespace google::protobuf::internal;
 using namespace std;
 
-static void generateHead(const char* header) {
-    printf("// Auto generated file. Do not modify\n");
-    printf("\n");
-    printf("#include \"%s.h\"\n", header);
+static inline void emptyline() {
     printf("\n");
 }
 
+static void generateHead(const char* header) {  // writes the banner and the #include of "<header>.h"
+    fputs("// Auto generated file. Do not modify\n", stdout);
+    emptyline();
+    printf("#include \"%s.h\"\n", header);
+    emptyline();
+}
+
 // ================================================================================
-static bool generateIncidentSectionsCpp()
+static bool generateIncidentSectionsCpp(Descriptor const* descriptor)
 {
     generateHead("incident_sections");
 
     map<string,FieldDescriptor const*> sections;
     int N;
-    Descriptor const* descriptor = IncidentProto::descriptor();
     N = descriptor->field_count();
     for (int i=0; i<N; i++) {
         const FieldDescriptor* field = descriptor->field(i);
@@ -85,11 +88,100 @@
     }
 }
 
-static bool generateSectionListCpp() {
+// Returns field_name with each oldC replaced by newS: field_name's own storage when nothing
+// matches, else a new[] buffer owned by the caller (current callers never free it).
+static const char* replaceAll(const string& field_name, const char oldC, const string& newS) {
+    if (field_name.find(oldC) == string::npos) return field_name.c_str();
+    // Worst case every char expands to max(1, newS.size()); an empty newS used to overflow.
+    const size_t widest = newS.empty() ? 1 : newS.size();
+    char* res = new char[field_name.size() * widest + 1];
+    size_t idx = 0;
+    for (size_t pos = 0; pos != field_name.size(); pos++) {
+        if (field_name[pos] != oldC) {
+            res[idx++] = field_name[pos];
+        } else {
+            idx += newS.copy(res + idx, newS.size());
+        }
+    }
+    res[idx] = '\0';
+    return res;
+}
+
+static inline bool isDefaultDest(const FieldDescriptor* field) {  // field's dest == PrivacyFlags default?
+    return PrivacyFlags::default_instance().dest() == field->options().GetExtension(privacy).dest();
+}
+
+// Returns true if the descriptor doesn't have any non default privacy flags set, including its submessages.
+// Otherwise emits the non default Privacy entries plus a NULL-terminated <alias>_LIST and returns false.
+static bool generatePrivacyFlags(const Descriptor* descriptor, const char* alias, map<string, bool> &msgNames) {
+    bool hasDefaultFlags[descriptor->field_count()]; // NOTE(review): VLA, a compiler extension in C++
+    // iterate through its fields and generate sub flags first
+    for (int i=0; i<descriptor->field_count(); i++) {
+        hasDefaultFlags[i] = true; // set default to true
+        const FieldDescriptor* field = descriptor->field(i);
+        const char* field_name = replaceAll(field->full_name(), '.', "__"); // used as a C++ identifier
+        // check if the same name is already defined
+        if (msgNames.find(field_name) != msgNames.end()) {
+            hasDefaultFlags[i] = msgNames[field_name];
+            continue;
+        }
+        PrivacyFlags p = field->options().GetExtension(privacy);
+        switch (field->type()) {
+            case FieldDescriptor::TYPE_MESSAGE: {
+                // NOTE(review): msgNames is filled only after this returns; a self-nested message may recurse forever
+                const bool childrenDefault = generatePrivacyFlags(field->message_type(), field_name, msgNames);
+                if (childrenDefault && isDefaultDest(field)) break;
+                // <field_name>_LIST is emitted only for non default children; otherwise it would be undefined
+                const string children = childrenDefault ? string("NULL") : string(field_name) + "_LIST";
+                printf("static Privacy %s = { %d, %d, %d, NULL, %s };\n", field_name, field->number(),
+                        (int) field->type(), p.dest(), children.c_str());
+                hasDefaultFlags[i] = false;
+                break;
+            }
+            case FieldDescriptor::TYPE_STRING:
+                if (isDefaultDest(field) && p.patterns_size() == 0) break;
+                printf("static const char* %s_patterns[] = {\n", field_name);
+                for (int j=0; j<p.patterns_size(); j++) {
+                    // the generated string literal must escape backslashes too, so double them here
+                    printf("    \"%s\",\n", replaceAll(p.patterns(j), '\\', "\\\\"));
+                }
+                printf("    NULL };\n");
+                printf("static Privacy %s = { %d, %d, %d, %s_patterns };\n", field_name, field->number(),
+                        (int) field->type(), p.dest(), field_name);
+                hasDefaultFlags[i] = false;
+                break;
+            default:
+                if (isDefaultDest(field)) break;
+                printf("static Privacy %s = { %d, %d, %d };\n", field_name, field->number(),
+                        (int) field->type(), p.dest());
+                hasDefaultFlags[i] = false;
+        }
+        // add the field name to message map, true means it has default flags
+        msgNames[field_name] = hasDefaultFlags[i];
+    }
+    bool allDefaults = true;
+    for (int i=0; i<descriptor->field_count(); i++) {
+        allDefaults &= hasDefaultFlags[i];
+    }
+    if (allDefaults) return true;
+
+    emptyline();
+    printf("const Privacy* %s_LIST[] = {\n", alias);
+    for (int i=0; i<descriptor->field_count(); i++) {
+        const FieldDescriptor* field = descriptor->field(i);
+        if (hasDefaultFlags[i]) continue;
+        printf("    &%s,\n", replaceAll(field->full_name(), '.', "__"));
+    }
+    printf("    NULL };\n");
+    emptyline();
+    return false;
+}
+
+static bool generateSectionListCpp(Descriptor const* descriptor) {
     generateHead("section_list");
 
+    // generates SECTION_LIST
     printf("const Section* SECTION_LIST[] = {\n");
-    Descriptor const* descriptor = IncidentProto::descriptor();
     for (int i=0; i<descriptor->field_count(); i++) {
         const FieldDescriptor* field = descriptor->field(i);
 
@@ -115,8 +207,30 @@
                 break;
         }
     }
-    printf("    NULL\n");
-    printf("};\n");
+    printf("    NULL };\n");
+    emptyline();
+
+    // generates DESTINATION enum values
+    EnumDescriptor const* destination = Destination_descriptor();
+    for (int i=0; i<destination->value_count(); i++) {
+        EnumValueDescriptor const* val = destination->value(i);
+        printf("const uint8_t %s = %d;\n", val->name().c_str(), val->number());
+    }
+    emptyline();
+    printf("const uint8_t DEST_DEFAULT_VALUE = %d;\n", PrivacyFlags::default_instance().dest());
+    emptyline();
+    // populates string type and message type values
+    printf("const uint8_t TYPE_STRING = %d;\n", (int) FieldDescriptor::TYPE_STRING);
+    printf("const uint8_t TYPE_MESSAGE = %d;\n", (int) FieldDescriptor::TYPE_MESSAGE);
+    emptyline();
+
+    // generates PRIVACY_POLICY
+    map<string, bool> messageNames;
+    if (generatePrivacyFlags(descriptor, "PRIVACY_POLICY", messageNames)) {
+        // if no privacy options set at all, define an empty list
+        printf("const Privacy* PRIVACY_POLICY_LIST[] = { NULL };\n");
+    }
+
     return true;
 }
 
@@ -126,11 +240,13 @@
     if (argc != 2) return 1;
     const char* module = argv[1];
 
+    Descriptor const* descriptor = IncidentProto::descriptor();
+
     if (strcmp(module, "incident") == 0) {
-        return !generateIncidentSectionsCpp();
+        return !generateIncidentSectionsCpp(descriptor);
     }
     if (strcmp(module, "incidentd") == 0 ) {
-        return !generateSectionListCpp();
+        return !generateSectionListCpp(descriptor);
     }
 
     // return failure if not called by the whitelisted modules