Implement PII stripper in incidentd, part 1
1. automatically parse privacy options and generate lookup table
2. create FdBuffer iterator API in order to remove dependency on Reporter.h
Bug: 64687253
Test: Unit test for iterator API, and manually tested lookup table
Change-Id: I1ea376a4481fc4afc7bdf447936f767b63690fd3
diff --git a/cmds/incidentd/src/FdBuffer.cpp b/cmds/incidentd/src/FdBuffer.cpp
index 7743301..4d6a36c 100644
--- a/cmds/incidentd/src/FdBuffer.cpp
+++ b/cmds/incidentd/src/FdBuffer.cpp
@@ -174,7 +174,7 @@
if (rpos >= wpos) {
amt = ::read(fd, cirBuf + rpos, BUFFER_SIZE - rpos);
} else {
- amt = :: read(fd, cirBuf + rpos, wpos - rpos);
+ amt = ::read(fd, cirBuf + rpos, wpos - rpos);
}
if (amt < 0) {
if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
@@ -241,6 +241,7 @@
size_t
FdBuffer::size()
{
+ if (mBuffers.empty()) return 0;
return ((mBuffers.size() - 1) * BUFFER_SIZE) + mCurrentWritten;
}
@@ -255,4 +256,30 @@
return NO_ERROR;
}
+// Returns the iterator that marks the position just past the last byte
+// currently stored in this FdBuffer.
+FdBuffer::iterator
+FdBuffer::end()
+{
+    // Nothing stored (or an invalid write count): the range is empty, so
+    // end() coincides with begin().
+    if (mBuffers.empty() || mCurrentWritten < 0) {
+        return begin();
+    }
+    // Common case: the last chunk is only partially filled.
+    if (mCurrentWritten != BUFFER_SIZE) {
+        return FdBuffer::iterator(*this, mBuffers.size() - 1, mCurrentWritten);
+    }
+    // The last chunk is exactly full and FdBuffer did not allocate another one
+    // because there were no more bytes to read; end() is therefore expressed
+    // as offset 0 of the chunk index that follows the last real chunk.
+    return FdBuffer::iterator(*this, mBuffers.size(), 0);
+}
+// Advances this iterator in place by |offset| bytes and returns *this.
+// The position is kept normalized so that mOffset is always smaller than
+// BUFFER_SIZE; whole chunks are carried over into mIndex.
+FdBuffer::iterator&
+FdBuffer::iterator::operator+(size_t offset)
+{
+    const size_t linear = mOffset + offset;
+    mIndex += linear / BUFFER_SIZE;   // number of whole chunks stepped over
+    mOffset = linear % BUFFER_SIZE;   // remaining position inside the chunk
+    return *this;
+}
+
+// Returns the linear byte position of this iterator, counted from the start
+// of the first chunk.
+size_t
+FdBuffer::iterator::bytesRead()
+{
+    const size_t wholeChunkBytes = mIndex * BUFFER_SIZE;
+    return wholeChunkBytes + mOffset;
+}
diff --git a/cmds/incidentd/src/FdBuffer.h b/cmds/incidentd/src/FdBuffer.h
index 03a6d18..e9a53ff 100644
--- a/cmds/incidentd/src/FdBuffer.h
+++ b/cmds/incidentd/src/FdBuffer.h
@@ -21,7 +21,6 @@
#include <utils/Errors.h>
-#include <set>
#include <vector>
using namespace android;
@@ -74,7 +73,8 @@
size_t size();
/**
- * Write the data that we recorded to the fd given.
+ * [Deprecated] Write the data that we recorded to the fd given.
+ * TODO: remove it once the iterator api is working
*/
status_t write(ReportRequestSet* requests);
@@ -83,6 +83,37 @@
*/
int64_t durationMs() { return mFinishTime - mStartTime; }
+    /**
+     * Read data stored in FdBuffer.
+     *
+     * An iterator is a (chunk index, offset inside chunk) position into the
+     * FdBuffer it was created from. Advancing is not bounds checked; use
+     * outOfBound() after a jump to find out whether the position is still valid.
+     */
+    class iterator;
+    friend class iterator;
+    class iterator : public std::iterator<std::random_access_iterator_tag, uint8_t> {
+    private:
+        FdBuffer& mFdBuffer;
+        size_t mIndex;
+        size_t mOffset;
+    public:
+        explicit iterator(FdBuffer& buffer, ssize_t index, ssize_t offset)
+                : mFdBuffer(buffer), mIndex(index), mOffset(offset) {}
+        iterator(const iterator& other) = default;
+
+        // Copies the position of |other| into this iterator. The buffer
+        // reference cannot be re-seated, so only the position is assigned.
+        iterator& operator=(const iterator& other) {
+            mIndex = other.mIndex;
+            mOffset = other.mOffset;
+            return *this;
+        }
+
+        // Implemented in the .cpp file. Note that it advances this iterator
+        // in place and returns *this rather than producing a new iterator.
+        iterator& operator+(size_t offset);
+        iterator& operator+=(size_t offset) { return operator+(offset); }
+        iterator& operator++() { return operator+(1); }
+
+        // Post-increment: advances this iterator and returns the position it
+        // had before the increment.
+        iterator operator++(int) {
+            iterator previous(*this);
+            operator+(1);
+            return previous;
+        }
+
+        // Two iterators are equal when they refer to the same position; the
+        // buffer they belong to is not compared.
+        bool operator==(const iterator& other) const {
+            return mIndex == other.mIndex && mOffset == other.mOffset;
+        }
+        bool operator!=(const iterator& other) const { return !(*this == other); }
+        reference operator*() const { return mFdBuffer.mBuffers[mIndex][mOffset]; }
+
+        // random access could make the iterator out of bound
+        size_t bytesRead();
+        // True once the position lies beyond the stored data; the end()
+        // position itself (bytesRead() == size()) is not out of bound.
+        bool outOfBound() { return bytesRead() > mFdBuffer.size(); }
+    };
+ iterator begin() { return iterator(*this, 0, 0); }
+ iterator end();
+
private:
vector<uint8_t*> mBuffers;
int64_t mStartTime;
diff --git a/cmds/incidentd/src/Section.cpp b/cmds/incidentd/src/Section.cpp
index 0f6f38e..ac87fe3 100644
--- a/cmds/incidentd/src/Section.cpp
+++ b/cmds/incidentd/src/Section.cpp
@@ -22,9 +22,6 @@
#include <private/android_filesystem_config.h>
#include <binder/IServiceManager.h>
#include <mutex>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
#include <wait.h>
#include <unistd.h>
@@ -37,7 +34,7 @@
static pid_t
forkAndExecuteIncidentHelper(const int id, const char* name, Fpipe& p2cPipe, Fpipe& c2pPipe)
{
- const char* ihArgs[] { INCIDENT_HELPER, "-s", to_string(id).c_str(), NULL };
+ const char* ihArgs[] { INCIDENT_HELPER, "-s", String8::format("%d", id).string(), NULL };
// fork used in multithreaded environment, avoid adding unnecessary code in child process
pid_t pid = fork();
diff --git a/cmds/incidentd/src/protobuf.cpp b/cmds/incidentd/src/protobuf.cpp
index a703ef9..b865339 100644
--- a/cmds/incidentd/src/protobuf.cpp
+++ b/cmds/incidentd/src/protobuf.cpp
@@ -16,6 +16,7 @@
#include "protobuf.h"
+
uint8_t*
write_raw_varint(uint8_t* buf, uint32_t val)
{
diff --git a/cmds/incidentd/src/section_list.h b/cmds/incidentd/src/section_list.h
index c977519..1abdb52 100644
--- a/cmds/incidentd/src/section_list.h
+++ b/cmds/incidentd/src/section_list.h
@@ -21,8 +21,46 @@
/**
* This is the mapping of section IDs to the commands that are run to get those commands.
+ * The section IDs are guaranteed to be in ascending order
*/
extern const Section* SECTION_LIST[];
+/*
+ * In order to avoid depending on libprotobuf-cpp-full or libplatformprotos in incidentd,
+ * the data structures for the privacy options are explicitly redefined in this file.
+ */
+
+// DESTINATION enum
+extern const uint8_t DEST_LOCAL;
+extern const uint8_t DEST_EXPLICIT;
+extern const uint8_t DEST_AUTOMATIC;
+
+// This is the default value of DEST enum
+// Fields with this value do not generate a Privacy entry, to avoid emitting too much generated code
+extern const uint8_t DEST_DEFAULT_VALUE;
+
+// type of the field, identical to the protobuf definition
+extern const uint8_t TYPE_STRING;
+extern const uint8_t TYPE_MESSAGE;
+
+// Privacy policy of a single proto field. The generator in
+// tools/incident_section_gen/main.cpp emits instances of this struct with
+// positional initializers, so the member order below must not change.
+struct Privacy {
+ // proto field number (the generator writes FieldDescriptor::number() here)
+ int field_id;
+ // proto field type (the generator writes FieldDescriptor::type() here);
+ // comparable with TYPE_STRING / TYPE_MESSAGE
+ uint8_t type;
+
+ // the following two fields are identical to
+ // frameworks/base/libs/incident/proto/android/privacy.proto
+ uint8_t dest;
+ // NULL-terminated array of patterns for string fields; the generator
+ // writes NULL (or leaves it unset) for other field types
+ const char** patterns;
+
+ // ignore parent's privacy flags if children are set, NULL-terminated
+ const Privacy** children;
+};
+
+/**
+ * This is the mapping of section IDs to each section's privacy policy.
+ * The section IDs are guaranteed to be in ascending order
+ */
+extern const Privacy* PRIVACY_POLICY_LIST[];
+
#endif // SECTION_LIST_H
diff --git a/cmds/incidentd/tests/FdBuffer_test.cpp b/cmds/incidentd/tests/FdBuffer_test.cpp
index ba8b77a..403a2ab 100644
--- a/cmds/incidentd/tests/FdBuffer_test.cpp
+++ b/cmds/incidentd/tests/FdBuffer_test.cpp
@@ -92,6 +92,30 @@
AssertBufferContent(testdata.c_str());
}
+// An untouched FdBuffer has an empty range, and stepping past it is reported
+// as out of bound.
+TEST_F(FdBufferTest, IterateEmpty) {
+    FdBuffer::iterator cursor = buffer.begin();
+    EXPECT_EQ(cursor, buffer.end());
+
+    // one step beyond the (empty) data
+    cursor += 1;
+    EXPECT_TRUE(cursor.outOfBound());
+}
+
+// Reads a short string into the buffer and checks both byte-by-byte
+// iteration and a single jump to the end.
+TEST_F(FdBufferTest, ReadAndIterate) {
+    std::string testdata = "FdBuffer test string";
+    ASSERT_TRUE(WriteStringToFile(testdata, tf.path, false));
+    ASSERT_EQ(NO_ERROR, buffer.read(tf.fd, READ_TIMEOUT));
+
+    // walk the buffer one byte at a time and compare with the source string
+    int pos = 0;
+    FdBuffer::iterator cursor = buffer.begin();
+    while (cursor != buffer.end()) {
+        EXPECT_EQ(*cursor, (uint8_t)testdata[pos++]);
+        ++cursor;
+    }
+
+    // jumping size() bytes from begin() must land exactly on end()
+    FdBuffer::iterator jumped = buffer.begin();
+    jumped += buffer.size();
+    EXPECT_EQ(jumped, buffer.end());
+    EXPECT_EQ(jumped.bytesRead(), testdata.size());
+    EXPECT_FALSE(jumped.outOfBound());
+}
+
TEST_F(FdBufferTest, ReadTimeout) {
int pid = fork();
ASSERT_TRUE(pid != -1);
@@ -202,6 +226,7 @@
TEST_F(FdBufferTest, ReadInStreamMoreThan4MB) {
const std::string testFile = kTestDataPath + "morethan4MB.txt";
+ size_t fourMB = (size_t) 4 * 1024 * 1024;
int fd = open(testFile.c_str(), O_RDONLY);
ASSERT_NE(fd, -1);
int pid = fork();
@@ -220,10 +245,18 @@
ASSERT_EQ(NO_ERROR, buffer.readProcessedDataInStream(fd,
p2cPipe.writeFd(), c2pPipe.readFd(), READ_TIMEOUT));
- EXPECT_EQ(buffer.size(), (size_t) (4 * 1024 * 1024));
+ EXPECT_EQ(buffer.size(), fourMB);
EXPECT_FALSE(buffer.timedOut());
EXPECT_TRUE(buffer.truncated());
wait(&pid);
+ FdBuffer::iterator it = buffer.begin();
+ it += fourMB;
+ EXPECT_EQ(it.bytesRead(), fourMB);
+ EXPECT_EQ(it, buffer.end());
+ for (FdBuffer::iterator it = buffer.begin(); it != buffer.end(); it++) {
+ char c = 'A' + (it.bytesRead() % 64 / 8);
+ ASSERT_TRUE(*it == c);
+ }
}
}
diff --git a/core/proto/android/os/incident.proto b/core/proto/android/os/incident.proto
index bef5e75..5dfcd2a 100644
--- a/core/proto/android/os/incident.proto
+++ b/core/proto/android/os/incident.proto
@@ -47,6 +47,9 @@
Cause cause = 1;
}
+// privacy field options must not be set at this level because all
+// the sections are able to be controlled and configured by section ids.
+// Instead privacy field options need to be configured in each section proto message.
message IncidentProto {
// Incident header
repeated IncidentHeaderProto header = 1;
diff --git a/libs/incident/proto/android/privacy.proto b/libs/incident/proto/android/privacy.proto
index ae5af0e..5fd75d6 100644
--- a/libs/incident/proto/android/privacy.proto
+++ b/libs/incident/proto/android/privacy.proto
@@ -36,7 +36,7 @@
// off the device with an explicit user action.
DEST_EXPLICIT = 1;
- // Fields or messages annotated with DEST_LOCAL can be sent by
+ // Fields or messages annotated with DEST_AUTOMATIC can be sent by
// automatic means, without per-sending user consent. The user
// still must have previously accepted a consent to share this
// information.
@@ -47,8 +47,11 @@
message PrivacyFlags {
optional Destination dest = 1 [
- default = DEST_LOCAL
+ default = DEST_EXPLICIT
];
+
+ // regex to filter pii sensitive info from a string field type
+ repeated string patterns = 2;
}
extend google.protobuf.FieldOptions {
diff --git a/tools/incident_section_gen/main.cpp b/tools/incident_section_gen/main.cpp
index 8e5c4f9..e76fef5 100644
--- a/tools/incident_section_gen/main.cpp
+++ b/tools/incident_section_gen/main.cpp
@@ -27,21 +27,24 @@
using namespace google::protobuf::internal;
using namespace std;
-static void generateHead(const char* header) {
- printf("// Auto generated file. Do not modify\n");
- printf("\n");
- printf("#include \"%s.h\"\n", header);
+static inline void emptyline() {
printf("\n");
}
+static void generateHead(const char* header) {
+ printf("// Auto generated file. Do not modify\n");
+ emptyline();
+ printf("#include \"%s.h\"\n", header);
+ emptyline();
+}
+
// ================================================================================
-static bool generateIncidentSectionsCpp()
+static bool generateIncidentSectionsCpp(Descriptor const* descriptor)
{
generateHead("incident_sections");
map<string,FieldDescriptor const*> sections;
int N;
- Descriptor const* descriptor = IncidentProto::descriptor();
N = descriptor->field_count();
for (int i=0; i<N; i++) {
const FieldDescriptor* field = descriptor->field(i);
@@ -85,11 +88,100 @@
}
}
-static bool generateSectionListCpp() {
+// Returns a copy of |field_name| in which every occurrence of the character
+// |oldC| is replaced by the string |newS| (which may be empty).
+// The result is returned by value so that no caller has to reason about who
+// owns a raw character buffer.
+static string replaceAll(const string& field_name, const char oldC, const string& newS) {
+    string res;
+    res.reserve(field_name.size());
+    for (size_t pos = 0; pos < field_name.size(); pos++) {
+        const char cur = field_name[pos];
+        if (cur == oldC) {
+            res += newS;
+        } else {
+            res += cur;
+        }
+    }
+    return res;
+}
+
+// Returns true if the destination set on |field| equals the destination of a
+// default PrivacyFlags instance.
+static inline bool isDefaultDest(const FieldDescriptor* field) {
+    return field->options().GetExtension(privacy).dest() == PrivacyFlags::default_instance().dest();
+}
+
+// Prints the Privacy definitions for the fields of |descriptor| (sub-messages
+// first) followed by a NULL-terminated "<alias>_LIST" array that points at them.
+// |msgNames| maps the mangled name of every field handled so far to whether it
+// has default flags (true), so that a field is never defined twice.
+// Returns true if the descriptor doesn't have any non default privacy flags set,
+// including its submessages; in that case no "<alias>_LIST" is printed.
+// NOTE(review): a field is recorded in |msgNames| only after its sub-message
+// has been processed, so a message type that contains itself would recurse
+// without bound - confirm no such proto is fed to this tool.
+static bool generatePrivacyFlags(const Descriptor* descriptor, const char* alias, map<string, bool> &msgNames) {
+    // Text of the "&<name>," entries of the list, in field order. Stays empty
+    // when every field has default flags.
+    string listEntries;
+
+    // iterate through the fields and generate sub flags first
+    for (int i = 0; i < descriptor->field_count(); i++) {
+        const FieldDescriptor* field = descriptor->field(i);
+        const string mangledName = replaceAll(field->full_name(), '.', "__");
+        const char* field_name = mangledName.c_str();
+
+        // check if the same name is already defined; reuse the recorded result
+        map<string, bool>::const_iterator known = msgNames.find(mangledName);
+        if (known != msgNames.end()) {
+            if (!known->second) listEntries += " &" + mangledName + ",\n";
+            continue;
+        }
+
+        const PrivacyFlags& p = field->options().GetExtension(privacy);
+        bool hasDefaultFlags = true;
+
+        switch (field->type()) {
+            case FieldDescriptor::TYPE_MESSAGE: {
+                const bool childrenDefault =
+                        generatePrivacyFlags(field->message_type(), field_name, msgNames);
+                if (childrenDefault && isDefaultDest(field)) break;
+
+                if (childrenDefault) {
+                    // No "<name>_LIST" was printed for this sub-message, so the
+                    // children pointer must not reference it.
+                    printf("static Privacy %s = { %d, %d, %d, NULL, NULL };\n", field_name,
+                            field->number(), (int) field->type(), p.dest());
+                } else {
+                    printf("static Privacy %s = { %d, %d, %d, NULL, %s_LIST };\n", field_name,
+                            field->number(), (int) field->type(), p.dest(), field_name);
+                }
+                hasDefaultFlags = false;
+                break;
+            }
+            case FieldDescriptor::TYPE_STRING:
+                if (isDefaultDest(field) && p.patterns_size() == 0) break;
+
+                printf("static const char* %s_patterns[] = {\n", field_name);
+                for (int j = 0; j < p.patterns_size(); j++) {
+                    // the pattern ends up inside a generated string literal, so
+                    // every backslash has to be doubled here
+                    printf(" \"%s\",\n", replaceAll(p.patterns(j), '\\', "\\\\").c_str());
+                }
+                printf(" NULL };\n");
+                printf("static Privacy %s = { %d, %d, %d, %s_patterns };\n", field_name,
+                        field->number(), (int) field->type(), p.dest(), field_name);
+                hasDefaultFlags = false;
+                break;
+            default:
+                if (isDefaultDest(field)) break;
+                printf("static Privacy %s = { %d, %d, %d };\n", field_name, field->number(),
+                        (int) field->type(), p.dest());
+                hasDefaultFlags = false;
+        }
+
+        // add the field name to message map, true means it has default flags
+        msgNames[mangledName] = hasDefaultFlags;
+        if (!hasDefaultFlags) listEntries += " &" + mangledName + ",\n";
+    }
+
+    if (listEntries.empty()) return true;
+
+    emptyline();
+    printf("const Privacy* %s_LIST[] = {\n", alias);
+    printf("%s", listEntries.c_str());
+    printf(" NULL };\n");
+    emptyline();
+    return false;
+}
+
+static bool generateSectionListCpp(Descriptor const* descriptor) {
generateHead("section_list");
+ // generates SECTION_LIST
printf("const Section* SECTION_LIST[] = {\n");
- Descriptor const* descriptor = IncidentProto::descriptor();
for (int i=0; i<descriptor->field_count(); i++) {
const FieldDescriptor* field = descriptor->field(i);
@@ -115,8 +207,30 @@
break;
}
}
- printf(" NULL\n");
- printf("};\n");
+ printf(" NULL };\n");
+ emptyline();
+
+ // generates DESTINATION enum values
+ EnumDescriptor const* destination = Destination_descriptor();
+ for (int i=0; i<destination->value_count(); i++) {
+ EnumValueDescriptor const* val = destination->value(i);
+ printf("const uint8_t %s = %d;\n", val->name().c_str(), val->number());
+ }
+ emptyline();
+ printf("const uint8_t DEST_DEFAULT_VALUE = %d;\n", PrivacyFlags::default_instance().dest());
+ emptyline();
+ // populates string type and message type values
+ printf("const uint8_t TYPE_STRING = %d;\n", (int) FieldDescriptor::TYPE_STRING);
+ printf("const uint8_t TYPE_MESSAGE = %d;\n", (int) FieldDescriptor::TYPE_MESSAGE);
+ emptyline();
+
+ // generates PRIVACY_POLICY
+ map<string, bool> messageNames;
+ if (generatePrivacyFlags(descriptor, "PRIVACY_POLICY", messageNames)) {
+ // if no privacy options set at all, define an empty list
+ printf("const Privacy* PRIVACY_POLICY_LIST[] = { NULL };\n");
+ }
+
return true;
}
@@ -126,11 +240,13 @@
if (argc != 2) return 1;
const char* module = argv[1];
+ Descriptor const* descriptor = IncidentProto::descriptor();
+
if (strcmp(module, "incident") == 0) {
- return !generateIncidentSectionsCpp();
+ return !generateIncidentSectionsCpp(descriptor);
}
if (strcmp(module, "incidentd") == 0 ) {
- return !generateSectionListCpp();
+ return !generateSectionListCpp(descriptor);
}
// return failure if not called by the whitelisted modules