logd: sepolicy dynamic rate limiting
Processing overhead for selinux violation messages is costly. We want
to deal with bursts of violations, but we have no intent of allowing
that sustained burst to go unabated as there is a cost of processing
and battery usage.
Tunables in libaudit.h are:
AUDIT_RATE_LIMIT_DEFAULT 20 /* acceptable burst rate */
AUDIT_RATE_LIMIT_BURST_DURATION 10 /* number of seconds of burst */
AUDIT_RATE_LIMIT_MAX 5 /* acceptable sustained rate */
Since we can only asymptotically handle DEFAULT rate, we set an upper
threshold of half way between the MAX and DEFAULT rate.
Default kernel audit subsystem message rate is set to 20 a second.
If sepolicy exceeds 125 violation messages over up to ten seconds
(>=~12/s), tell kernel audit subsystem to drop the rate to 5 messages
a second. If rate drops below 50 messages over the past ten seconds
(<5/s), tell kernel it is ok to increase the burst rate back to 20
messages a second.
Test: gTest logd-unit-tests --gtest_filter=logd.sepolicy_rate_limiter_*
Bug: 27878170
Change-Id: I843f8dcfbb3ecfbbe94a4865ea332c858e3be7f2
diff --git a/logd/LogAudit.cpp b/logd/LogAudit.cpp
index 11ffcb7..92d957f 100644
--- a/logd/LogAudit.cpp
+++ b/logd/LogAudit.cpp
@@ -43,7 +43,7 @@
'>'
LogAudit::LogAudit(LogBuffer *buf, LogReader *reader, int fdDmesg) :
- SocketListener(getLogSocket(), false),
+ SocketListener(mSock = getLogSocket(), false),
logbuf(buf),
reader(reader),
fdDmesg(fdDmesg),
@@ -51,19 +51,62 @@
BOOL_DEFAULT_TRUE)),
events(__android_logger_property_get_bool("ro.logd.auditd.events",
BOOL_DEFAULT_TRUE)),
- initialized(false) {
+ initialized(false),
+ tooFast(false) {
static const char auditd_message[] = { KMSG_PRIORITY(LOG_INFO),
'l', 'o', 'g', 'd', '.', 'a', 'u', 'd', 'i', 't', 'd', ':',
' ', 's', 't', 'a', 'r', 't', '\n' };
write(fdDmesg, auditd_message, sizeof(auditd_message));
}
+void LogAudit::checkRateLimit() {
+
+ // trim list for AUDIT_RATE_LIMIT_BURST_DURATION of history
+ log_time oldest(AUDIT_RATE_LIMIT_BURST_DURATION, 0);
+ bucket.emplace(android_log_clockid());
+ oldest = bucket.back() - oldest;
+ while (bucket.front() < oldest) bucket.pop();
+
+ static const size_t upperThreshold =
+ ((AUDIT_RATE_LIMIT_BURST_DURATION *
+ (AUDIT_RATE_LIMIT_DEFAULT + AUDIT_RATE_LIMIT_MAX)) + 1) /
+ 2;
+ if (bucket.size() >= upperThreshold) {
+ // Hit peak, slow down source
+ if (!tooFast) {
+ tooFast = true;
+ audit_rate_limit(mSock, AUDIT_RATE_LIMIT_MAX);
+ }
+
+ // We do not need to hold on to the full set of timing data history,
+ // let's ensure it does not grow without bounds. This also ensures
+ // that std::dequeue underneath behaves almost like a ring buffer.
+ do {
+ bucket.pop();
+ } while (bucket.size() >= upperThreshold);
+ return;
+ }
+
+ if (!tooFast) return;
+
+ static const size_t lowerThreshold = AUDIT_RATE_LIMIT_BURST_DURATION *
+ AUDIT_RATE_LIMIT_MAX;
+
+ if (bucket.size() >= lowerThreshold) return;
+
+ tooFast = false;
+ // Went below max sustained rate, allow source to speed up
+ audit_rate_limit(mSock, AUDIT_RATE_LIMIT_DEFAULT);
+}
+
bool LogAudit::onDataAvailable(SocketClient *cli) {
if (!initialized) {
prctl(PR_SET_NAME, "logd.auditd");
initialized = true;
}
+ checkRateLimit();
+
struct audit_message rep;
rep.nlh.nlmsg_type = 0;
@@ -75,8 +118,7 @@
return false;
}
- logPrint("type=%d %.*s",
- rep.nlh.nlmsg_type, rep.nlh.nlmsg_len, rep.data);
+ logPrint("type=%d %.*s", rep.nlh.nlmsg_type, rep.nlh.nlmsg_len, rep.data);
return true;
}
@@ -351,5 +393,6 @@
audit_close(fd);
fd = -1;
}
+ (void)audit_rate_limit(fd, AUDIT_RATE_LIMIT_DEFAULT);
return fd;
}
diff --git a/logd/LogAudit.h b/logd/LogAudit.h
index 844951d..045d631 100644
--- a/logd/LogAudit.h
+++ b/logd/LogAudit.h
@@ -17,6 +17,8 @@
#ifndef _LOGD_LOG_AUDIT_H__
#define _LOGD_LOG_AUDIT_H__
+#include <queue>
+
#include <sysutils/SocketListener.h>
#include "LogBuffer.h"
@@ -31,6 +33,11 @@
bool events;
bool initialized;
+ bool tooFast;
+ int mSock;
+ std::queue<log_time> bucket;
+ void checkRateLimit();
+
public:
LogAudit(LogBuffer *buf, LogReader *reader, int fdDmesg);
int log(char *buf, size_t len);
diff --git a/logd/libaudit.c b/logd/libaudit.c
index d2b212e..216f1a1 100644
--- a/logd/libaudit.c
+++ b/logd/libaudit.c
@@ -149,7 +149,7 @@
return rc;
}
-int audit_setup(int fd, uint32_t pid)
+int audit_setup(int fd, pid_t pid)
{
int rc;
struct audit_message rep;
@@ -163,8 +163,7 @@
* and the the mask set to AUDIT_STATUS_PID
*/
status.pid = pid;
- status.mask = AUDIT_STATUS_PID | AUDIT_STATUS_RATE_LIMIT;
- status.rate_limit = 20; // audit entries per second
+ status.mask = AUDIT_STATUS_PID;
/* Let the kernel know this pid will be registering for audit events */
rc = audit_send(fd, AUDIT_SET, &status, sizeof(status));
@@ -187,6 +186,27 @@
return 0;
}
+int audit_rate_limit(int fd, unsigned rate_limit)
+{
+ int rc;
+ struct audit_message rep;
+ struct audit_status status;
+
+ memset(&status, 0, sizeof(status));
+
+ status.mask = AUDIT_STATUS_RATE_LIMIT;
+ status.rate_limit = rate_limit; /* audit entries per second */
+
+ rc = audit_send(fd, AUDIT_SET, &status, sizeof(status));
+ if (rc < 0) {
+ return rc;
+ }
+
+ audit_get_reply(fd, &rep, GET_REPLY_NONBLOCKING, 0);
+
+ return 0;
+}
+
int audit_open()
{
return socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_AUDIT);
diff --git a/logd/libaudit.h b/logd/libaudit.h
index b9e330d..9865d43 100644
--- a/logd/libaudit.h
+++ b/logd/libaudit.h
@@ -33,7 +33,7 @@
#define MAX_AUDIT_MESSAGE_LENGTH 8970
typedef enum {
- GET_REPLY_BLOCKING=0,
+ GET_REPLY_BLOCKING = 0,
GET_REPLY_NONBLOCKING
} reply_t;
@@ -55,7 +55,7 @@
* A valid fd on success or < 0 on error with errno set.
* Returns the same errors as man 2 socket.
*/
-extern int audit_open(void);
+extern int audit_open(void);
/**
* Closes the fd returned from audit_open()
@@ -78,19 +78,36 @@
* @return
* This function returns 0 on success, else -errno.
*/
-extern int audit_get_reply(int fd, struct audit_message *rep, reply_t block,
- int peek);
+extern int audit_get_reply(int fd, struct audit_message *rep, reply_t block,
+ int peek);
/**
- * Sets a pid to recieve audit netlink events from the kernel
+ * Sets a pid to receive audit netlink events from the kernel
* @param fd
* The fd returned by a call to audit_open()
* @param pid
- * The pid whom to set as the reciever of audit messages
+ * The pid whom to set as the receiver of audit messages
* @return
* This function returns 0 on success, -errno on error.
*/
-extern int audit_setup(int fd, uint32_t pid);
+extern int audit_setup(int fd, pid_t pid);
+
+/**
+ * Sets the rate limit to receive audit netlink events from the kernel
+ * @param fd
+ * The fd returned by a call to audit_open()
+ * @param max_rate
+ * The cap of the maximum number of audit messages a second
+ * @return
+ * This function returns 0 on success, -errno on error.
+ */
+
+/* Guidelines to follow for dynamic rate_limit */
+#define AUDIT_RATE_LIMIT_DEFAULT 20 /* acceptable burst rate */
+#define AUDIT_RATE_LIMIT_BURST_DURATION 10 /* number of seconds of burst */
+#define AUDIT_RATE_LIMIT_MAX 5 /* acceptable sustained rate */
+
+extern int audit_rate_limit(int fd, unsigned rate_limit);
__END_DECLS
diff --git a/logd/tests/Android.mk b/logd/tests/Android.mk
index 808087a..c053993 100644
--- a/logd/tests/Android.mk
+++ b/logd/tests/Android.mk
@@ -27,12 +27,15 @@
# Unit tests.
# -----------------------------------------------------------------------------
+event_flag := -DAUDITD_LOG_TAG=1003 -DCHATTY_LOG_TAG=1004
+
test_c_flags := \
-fstack-protector-all \
-g \
-Wall -Wextra \
-Werror \
-fno-builtin \
+ $(event_flag)
test_src_files := \
logd_test.cpp
@@ -43,6 +46,6 @@
LOCAL_MODULE := $(test_module_prefix)unit-tests
LOCAL_MODULE_TAGS := $(test_tags)
LOCAL_CFLAGS += $(test_c_flags)
-LOCAL_SHARED_LIBRARIES := libbase libcutils liblog
+LOCAL_SHARED_LIBRARIES := libbase libcutils liblog libselinux
LOCAL_SRC_FILES := $(test_src_files)
include $(BUILD_NATIVE_TEST)
diff --git a/logd/tests/logd_test.cpp b/logd/tests/logd_test.cpp
index 5cd9fc8..703c0fb 100644
--- a/logd/tests/logd_test.cpp
+++ b/logd/tests/logd_test.cpp
@@ -20,6 +20,9 @@
#include <signal.h>
#include <stdio.h>
#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
#include <string>
@@ -28,7 +31,12 @@
#include <cutils/sockets.h>
#include <gtest/gtest.h>
#include <log/log.h>
+#include <private/android_filesystem_config.h>
+#ifdef __ANDROID__
+#include <selinux/selinux.h>
+#endif
+#include "../libaudit.h" // pickup AUDIT_RATE_LIMIT_*
#include "../LogReader.h" // pickup LOGD_SNDTIMEO
/*
@@ -873,10 +881,10 @@
++second_count;
}
} else if (eventData[4] == EVENT_TYPE_STRING) {
- // chatty
- if (tag != 1004) continue;
+ if (tag != CHATTY_LOG_TAG) continue;
++chatty_count;
// int len = get4LE(eventData + 4 + 1);
+ log_msg.buf[LOGGER_ENTRY_MAX_LEN] = '\0';
const char *cp = strstr(eventData + 4 + 1 + 4, " expire ");
if (!cp) continue;
unsigned val = 0;
@@ -908,3 +916,154 @@
TEST(logd, multiple_test_10) {
__android_log_btwrite_multiple__helper(10);
}
+
+#ifdef __ANDROID__
+// returns violating pid
+static pid_t sepolicy_rate(unsigned rate, unsigned num) {
+ pid_t pid = fork();
+
+ if (pid) {
+ siginfo_t info = {};
+ if (TEMP_FAILURE_RETRY(waitid(P_PID, pid, &info, WEXITED))) return 0;
+ if (info.si_status) return 0;
+ return pid;
+ }
+
+ // We may have DAC, but let's not have MAC
+ if (setcon("u:object_r:shell:s0") < 0) {
+ int save_errno = errno;
+ security_context_t context;
+ getcon(&context);
+ fprintf(stderr, "setcon(\"u:r:shell:s0\") failed @\"%s\" %s\n",
+ context, strerror(save_errno));
+ freecon(context);
+ _exit(-1);
+ // NOTREACHED
+ return 0;
+ }
+
+ // Requests dac_read_search, falls back to request dac_override
+ rate /= 2;
+ useconds_t usec;
+ if (rate == 0) {
+ rate = 1;
+ usec = 2000000;
+ } else {
+ usec = (1000000 + (rate / 2)) / rate;
+ }
+ num = (num + 1) / 2;
+
+ if (usec < 2) usec = 2;
+
+ while (num > 0) {
+ if (access(android::base::StringPrintf(
+ "/data/misc/logd/cannot_access_directory_%u",
+ num).c_str(),
+ F_OK) == 0) {
+ _exit(-1);
+ // NOTREACHED
+ return 0;
+ }
+ usleep(usec);
+ --num;
+ }
+ _exit(0);
+ // NOTREACHED
+ return 0;
+}
+
+static int count_avc(pid_t pid) {
+ int count = 0;
+
+ if (pid == 0) return count;
+
+ struct logger_list *logger_list;
+ if (!(logger_list = android_logger_list_open(LOG_ID_EVENTS,
+ ANDROID_LOG_RDONLY |
+ ANDROID_LOG_NONBLOCK,
+ 0,
+ pid))) return count;
+ for (;;) {
+ log_msg log_msg;
+
+ if (android_logger_list_read(logger_list, &log_msg) <= 0) break;
+
+ if ((log_msg.entry.pid != pid) ||
+ (log_msg.entry.len < (4 + 1 + 8)) ||
+ (log_msg.id() != LOG_ID_EVENTS)) continue;
+
+ char *eventData = log_msg.msg();
+ if (!eventData) continue;
+
+ uint32_t tag = get4LE(eventData);
+ if (tag != AUDITD_LOG_TAG) continue;
+
+ if (eventData[4] != EVENT_TYPE_STRING) continue;
+
+ // int len = get4LE(eventData + 4 + 1);
+ log_msg.buf[LOGGER_ENTRY_MAX_LEN] = '\0';
+ const char *cp = strstr(eventData + 4 + 1 + 4, "): avc: ");
+ if (!cp) continue;
+
+ ++count;
+ }
+
+ android_logger_list_close(logger_list);
+
+ return count;
+}
+#endif
+
+TEST(logd, sepolicy_rate_limiter_maximum) {
+#ifdef __ANDROID__
+ static const int rate = AUDIT_RATE_LIMIT_MAX;
+ static const int duration = 2;
+ // Two seconds of a liveable sustained rate
+ EXPECT_EQ(rate * duration, count_avc(sepolicy_rate(rate, rate * duration)));
+#else
+ GTEST_LOG_(INFO) << "This test does nothing.\n";
+#endif
+}
+
+TEST(logd, sepolicy_rate_limiter_sub_burst) {
+#ifdef __ANDROID__
+ // maximum period below half way between sustainable and burst rate.
+ static const int threshold = ((AUDIT_RATE_LIMIT_BURST_DURATION *
+ (AUDIT_RATE_LIMIT_DEFAULT +
+ AUDIT_RATE_LIMIT_MAX)) +
+ 1) / 2;
+ static const int rate = (threshold / AUDIT_RATE_LIMIT_BURST_DURATION) - 1;
+ static const int duration = AUDIT_RATE_LIMIT_BURST_DURATION;
+ EXPECT_EQ(rate * duration, count_avc(sepolicy_rate(rate, rate * duration)));
+#else
+ GTEST_LOG_(INFO) << "This test does nothing.\n";
+#endif
+}
+
+TEST(logd, sepolicy_rate_limiter_spam) {
+#ifdef __ANDROID__
+ // maximum period of double the maximum burst rate
+ static const int threshold = ((AUDIT_RATE_LIMIT_BURST_DURATION *
+ (AUDIT_RATE_LIMIT_DEFAULT +
+ AUDIT_RATE_LIMIT_MAX)) +
+ 1) / 2;
+ static const int rate = AUDIT_RATE_LIMIT_DEFAULT * 2;
+ static const int duration = threshold / AUDIT_RATE_LIMIT_DEFAULT;
+ EXPECT_GE(((AUDIT_RATE_LIMIT_DEFAULT * duration) * 115) /
+ 100, // +15% margin
+ count_avc(sepolicy_rate(rate, rate * duration)));
+ // give logd another 3 seconds to react to the burst before checking
+ sepolicy_rate(rate, rate * 3);
+ // maximum period at double the maximum burst rate (spam filter kicked in)
+ EXPECT_GE(((AUDIT_RATE_LIMIT_MAX * AUDIT_RATE_LIMIT_BURST_DURATION) * 130) /
+ 100, // +30% margin
+ count_avc(sepolicy_rate(rate,
+ rate * AUDIT_RATE_LIMIT_BURST_DURATION)));
+ // cool down, and check unspammy rate still works
+ sleep(2);
+ EXPECT_LE(AUDIT_RATE_LIMIT_BURST_DURATION - 1, // allow _one_ to be lost
+ count_avc(sepolicy_rate(1, AUDIT_RATE_LIMIT_BURST_DURATION)));
+#else
+ GTEST_LOG_(INFO) << "This test does nothing.\n";
+#endif
+}