llkd: handle 'adbd shell setsid' to preserve adbd
A zombie setsid process occurs when adb shell setsid <command> is
issued, however llkd can only detect if it is a result of a kernel
livelock by killing the associated parent, which would be adbd;
resulting in the adb connection(s) being terminated. Will special
case this condition in order to preserve adbd for debugging purposes.
We parse <parent>&<child> in ro.llk.blacklist.parent as this
association, thus adbd&[setsid] covers this special case.
Ampersand was selected because it is never part of a process name,
however a setprop in the shell requires it to be escaped or quoted;
init rc file where this is normally specified does not have issue.
getComm() is effectively pure, so hold on to the return value for
sake of efficiency.
This also reverts commit 599958d1148c1137bee925f4cf08eda7f8050d98
which granted adbd blanket parent immunity from monitoring on
userdebug builds. The new logic is a more refined means of
preserving the live lock checking associated with adbd and allows
the operation to be performed on user builds.
POC: date ; adb shell setsid sleep 900 ; date
Positive for bug, reports less than 15 minutes, otherwise solved.
Test: llkd_unit_test
Bug: 120983740
Change-Id: I6442463a48499d925a3a074423a24a1622905559
diff --git a/llkd/README.md b/llkd/README.md
index 43bb94a..191f988 100644
--- a/llkd/README.md
+++ b/llkd/README.md
@@ -165,9 +165,14 @@
NB: false is a very very very unlikely process to want to blacklist.
#### ro.llk.blacklist.parent
-default 0,2,adbd (kernel, [kthreadd] and adbd).
+default 0,2,adbd&[setsid] (kernel, [kthreadd] and adbd *only for zombie setsid*).
Do not watch processes that have this parent.
-A parent process can be comm, cmdline or pid reference.
+An ampersand (*&*) separator is used to specify that the parent is ignored
+only in combination with the target child process.
+Ampersand was selected because it is never part of a process name,
+however a setprop in the shell requires it to be escaped or quoted;
+init rc file where this is normally specified does not have this issue.
+A parent or target processes can be specified as comm, cmdline or pid reference.
#### ro.llk.blacklist.uid
default *empty* or false, comma separated list of uid numbers or names.
diff --git a/llkd/include/llkd.h b/llkd/include/llkd.h
index 7b7dbf9..3586ca1 100644
--- a/llkd/include/llkd.h
+++ b/llkd/include/llkd.h
@@ -56,11 +56,7 @@
#define LLK_BLACKLIST_PROCESS_DEFAULT \
"0,1,2,init,[kthreadd],[khungtaskd],lmkd,llkd,watchdogd,[watchdogd],[watchdogd/0]"
#define LLK_BLACKLIST_PARENT_PROPERTY "ro.llk.blacklist.parent"
-#ifdef __PTRACE_ENABLED__ // defined if userdebug build
-#define LLK_BLACKLIST_PARENT_DEFAULT "0,2,[kthreadd],adbd"
-#else
-#define LLK_BLACKLIST_PARENT_DEFAULT "0,2,[kthreadd]"
-#endif
+#define LLK_BLACKLIST_PARENT_DEFAULT "0,2,[kthreadd],adbd&[setsid]"
#define LLK_BLACKLIST_UID_PROPERTY "ro.llk.blacklist.uid"
#define LLK_BLACKLIST_UID_DEFAULT ""
#define LLK_BLACKLIST_STACK_PROPERTY "ro.llk.blacklist.process.stack"
diff --git a/llkd/libllkd.cpp b/llkd/libllkd.cpp
index 3a593ec..3c295b5 100644
--- a/llkd/libllkd.cpp
+++ b/llkd/libllkd.cpp
@@ -108,6 +108,9 @@
// list of parent pids, comm or cmdline names to skip. default:
// kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied
std::unordered_set<std::string> llkBlacklistParent;
+// list of parent and target processes to skip. default:
+// adbd *and* [setsid]
+std::unordered_map<std::string, std::unordered_set<std::string>> llkBlacklistParentAndChild;
// list of uids, and uid names, to skip, default nothing
std::unordered_set<std::string> llkBlacklistUid;
#ifdef __PTRACE_ENABLED__
@@ -624,6 +627,19 @@
return ret;
}
+std::string llkFormat(
+ const std::unordered_map<std::string, std::unordered_set<std::string>>& blacklist,
+ bool leading_comma = false) {
+ std::string ret;
+ for (const auto& entry : blacklist) {
+ for (const auto& target : entry.second) {
+ if (leading_comma || !ret.empty()) ret += ",";
+ ret += entry.first + "&" + target;
+ }
+ }
+ return ret;
+}
+
// This function parses the properties as a list, incorporating the supplied
// default. A leading comma separator means preserve the defaults and add
// entries (with an optional leading + sign), or removes entries with a leading
@@ -691,6 +707,27 @@
return false;
}
+const std::unordered_set<std::string>& llkSkipName(
+ const std::string& name,
+ const std::unordered_map<std::string, std::unordered_set<std::string>>& blacklist) {
+ static const std::unordered_set<std::string> empty;
+ if (name.empty() || blacklist.empty()) return empty;
+ auto found = blacklist.find(name);
+ if (found == blacklist.end()) return empty;
+ return found->second;
+}
+
+bool llkSkipPproc(proc* pprocp, proc* procp,
+ const std::unordered_map<std::string, std::unordered_set<std::string>>&
+ blacklist = llkBlacklistParentAndChild) {
+ if (!pprocp || !procp || blacklist.empty()) return false;
+ if (llkSkipProc(procp, llkSkipName(std::to_string(pprocp->pid), blacklist))) return true;
+ if (llkSkipProc(procp, llkSkipName(pprocp->getComm(), blacklist))) return true;
+ if (llkSkipProc(procp, llkSkipName(pprocp->getCmdline(), blacklist))) return true;
+ return llkSkipProc(procp,
+ llkSkipName(android::base::Basename(pprocp->getCmdline()), blacklist));
+}
+
bool llkSkipPid(pid_t pid) {
return llkSkipName(std::to_string(pid), llkBlacklistProcess);
}
@@ -875,7 +912,8 @@
<< LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
#endif
<< LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
- << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
+ << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent)
+ << llkFormat(llkBlacklistParentAndChild, true) << "\n"
<< LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
}
@@ -1050,7 +1088,8 @@
break;
}
- if (llkSkipName(procp->getComm())) {
+ auto process_comm = procp->getComm();
+ if (llkSkipName(process_comm)) {
continue;
}
if (llkSkipName(procp->getCmdline())) {
@@ -1065,6 +1104,7 @@
pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
}
if (pprocp) {
+ if (llkSkipPproc(pprocp, procp)) break;
if (llkSkipProc(pprocp, llkBlacklistParent)) break;
} else {
if (llkSkipName(std::to_string(ppid), llkBlacklistParent)) break;
@@ -1084,7 +1124,7 @@
stuck = true;
} else if (procp->count != 0ms) {
LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
- << pid << "->" << tid << ' ' << procp->getComm();
+ << pid << "->" << tid << ' ' << process_comm;
}
}
if (!stuck) continue;
@@ -1092,7 +1132,7 @@
if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
if (procp->count != 0ms) {
LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
- << pid << "->" << tid << ' ' << procp->getComm();
+ << pid << "->" << tid << ' ' << process_comm;
}
continue;
}
@@ -1120,7 +1160,7 @@
break;
}
LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
- << pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
+ << pid << "->" << tid << ' ' << process_comm << " [kill]";
if ((llkKillOneProcess(pprocp, procp) >= 0) ||
(llkKillOneProcess(ppid, procp) >= 0)) {
continue;
@@ -1137,7 +1177,7 @@
// kernel (worse).
default:
LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
- << "->" << tid << ' ' << procp->getComm() << " [kill]";
+ << "->" << tid << ' ' << process_comm << " [kill]";
if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
(llkKillOneProcess(pid, state, tid) >= 0) ||
(llkKillOneProcess(procp, procp) >= 0) ||
@@ -1150,7 +1190,7 @@
// We are here because we have confirmed kernel live-lock
const auto message = state + " "s + llkFormat(procp->count) + " " +
std::to_string(ppid) + "->" + std::to_string(pid) + "->" +
- std::to_string(tid) + " " + procp->getComm() + " [panic]";
+ std::to_string(tid) + " " + process_comm + " [panic]";
llkPanicKernel(dump, tid,
(state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping",
message);
@@ -1274,6 +1314,26 @@
llkBlacklistParent = llkSplit(LLK_BLACKLIST_PARENT_PROPERTY,
std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
"," LLK_BLACKLIST_PARENT_DEFAULT);
+ // derive llkBlacklistParentAndChild by moving entries with '&' from above
+ for (auto it = llkBlacklistParent.begin(); it != llkBlacklistParent.end();) {
+ auto pos = it->find('&');
+ if (pos == std::string::npos) {
+ ++it;
+ continue;
+ }
+ auto parent = it->substr(0, pos);
+ auto child = it->substr(pos + 1);
+ it = llkBlacklistParent.erase(it);
+
+ auto found = llkBlacklistParentAndChild.find(parent);
+ if (found == llkBlacklistParentAndChild.end()) {
+ llkBlacklistParentAndChild.emplace(std::make_pair(
+ std::move(parent), std::unordered_set<std::string>({std::move(child)})));
+ } else {
+ found->second.emplace(std::move(child));
+ }
+ }
+
llkBlacklistUid = llkSplit(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT);
// internal watchdog
diff --git a/llkd/tests/llkd_test.cpp b/llkd/tests/llkd_test.cpp
index d738935..96079cc 100644
--- a/llkd/tests/llkd_test.cpp
+++ b/llkd/tests/llkd_test.cpp
@@ -17,6 +17,7 @@
#include <fcntl.h>
#include <signal.h>
#include <stdint.h>
+#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -333,3 +334,37 @@
unlink(stack_pipe_file);
}
+
+// b/120983740
+TEST(llkd, adbd_and_setsid) {
+ if (checkKill("kernel_panic,sysrq,livelock,zombie")) {
+ return;
+ }
+ const auto period = llkdSleepPeriod('S');
+
+ // expect llkd.zombie to trigger, but not for adbd&[setsid]
+ // Create a Persistent Zombie setsid Process
+ pid_t child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (!child_pid) {
+ prctl(PR_SET_NAME, "adbd");
+ auto zombie_pid = fork();
+ ASSERT_LE(0, zombie_pid);
+ if (!zombie_pid) {
+ prctl(PR_SET_NAME, "setsid");
+ sleep(1);
+ exit(0);
+ }
+ sleep(period.count());
+ exit(42);
+ }
+
+ // Reverse of waitForPid, do _not_ expect kill
+ int wstatus;
+ ASSERT_LE(0, waitpid(child_pid, &wstatus, 0));
+ EXPECT_TRUE(WIFEXITED(wstatus));
+ if (WIFEXITED(wstatus)) {
+ EXPECT_EQ(42, WEXITSTATUS(wstatus));
+ }
+ ASSERT_FALSE(WIFSIGNALED(wstatus)) << "[ INFO ] signo=" << WTERMSIG(wstatus);
+}