Improve stack overflow diagnostics.
We notify debuggerd of problems by installing signal handlers. That's
fine except for when the signal is caused by us running off the end of
a thread's stack and into the guard page.
Bug: 8557703
Change-Id: I1ef65b4bb3bbca7e9a9743056177094921e60ed3
diff --git a/libc/bionic/pthread_attr.cpp b/libc/bionic/pthread_attr.cpp
index fe1ed4a..d7c6c13 100644
--- a/libc/bionic/pthread_attr.cpp
+++ b/libc/bionic/pthread_attr.cpp
@@ -30,12 +30,16 @@
#include "pthread_internal.h"
-#define DEFAULT_STACK_SIZE (1024 * 1024)
+// Traditionally we give threads a 1MiB stack. When we started allocating per-thread
+// alternate signal stacks to ease debugging of stack overflows, we subtracted the
+// same amount we were using there from the default thread stack size. This should
+// keep memory usage roughly constant.
+#define DEFAULT_THREAD_STACK_SIZE ((1 * 1024 * 1024) - SIGSTKSZ)
int pthread_attr_init(pthread_attr_t* attr) {
attr->flags = 0;
attr->stack_base = NULL;
- attr->stack_size = DEFAULT_STACK_SIZE;
+ attr->stack_size = DEFAULT_THREAD_STACK_SIZE;
attr->guard_size = PAGE_SIZE;
attr->sched_policy = SCHED_NORMAL;
attr->sched_priority = 0;
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index f45f5e7..5908a1b 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -62,6 +62,15 @@
thread->tls[i] = NULL;
}
+ // Create and set an alternate signal stack.
+ stack_t ss;
+ ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ if (ss.ss_sp != MAP_FAILED) {
+ ss.ss_size = SIGSTKSZ;
+ ss.ss_flags = 0;
+ sigaltstack(&ss, NULL);
+ }
+
// Slot 0 must point to itself. The x86 Linux kernel reads the TLS from %fs:0.
thread->tls[TLS_SLOT_SELF] = thread->tls;
thread->tls[TLS_SLOT_THREAD_ID] = thread;
@@ -71,7 +80,9 @@
__set_tls(thread->tls);
}
-// This trampoline is called from the assembly _pthread_clone() function.
+// This trampoline is called from the assembly _pthread_clone function.
+// Our 'tls' and __pthread_clone's 'child_stack' are one and the same, just growing in
+// opposite directions.
extern "C" void __thread_entry(void* (*func)(void*), void* arg, void** tls) {
// Wait for our creating thread to release us. This lets it have time to
// notify gdb about this thread before we start doing anything.
@@ -187,8 +198,12 @@
thread->attr.flags |= PTHREAD_ATTR_FLAG_USER_STACK;
}
- // Make room for TLS.
+ // Make room for the TLS area.
+ // The child stack is the same address, just growing in the opposite direction.
+ // At offsets >= 0, we have the TLS slots.
+ // At offsets < 0, we have the child stack.
void** tls = (void**)((uint8_t*)(thread->attr.stack_base) + thread->attr.stack_size - BIONIC_TLS_SLOTS * sizeof(void*));
+ void* child_stack = tls;
// Create a mutex for the thread in TLS_SLOT_SELF to wait on once it starts so we can keep
// it from doing anything until after we notify the debugger about it
@@ -204,7 +219,7 @@
int flags = CLONE_FILES | CLONE_FS | CLONE_VM | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM;
- int tid = __pthread_clone(start_routine, tls, flags, arg);
+ int tid = __pthread_clone(start_routine, child_stack, flags, arg);
if (tid < 0) {
int clone_errno = errno;
if ((thread->attr.flags & PTHREAD_ATTR_FLAG_USER_STACK) == 0) {