Fix alignment error for pthread_internal_t/pthread stack.

The aligned attribute only affects the compiler's behavior, but
pthread_internal_t is allocated manually at the top of the mapped
(or user-allocated) stack. We therefore have to enforce the
16-byte alignment ourselves.

Change-Id: Iea4c46eadf10dfd15dc955c5f41cf6063cfd8536
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index a4bd054..5389f14 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -162,15 +162,16 @@
   }
 
   // Mapped space(or user allocated stack) is used for:
-  //   thread_internal_t
+  //   pthread_internal_t
   //   thread stack (including guard page)
-  stack_top -= sizeof(pthread_internal_t);
+
+  // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
+  stack_top = reinterpret_cast<uint8_t*>(
+                (reinterpret_cast<uintptr_t>(stack_top) - sizeof(pthread_internal_t)) & ~0xf);
+
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
   attr->stack_size = stack_top - reinterpret_cast<uint8_t*>(attr->stack_base);
 
-  // No need to check stack_top alignment. The size of pthread_internal_t is 16-bytes aligned,
-  // and user allocated stack is guaranteed by pthread_attr_setstack.
-
   thread->mmap_size = mmap_size;
   thread->attr = *attr;
   __init_tls(thread);