ART can't create more than 8k threads during the worktime

ART uses LDT to point to the Java Thread structure. This structure
can holds up to 8k pointers, so we can have 8k threads simultaneously.
But the current implementation does not free slots for the finished threads.
This means there cannot be more than 8k threads created during the whole
life cycle.
This patch implements the LDT slots freeing mechanism.

Change-Id: Ifcf8fe1f4434a13f940146fff39b9c7bf91ee17b
Signed-off-by: Alexei Zavjalov <alexei.zavjalov@intel.com>
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 39bad58..235da99 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -40,8 +40,9 @@
 
 namespace art {
 
+static Mutex modify_ldt_lock("modify_ldt lock");
+
 void Thread::InitCpu() {
-  static Mutex modify_ldt_lock("modify_ldt lock");
   MutexLock mu(Thread::Current(), modify_ldt_lock);
 
   const uintptr_t base = reinterpret_cast<uintptr_t>(this);
@@ -113,7 +114,6 @@
   uint16_t table_indicator = 1 << 2;  // LDT
   uint16_t rpl = 3;  // Requested privilege level
   uint16_t selector = (entry_number << 3) | table_indicator | rpl;
-  // TODO: use our assembler to generate code
   __asm__ __volatile__("movw %w0, %%fs"
       :    // output
       : "q"(selector)  // input
@@ -124,7 +124,6 @@
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
-  // TODO: use our assembler to generate code
   CHECK_EQ(THREAD_SELF_OFFSET, OFFSETOF_MEMBER(Thread, self_));
   __asm__ __volatile__("movl %%fs:(%1), %0"
       : "=r"(self_check)  // output
@@ -138,4 +137,36 @@
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
+void Thread::CleanupCpu() {
+  MutexLock mu(Thread::Current(), modify_ldt_lock);
+
+  // Sanity check that reads from %fs point to this Thread*.
+  Thread* self_check;
+  __asm__ __volatile__("movl %%fs:(%1), %0"
+      : "=r"(self_check)  // output
+      : "r"(THREAD_SELF_OFFSET)  // input
+      :);  // clobber
+  CHECK_EQ(self_check, this);
+
+  // Extract the LDT entry number from the FS register.
+  uint16_t selector;
+  __asm__ __volatile__("movw %%fs, %w0"
+      : "=q"(selector)  // output
+      :  // input
+      :);  // clobber
+
+  // Free LDT entry.
+#if defined(__APPLE__)
+  i386_set_ldt(selector >> 3, 0, 1);
+#else
+  user_desc ldt_entry;
+  memset(&ldt_entry, 0, sizeof(ldt_entry));
+  ldt_entry.entry_number = selector >> 3;
+  ldt_entry.contents = MODIFY_LDT_CONTENTS_DATA;
+  ldt_entry.seg_not_present = 1;
+
+  syscall(__NR_modify_ldt, 1, &ldt_entry, sizeof(ldt_entry));
+#endif
+}
+
 }  // namespace art