Add thread pool class

Added a thread pool class loosely based on google3 code.

Modified the compiler to have a single thread pool instead of creating new threads in ForAll.

Moved barrier to the top-level directory since it is not GC-specific code.

Performance Timings:

Reference:
boot.oat: 14.306596s
time mm oat-target:
real    2m33.748s
user    10m23.190s
sys     5m54.140s

Thread pool:
boot.oat: 13.111049s
time mm oat-target:
real    2m29.372s
user    10m3.130s
sys     5m46.290s

The speed increase is probably just noise.

Change-Id: If3c1280cbaa4c7e4361127d064ac744ea12cdf49
diff --git a/src/gc/mark_sweep.cc b/src/gc/mark_sweep.cc
index 0869e26..e93eb1a 100644
--- a/src/gc/mark_sweep.cc
+++ b/src/gc/mark_sweep.cc
@@ -527,7 +527,7 @@
   Thread* self;
 };
 
-class CheckpointMarkThreadRoots : public Thread::CheckpointFunction {
+class CheckpointMarkThreadRoots : public Closure {
  public:
   CheckpointMarkThreadRoots(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {
 
@@ -536,7 +536,8 @@
   virtual void Run(Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
     // Note: self is not necessarily equal to thread since thread may be suspended.
     Thread* self = Thread::Current();
-    DCHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc);
+    DCHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
+        << thread->GetState();
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     thread->VisitRoots(MarkSweep::MarkObjectVisitor, mark_sweep_);
     mark_sweep_->GetBarrier().Pass(self);