Merge "Change test 149 to run for 10 seconds"
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index d776fb4..c8d6ddc 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1745,6 +1745,7 @@
 
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1770,29 +1771,57 @@
   Register dstBegin = XRegisterFrom(locations->InAt(4));
 
   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
-  Register src_ptr_end = XRegisterFrom(locations->GetTemp(1));
+  Register num_chr = XRegisterFrom(locations->GetTemp(1));
+  Register tmp1 = XRegisterFrom(locations->GetTemp(2));
 
   UseScratchRegisterScope temps(masm);
   Register dst_ptr = temps.AcquireX();
-  Register tmp = temps.AcquireW();
+  Register tmp2 = temps.AcquireX();
 
-  // src range to copy.
+  // src address to copy from.
   __ Add(src_ptr, srcObj, Operand(value_offset));
-  __ Add(src_ptr_end, src_ptr, Operand(srcEnd, LSL, 1));
   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
 
-  // dst to be copied.
+  // dst address start to copy to.
   __ Add(dst_ptr, dstObj, Operand(data_offset));
   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1));
 
+  __ Sub(num_chr, srcEnd, srcBegin);
+
   // Do the copy.
-  vixl::Label loop, done;
+  vixl::Label loop;
+  vixl::Label done;
+  vixl::Label remainder;
+
+  // Early out for valid zero-length retrievals.
+  __ Cbz(num_chr, &done);
+
+  // Subtract into tmp1 so that num_chr needs no repair on the < 8 character path.
+  __ Subs(tmp1, num_chr, 8);
+  __ B(lt, &remainder);
+
+  // Keep the result of the earlier subs; we are going to fetch at least 8 characters.
+  __ Mov(num_chr, tmp1);
+
+  // Main loop, used for longer fetches, loads and stores 8x16-bit characters at a time.
+  // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.)
   __ Bind(&loop);
-  __ Cmp(src_ptr, src_ptr_end);
-  __ B(&done, eq);
-  __ Ldrh(tmp, MemOperand(src_ptr, char_size, vixl::PostIndex));
-  __ Strh(tmp, MemOperand(dst_ptr, char_size, vixl::PostIndex));
-  __ B(&loop);
+  __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, vixl::PostIndex));
+  __ Subs(num_chr, num_chr, 8);
+  __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, vixl::PostIndex));
+  __ B(ge, &loop);
+
+  __ Adds(num_chr, num_chr, 8);
+  __ B(eq, &done);
+
+  // Main loop for < 8 character case and remainder handling. Loads and stores one
+  // 16-bit Java character at a time.
+  __ Bind(&remainder);
+  __ Ldrh(tmp1, MemOperand(src_ptr, char_size, vixl::PostIndex));
+  __ Subs(num_chr, num_chr, 1);
+  __ Strh(tmp1, MemOperand(dst_ptr, char_size, vixl::PostIndex));
+  __ B(gt, &remainder);
+
   __ Bind(&done);
 }
 
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index f58af5a..5bdb36c 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -418,26 +418,6 @@
   (*icu_cleanup_fn)();
 
   Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption after the test
-
-  // Manually closing the JNI libraries.
-  // Runtime does not support repeatedly doing JNI->CreateVM, thus we need to manually clean up the
-  // dynamic linking loader so that gtests would not fail.
-  // Bug: 25785594
-  if (runtime_->IsStarted()) {
-    {
-      // We retrieve the handle by calling dlopen on the library. To close it, we need to call
-      // dlclose twice, the first time to undo our dlopen and the second time to actually unload it.
-      // See man dlopen.
-      void* handle = dlopen("libjavacore.so", RTLD_LAZY);
-      dlclose(handle);
-      CHECK_EQ(0, dlclose(handle));
-    }
-    {
-      void* handle = dlopen("libopenjdkd.so", RTLD_LAZY);
-      dlclose(handle);
-      CHECK_EQ(0, dlclose(handle));
-    }
-  }
 }
 
 static std::string GetDexFileName(const std::string& jar_prefix, bool host) {
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index d983a9f..c216412 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -74,6 +74,10 @@
     if (self != nullptr) {
       self->GetJniEnv()->DeleteWeakGlobalRef(class_loader_);
     }
+
+    if (!needs_native_bridge_) {
+      android::CloseNativeLibrary(handle_);
+    }
   }
 
   jweak GetClassLoader() const {
@@ -271,8 +275,7 @@
       REQUIRES(!Locks::jni_libraries_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     ScopedObjectAccessUnchecked soa(Thread::Current());
-    typedef void (*JNI_OnUnloadFn)(JavaVM*, void*);
-    std::vector<JNI_OnUnloadFn> unload_functions;
+    std::vector<SharedLibrary*> unload_libraries;
     {
       MutexLock mu(soa.Self(), *Locks::jni_libraries_lock_);
       for (auto it = libraries_.begin(); it != libraries_.end(); ) {
@@ -283,15 +286,7 @@
         // the native libraries of the boot class loader.
         if (class_loader != nullptr &&
             soa.Self()->IsJWeakCleared(class_loader)) {
-          void* const sym = library->FindSymbol("JNI_OnUnload", nullptr);
-          if (sym == nullptr) {
-            VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]";
-          } else {
-            VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]";
-            JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym);
-            unload_functions.push_back(jni_on_unload);
-          }
-          delete library;
+          unload_libraries.push_back(library);
           it = libraries_.erase(it);
         } else {
           ++it;
@@ -299,9 +294,17 @@
       }
     }
     // Do this without holding the jni libraries lock to prevent possible deadlocks.
-    for (JNI_OnUnloadFn fn : unload_functions) {
-      VLOG(jni) << "Calling JNI_OnUnload";
-      (*fn)(soa.Vm(), nullptr);
+    typedef void (*JNI_OnUnloadFn)(JavaVM*, void*);
+    for (auto library : unload_libraries) {
+      void* const sym = library->FindSymbol("JNI_OnUnload", nullptr);
+      if (sym == nullptr) {
+        VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]";
+      } else {
+        VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]: Calling...";
+        JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym);
+        jni_on_unload(soa.Vm(), nullptr);
+      }
+      delete library;
     }
   }
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 28f863e..b9ee442 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -924,10 +924,22 @@
 
   Runtime* runtime = Runtime::Current();
   bool implicit_stack_check = !runtime->ExplicitStackOverflowChecks() && !runtime->IsAotCompiler();
+
+  // Valgrind on arm doesn't give the right values here. Do not install the guard page, and
+  // effectively disable stack overflow checks (we'll get segfaults, potentially) by setting
+  // stack_begin to 0.
+  const bool valgrind_on_arm =
+      (kRuntimeISA == kArm || kRuntimeISA == kArm64) &&
+      kMemoryToolIsValgrind &&
+      RUNNING_ON_MEMORY_TOOL != 0;
+  if (valgrind_on_arm) {
+    tlsPtr_.stack_begin = nullptr;
+  }
+
   ResetDefaultStackEnd();
 
   // Install the protected region if we are doing implicit overflow checks.
-  if (implicit_stack_check) {
+  if (implicit_stack_check && !valgrind_on_arm) {
     // The thread might have protected region at the bottom.  We need
     // to install our own region so we need to move the limits
     // of the stack to make room for it.
diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
index c9110a9..b729301 100644
--- a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
+++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
@@ -27,8 +27,20 @@
 namespace {
 
 static volatile std::atomic<bool> vm_was_shutdown(false);
+static const int kThreadCount = 4;
+
+static std::atomic<int> barrier_count(kThreadCount + 1);
+
+static void JniThreadBarrierWait() {  // Blocks the caller until barrier_count reaches zero.
+  barrier_count--;  // Atomic decrement: record this caller's arrival.
+  while (barrier_count.load() != 0) {  // Poll until the count (initially kThreadCount + 1) is 0.
+    usleep(1000);  // Sleep 1000 microseconds between polls.
+  }
+}
 
 extern "C" JNIEXPORT void JNICALL Java_Main_waitAndCallIntoJniEnv(JNIEnv* env, jclass) {
+  // Wait for all threads to enter JNI together.
+  JniThreadBarrierWait();
   // Wait until the runtime is shutdown.
   while (!vm_was_shutdown.load()) {
     usleep(1000);
@@ -40,6 +52,8 @@
 
 // NO_RETURN does not work with extern "C" for target builds.
 extern "C" JNIEXPORT void JNICALL Java_Main_destroyJavaVMAndExit(JNIEnv* env, jclass) {
+  // Wait for all threads to enter JNI together.
+  JniThreadBarrierWait();
   // Fake up the managed stack so we can detach.
   Thread* const self = Thread::Current();
   self->SetTopOfStack(nullptr);
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index feee7c2..8598474 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -87,7 +87,11 @@
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libarttest.mk
   ifeq ($$(art_target_or_host),target)
     $(call set-target-local-clang-vars)
-    $(call set-target-local-cflags-vars,debug)
+    ifeq ($$(suffix),d)
+      $(call set-target-local-cflags-vars,debug)
+    else
+      $(call set-target-local-cflags-vars,ndebug)
+    endif
     LOCAL_SHARED_LIBRARIES += libdl
     LOCAL_MULTILIB := both
     LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32)
diff --git a/test/valgrind-target-suppressions.txt b/test/valgrind-target-suppressions.txt
index 896850c..7ae6d53 100644
--- a/test/valgrind-target-suppressions.txt
+++ b/test/valgrind-target-suppressions.txt
@@ -40,3 +40,13 @@
    fun:je_tsd_fetch
    fun:je_malloc_tsd_boot0
 }
+
+# Setenv is known to leak when overwriting existing mappings. This is triggered by
+# re-initializing ANDROID_DATA. Ignore all definite setenv leaks.
+{
+   SetenvAndroidDataReinit
+   Memcheck:Leak
+   match-leak-kinds: definite
+   fun:malloc
+   fun:setenv
+}