diff --git a/src/asm_support.h b/src/asm_support.h
index 470b81f..bda0b7c 100644
--- a/src/asm_support.h
+++ b/src/asm_support.h
@@ -27,6 +27,9 @@
 #define STRING_OFFSET_OFFSET 20
 #define STRING_DATA_OFFSET 12
 
+// Offset of field Method::code_
+#define METHOD_CODE_OFFSET 32
+
 #if defined(__arm__)
 // Register holding suspend check count down.
 #define rSUSPEND r4
diff --git a/src/class_linker.cc b/src/class_linker.cc
index 4a07eba..725e710 100644
--- a/src/class_linker.cc
+++ b/src/class_linker.cc
@@ -2694,9 +2694,11 @@
 
   if (clinit != NULL) {
     if (Runtime::Current()->IsStarted()) {
-      clinit->Invoke(self, NULL, NULL, NULL);
+      JValue result;
+      JValue float_result;
+      clinit->Invoke(self, NULL, 0, &result, &float_result);
     } else {
-      art::interpreter::EnterInterpreterFromInvoke(self, clinit, NULL, NULL, NULL);
+      art::interpreter::EnterInterpreterFromInvoke(self, clinit, NULL, NULL, NULL, NULL);
     }
   }
 
diff --git a/src/debugger.cc b/src/debugger.cc
index c96bb66..09c930a 100644
--- a/src/debugger.cc
+++ b/src/debugger.cc
@@ -26,6 +26,7 @@
 #include "gc/card_table-inl.h"
 #include "gc/large_object_space.h"
 #include "gc/space.h"
+#include "invoke_arg_array_builder.h"
 #include "jdwp/object_registry.h"
 #include "mirror/abstract_method-inl.h"
 #include "mirror/class.h"
@@ -2723,8 +2724,16 @@
 
   LOG(INFO) << "self=" << soa.Self() << " pReq->receiver_=" << pReq->receiver_ << " m=" << m
       << " #" << pReq->arg_count_ << " " << pReq->arg_values_;
-  pReq->result_value = InvokeWithJValues(soa, pReq->receiver_, m,
-                                         reinterpret_cast<JValue*>(pReq->arg_values_));
+
+  MethodHelper mh(m);
+  ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
+  arg_array.BuildArgArray(soa, pReq->receiver_, reinterpret_cast<jvalue*>(pReq->arg_values_));
+  JValue unused_result;
+  if (mh.IsReturnFloatOrDouble()) {
+    InvokeWithArgArray(soa, m, &arg_array, &unused_result, &pReq->result_value);
+  } else {
+    InvokeWithArgArray(soa, m, &arg_array, &pReq->result_value, &unused_result);
+  }
 
   pReq->exception = gRegistry->Add(soa.Self()->GetException());
   pReq->result_tag = BasicTagFromDescriptor(MethodHelper(m).GetShorty());
diff --git a/src/heap.cc b/src/heap.cc
index a3a3a28..311052b 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -40,6 +40,7 @@
 #include "gc/mod_union_table-inl.h"
 #include "gc/space.h"
 #include "image.h"
+#include "invoke_arg_array_builder.h"
 #include "mirror/class-inl.h"
 #include "mirror/field-inl.h"
 #include "mirror/object.h"
@@ -1730,10 +1731,12 @@
 
 void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
   ScopedObjectAccess soa(self);
-  JValue args[1];
-  args[0].SetL(object);
-  soa.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self, NULL, args,
-                                                                                   NULL);
+  JValue result;
+  JValue float_result;
+  ArgArray arg_array(NULL, 0);
+  arg_array.Append(reinterpret_cast<uint32_t>(object));
+  soa.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self,
+      arg_array.GetArray(), arg_array.GetNumBytes(), &result, &float_result);
 }
 
 size_t Heap::GetBytesAllocated() const {
@@ -1766,10 +1769,12 @@
     // When a runtime isn't started there are no reference queues to care about so ignore.
     if (LIKELY(Runtime::Current()->IsStarted())) {
       ScopedObjectAccess soa(Thread::Current());
-      JValue args[1];
-      args[0].SetL(*cleared);
-      soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(), NULL,
-                                                                                   args, NULL);
+      JValue result;
+      JValue float_result;
+      ArgArray arg_array(NULL, 0);
+      arg_array.Append(reinterpret_cast<uint32_t>(*cleared));
+      soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
+          arg_array.GetArray(), arg_array.GetNumBytes(), &result, &float_result);
     }
     *cleared = NULL;
   }
diff --git a/src/interpreter/interpreter.cc b/src/interpreter/interpreter.cc
index 195549a..1a571ec 100644
--- a/src/interpreter/interpreter.cc
+++ b/src/interpreter/interpreter.cc
@@ -54,13 +54,13 @@
 static uint32_t throw_dex_pc_ = 0;
 
 static void UnstartedRuntimeInvoke(Thread* self, AbstractMethod* target_method,
-                                   Object* receiver, JValue* args, JValue* result)
+                                   Object* receiver, uint32_t* args, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // In a runtime that's not started we intercept certain methods to avoid complicated dependency
   // problems in core libraries.
   std::string name(PrettyMethod(target_method));
   if (name == "java.lang.Class java.lang.Class.forName(java.lang.String)") {
-    std::string descriptor(DotToDescriptor(args[0].GetL()->AsString()->ToModifiedUtf8().c_str()));
+    std::string descriptor(DotToDescriptor(reinterpret_cast<Object*>(args[0])->AsString()->ToModifiedUtf8().c_str()));
     ClassLoader* class_loader = NULL; // shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
     Class* found = Runtime::Current()->GetClassLinker()->FindClass(descriptor.c_str(),
                                                                    class_loader);
@@ -73,13 +73,13 @@
     CHECK(c != NULL);
     Object* obj = klass->AllocObject(self);
     CHECK(obj != NULL);
-    EnterInterpreterFromInvoke(self, c, obj, NULL, NULL);
+    EnterInterpreterFromInvoke(self, c, obj, NULL, NULL, NULL);
     result->SetL(obj);
   } else if (name == "java.lang.reflect.Field java.lang.Class.getDeclaredField(java.lang.String)") {
     // Special managed code cut-out to allow field lookup in a un-started runtime that'd fail
     // going the reflective Dex way.
     Class* klass = receiver->AsClass();
-    String* name = args[0].GetL()->AsString();
+    String* name = reinterpret_cast<Object*>(args[0])->AsString();
     Field* found = NULL;
     FieldHelper fh;
     ObjectArray<Field>* fields = klass->GetIFields();
@@ -108,25 +108,25 @@
     result->SetL(found);
   } else if (name == "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)") {
     // Special case array copying without initializing System.
-    Class* ctype = args[0].GetL()->GetClass()->GetComponentType();
-    jint srcPos = args[1].GetI();
-    jint dstPos = args[3].GetI();
-    jint length = args[4].GetI();
+    Class* ctype = reinterpret_cast<Object*>(args[0])->GetClass()->GetComponentType();
+    jint srcPos = args[1];
+    jint dstPos = args[3];
+    jint length = args[4];
     if (!ctype->IsPrimitive()) {
-      ObjectArray<Object>* src = args[0].GetL()->AsObjectArray<Object>();
-      ObjectArray<Object>* dst = args[2].GetL()->AsObjectArray<Object>();
+      ObjectArray<Object>* src = reinterpret_cast<Object*>(args[0])->AsObjectArray<Object>();
+      ObjectArray<Object>* dst = reinterpret_cast<Object*>(args[2])->AsObjectArray<Object>();
       for (jint i = 0; i < length; ++i) {
         dst->Set(dstPos + i, src->Get(srcPos + i));
       }
     } else if (ctype->IsPrimitiveChar()) {
-      CharArray* src = args[0].GetL()->AsCharArray();
-      CharArray* dst = args[2].GetL()->AsCharArray();
+      CharArray* src = reinterpret_cast<Object*>(args[0])->AsCharArray();
+      CharArray* dst = reinterpret_cast<Object*>(args[2])->AsCharArray();
       for (jint i = 0; i < length; ++i) {
         dst->Set(dstPos + i, src->Get(srcPos + i));
       }
     } else if (ctype->IsPrimitiveInt()) {
-      IntArray* src = args[0].GetL()->AsIntArray();
-      IntArray* dst = args[2].GetL()->AsIntArray();
+      IntArray* src = reinterpret_cast<Object*>(args[0])->AsIntArray();
+      IntArray* dst = reinterpret_cast<Object*>(args[2])->AsIntArray();
       for (jint i = 0; i < length; ++i) {
         dst->Set(dstPos + i, src->Get(srcPos + i));
       }
@@ -135,13 +135,13 @@
     }
   } else {
     // Not special, continue with regular interpreter execution.
-    EnterInterpreterFromInvoke(self, target_method, receiver, args, result);
+    EnterInterpreterFromInvoke(self, target_method, receiver, args, result, result);
   }
 }
 
 // Hand select a number of methods to be run in a not yet started runtime without using JNI.
 static void UnstartedRuntimeJni(Thread* self, AbstractMethod* method,
-                                Object* receiver, JValue* args, JValue* result)
+                                Object* receiver, uint32_t* args, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::string name(PrettyMethod(method));
   if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") {
@@ -151,55 +151,59 @@
     visitor.WalkStack();
     result->SetL(visitor.caller->GetDeclaringClass());
   } else if (name == "double java.lang.Math.log(double)") {
-    result->SetD(log(args[0].GetD()));
+    JValue value;
+    value.SetJ((static_cast<uint64_t>(args[1]) << 32) | args[0]);
+    result->SetD(log(value.GetD()));
   } else if (name == "java.lang.String java.lang.Class.getNameNative()") {
     result->SetL(receiver->AsClass()->ComputeName());
   } else if (name == "int java.lang.Float.floatToRawIntBits(float)") {
-    result->SetI(args[0].GetI());
+    result->SetI(args[0]);
   } else if (name == "float java.lang.Float.intBitsToFloat(int)") {
-    result->SetF(args[0].GetF());
+    result->SetI(args[0]);
   } else if (name == "double java.lang.Math.exp(double)") {
-    result->SetD(exp(args[0].GetD()));
+    JValue value;
+    value.SetJ((static_cast<uint64_t>(args[1]) << 32) | args[0]);
+    result->SetD(exp(value.GetD()));
   } else if (name == "java.lang.Object java.lang.Object.internalClone()") {
     result->SetL(receiver->Clone(self));
   } else if (name == "void java.lang.Object.notifyAll()") {
     receiver->NotifyAll(self);
   } else if (name == "int java.lang.String.compareTo(java.lang.String)") {
-    String* rhs = args[0].GetL()->AsString();
+    String* rhs = reinterpret_cast<Object*>(args[0])->AsString();
     CHECK(rhs != NULL);
     result->SetI(receiver->AsString()->CompareTo(rhs));
   } else if (name == "java.lang.String java.lang.String.intern()") {
     result->SetL(receiver->AsString()->Intern());
   } else if (name == "int java.lang.String.fastIndexOf(int, int)") {
-    result->SetI(receiver->AsString()->FastIndexOf(args[0].GetI(), args[1].GetI()));
+    result->SetI(receiver->AsString()->FastIndexOf(args[0], args[1]));
   } else if (name == "java.lang.Object java.lang.reflect.Array.createMultiArray(java.lang.Class, int[])") {
-    result->SetL(Array::CreateMultiArray(self, args[0].GetL()->AsClass(), args[1].GetL()->AsIntArray()));
+    result->SetL(Array::CreateMultiArray(self, reinterpret_cast<Object*>(args[0])->AsClass(), reinterpret_cast<Object*>(args[1])->AsIntArray()));
   } else if (name == "java.lang.Object java.lang.Throwable.nativeFillInStackTrace()") {
     ScopedObjectAccessUnchecked soa(self);
     result->SetL(soa.Decode<Object*>(self->CreateInternalStackTrace(soa)));
   } else if (name == "boolean java.nio.ByteOrder.isLittleEndian()") {
     result->SetJ(JNI_TRUE);
   } else if (name == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") {
-    Object* obj = args[0].GetL();
-    jlong offset = args[1].GetJ();
-    jint expectedValue = args[2].GetI();
-    jint newValue = args[3].GetI();
+    Object* obj = reinterpret_cast<Object*>(args[0]);
+    jlong offset = (static_cast<uint64_t>(args[2]) << 32) | args[1];
+    jint expectedValue = args[3];
+    jint newValue = args[4];
     byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
     volatile int32_t* address = reinterpret_cast<volatile int32_t*>(raw_addr);
     // Note: android_atomic_release_cas() returns 0 on success, not failure.
     int r = android_atomic_release_cas(expectedValue, newValue, address);
     result->SetZ(r == 0);
   } else if (name == "void sun.misc.Unsafe.putObject(java.lang.Object, long, java.lang.Object)") {
-    Object* obj = args[0].GetL();
-    Object* newValue = args[2].GetL();
-    obj->SetFieldObject(MemberOffset(args[1].GetJ()), newValue, false);
+    Object* obj = reinterpret_cast<Object*>(args[0]);
+    Object* newValue = reinterpret_cast<Object*>(args[3]);
+    obj->SetFieldObject(MemberOffset((static_cast<uint64_t>(args[2]) << 32) | args[1]), newValue, false);
   } else {
     LOG(FATAL) << "Attempt to invoke native method in non-started runtime: " << name;
   }
 }
 
 static void InterpreterJni(Thread* self, AbstractMethod* method, StringPiece shorty,
-                           Object* receiver, JValue* args, JValue* result)
+                           Object* receiver, uint32_t* args, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // TODO: The following enters JNI code using a typedef-ed function rather than the JNI compiler,
   //       it should be removed and JNI compiled stubs used instead.
@@ -236,21 +240,21 @@
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
-      result->SetB(fn(soa.Env(), klass.get(), args[0].GetI()));
+      result->SetB(fn(soa.Env(), klass.get(), args[0]));
     } else if (shorty == "II") {
       typedef jint (fnptr)(JNIEnv*, jclass, jint);
       fnptr* fn = reinterpret_cast<fnptr*>(method->GetNativeMethod());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
-      result->SetI(fn(soa.Env(), klass.get(), args[0].GetI()));
+      result->SetI(fn(soa.Env(), klass.get(), args[0]));
     } else if (shorty == "LL") {
       typedef jobject (fnptr)(JNIEnv*, jclass, jobject);
       fnptr* fn = reinterpret_cast<fnptr*>(method->GetNativeMethod());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
-                                   soa.AddLocalReference<jobject>(args[0].GetL()));
+                                   soa.AddLocalReference<jobject>(reinterpret_cast<Object*>(args[0])));
       jobject jresult;
       {
         ScopedThreadStateChange tsc(self, kNative);
@@ -263,39 +267,39 @@
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
-      result->SetI(fn(soa.Env(), klass.get(), args[0].GetI(), args[1].GetZ()));
+      result->SetI(fn(soa.Env(), klass.get(), args[0], args[1]));
     } else if (shorty == "ILI") {
       typedef jint (fnptr)(JNIEnv*, jclass, jobject, jint);
       fnptr* fn = reinterpret_cast<fnptr*>(method->GetNativeMethod());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
-                                   soa.AddLocalReference<jobject>(args[0].GetL()));
+                                   soa.AddLocalReference<jobject>(reinterpret_cast<Object*>(args[0])));
       ScopedThreadStateChange tsc(self, kNative);
-      result->SetI(fn(soa.Env(), klass.get(), arg0.get(), args[1].GetI()));
+      result->SetI(fn(soa.Env(), klass.get(), arg0.get(), args[1]));
     } else if (shorty == "SIZ") {
       typedef jshort (fnptr)(JNIEnv*, jclass, jint, jboolean);
       fnptr* fn = reinterpret_cast<fnptr*>(method->GetNativeMethod());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
-      result->SetS(fn(soa.Env(), klass.get(), args[0].GetI(), args[1].GetZ()));
+      result->SetS(fn(soa.Env(), klass.get(), args[0], args[1]));
     } else if (shorty == "VIZ") {
       typedef void (fnptr)(JNIEnv*, jclass, jint, jboolean);
       fnptr* fn = reinterpret_cast<fnptr*>(method->GetNativeMethod());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
-      fn(soa.Env(), klass.get(), args[0].GetI(), args[1].GetZ());
+      fn(soa.Env(), klass.get(), args[0], args[1]);
     } else if (shorty == "ZLL") {
       typedef jboolean (fnptr)(JNIEnv*, jclass, jobject, jobject);
       fnptr* fn = reinterpret_cast<fnptr*>(method->GetNativeMethod());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
-                                   soa.AddLocalReference<jobject>(args[0].GetL()));
+                                   soa.AddLocalReference<jobject>(reinterpret_cast<Object*>(args[0])));
       ScopedLocalRef<jobject> arg1(soa.Env(),
-                                   soa.AddLocalReference<jobject>(args[1].GetL()));
+                                   soa.AddLocalReference<jobject>(reinterpret_cast<Object*>(args[1])));
       ScopedThreadStateChange tsc(self, kNative);
       result->SetZ(fn(soa.Env(), klass.get(), arg0.get(), arg1.get()));
     } else if (shorty == "ZILL") {
@@ -304,32 +308,31 @@
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg1(soa.Env(),
-                                   soa.AddLocalReference<jobject>(args[1].GetL()));
+                                   soa.AddLocalReference<jobject>(reinterpret_cast<Object*>(args[1])));
       ScopedLocalRef<jobject> arg2(soa.Env(),
-                                   soa.AddLocalReference<jobject>(args[2].GetL()));
+                                   soa.AddLocalReference<jobject>(reinterpret_cast<Object*>(args[2])));
       ScopedThreadStateChange tsc(self, kNative);
-      result->SetZ(fn(soa.Env(), klass.get(), args[0].GetI(), arg1.get(), arg2.get()));
+      result->SetZ(fn(soa.Env(), klass.get(), args[0], arg1.get(), arg2.get()));
     } else if (shorty == "VILII") {
       typedef void (fnptr)(JNIEnv*, jclass, jint, jobject, jint, jint);
       fnptr* fn = reinterpret_cast<fnptr*>(method->GetNativeMethod());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg1(soa.Env(),
-                                   soa.AddLocalReference<jobject>(args[1].GetL()));
+                                   soa.AddLocalReference<jobject>(reinterpret_cast<Object*>(args[1])));
       ScopedThreadStateChange tsc(self, kNative);
-      fn(soa.Env(), klass.get(), args[0].GetI(), arg1.get(), args[2].GetI(), args[3].GetI());
+      fn(soa.Env(), klass.get(), args[0], arg1.get(), args[2], args[3]);
     } else if (shorty == "VLILII") {
       typedef void (fnptr)(JNIEnv*, jclass, jobject, jint, jobject, jint, jint);
       fnptr* fn = reinterpret_cast<fnptr*>(method->GetNativeMethod());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
-                                   soa.AddLocalReference<jobject>(args[0].GetL()));
+                                   soa.AddLocalReference<jobject>(reinterpret_cast<Object*>(args[0])));
       ScopedLocalRef<jobject> arg2(soa.Env(),
-                                   soa.AddLocalReference<jobject>(args[2].GetL()));
+                                   soa.AddLocalReference<jobject>(reinterpret_cast<Object*>(args[2])));
       ScopedThreadStateChange tsc(self, kNative);
-      fn(soa.Env(), klass.get(), arg0.get(), args[1].GetI(), arg2.get(), args[3].GetI(),
-         args[4].GetI());
+      fn(soa.Env(), klass.get(), arg0.get(), args[1], arg2.get(), args[3], args[4]);
     } else {
       LOG(FATAL) << "Do something with static native method: " << PrettyMethod(method)
           << " shorty: " << shorty;
@@ -352,7 +355,7 @@
       ScopedLocalRef<jobject> rcvr(soa.Env(),
                                    soa.AddLocalReference<jobject>(receiver));
       ScopedLocalRef<jobject> arg0(soa.Env(),
-                                   soa.AddLocalReference<jobject>(args[0].GetL()));
+                                   soa.AddLocalReference<jobject>(reinterpret_cast<Object*>(args[0])));
       jobject jresult;
       {
         ScopedThreadStateChange tsc(self, kNative);
@@ -367,7 +370,7 @@
       ScopedLocalRef<jobject> rcvr(soa.Env(),
                                    soa.AddLocalReference<jobject>(receiver));
       ScopedThreadStateChange tsc(self, kNative);
-      result->SetI(fn(soa.Env(), rcvr.get(), args[0].GetI(), args[1].GetI()));
+      result->SetI(fn(soa.Env(), rcvr.get(), args[0], args[1]));
     } else {
       LOG(FATAL) << "Do something with native method: " << PrettyMethod(method)
           << " shorty: " << shorty;
@@ -405,14 +408,25 @@
   mh.ChangeMethod(target_method);
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   if (is_range) {
-    arg_array.BuildArgArray(shadow_frame, dec_insn.vC + (type != kStatic ? 1 : 0));
+    arg_array.BuildArgArray(shadow_frame, receiver, dec_insn.vC + (type != kStatic ? 1 : 0));
   } else {
-    arg_array.BuildArgArray(shadow_frame, dec_insn.arg + (type != kStatic ? 1 : 0));
+    arg_array.BuildArgArray(shadow_frame, receiver, dec_insn.arg + (type != kStatic ? 1 : 0));
   }
   if (LIKELY(Runtime::Current()->IsStarted())) {
-    target_method->Invoke(self, receiver, arg_array.get(), result);
+    JValue unused_result;
+    if (mh.IsReturnFloatOrDouble()) {
+      target_method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(),
+                            &unused_result, result);
+    } else {
+      target_method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(),
+                            result, &unused_result);
+    }
   } else {
-    UnstartedRuntimeInvoke(self, target_method, receiver, arg_array.get(), result);
+    uint32_t* args = arg_array.GetArray();
+    if (type != kStatic) {
+      args++;
+    }
+    UnstartedRuntimeInvoke(self, target_method, receiver, args, result);
   }
   mh.ChangeMethod(shadow_frame.GetMethod());
 }
@@ -1792,7 +1806,7 @@
 }
 
 void EnterInterpreterFromInvoke(Thread* self, AbstractMethod* method, Object* receiver,
-                                JValue* args, JValue* result) {
+                                uint32_t* args, JValue* result, JValue* float_result) {
   DCHECK_EQ(self, Thread::Current());
   if (__builtin_frame_address(0) < self->GetStackEnd()) {
     ThrowStackOverflowError(self);
@@ -1838,36 +1852,50 @@
     CHECK(method->GetDeclaringClass()->IsInitializing());
   }
   const char* shorty = mh.GetShorty();
-  size_t arg_pos = 0;
-  for (; cur_reg < num_regs; ++cur_reg, ++arg_pos) {
-    DCHECK_LT(arg_pos + 1, mh.GetShortyLength());
-    switch (shorty[arg_pos + 1]) {
+  for (size_t shorty_pos = 0, arg_pos = 0; cur_reg < num_regs; ++shorty_pos, ++arg_pos, cur_reg++) {
+    DCHECK_LT(shorty_pos + 1, mh.GetShortyLength());
+    switch (shorty[shorty_pos + 1]) {
       case 'L': {
-        Object* o = args[arg_pos].GetL();
+        Object* o = reinterpret_cast<Object*>(args[arg_pos]);
         shadow_frame->SetVRegReference(cur_reg, o);
         break;
       }
-      case 'J': case 'D':
-        shadow_frame->SetVRegLong(cur_reg, args[arg_pos].GetJ());
+      case 'J': case 'D': {
+        uint64_t wide_value = (static_cast<uint64_t>(args[arg_pos + 1]) << 32) | args[arg_pos];
+        shadow_frame->SetVRegLong(cur_reg, wide_value);
         cur_reg++;
+        arg_pos++;
         break;
+      }
       default:
-        shadow_frame->SetVReg(cur_reg, args[arg_pos].GetI());
+        shadow_frame->SetVReg(cur_reg, args[arg_pos]);
         break;
     }
   }
   if (LIKELY(!method->IsNative())) {
     JValue r = Execute(self, mh, code_item, *shadow_frame.get(), JValue());
-    if (result != NULL) {
-      *result = r;
+    if (result != NULL && float_result != NULL) {
+      if (mh.IsReturnFloatOrDouble()) {
+        *float_result = r;
+      } else {
+        *result = r;
+      }
     }
   } else {
     // We don't expect to be asked to interpret native code (which is entered via a JNI compiler
     // generated stub) except during testing and image writing.
     if (!Runtime::Current()->IsStarted()) {
-      UnstartedRuntimeJni(self, method, receiver, args, result);
+      if (mh.IsReturnFloatOrDouble()) {
+        UnstartedRuntimeJni(self, method, receiver, args, float_result);
+      } else {
+        UnstartedRuntimeJni(self, method, receiver, args, result);
+      }
     } else {
-      InterpreterJni(self, method, shorty, receiver, args, result);
+      if (mh.IsReturnFloatOrDouble()) {
+        InterpreterJni(self, method, shorty, receiver, args, float_result);
+      } else {
+        InterpreterJni(self, method, shorty, receiver, args, result);
+      }
     }
   }
   self->PopShadowFrame();
diff --git a/src/interpreter/interpreter.h b/src/interpreter/interpreter.h
index 12da736..91816c9 100644
--- a/src/interpreter/interpreter.h
+++ b/src/interpreter/interpreter.h
@@ -34,7 +34,8 @@
 namespace interpreter {
 
 extern void EnterInterpreterFromInvoke(Thread* self, mirror::AbstractMethod* method,
-                                       mirror::Object* receiver, JValue* args, JValue* result)
+                                       mirror::Object* receiver, uint32_t* args,
+                                       JValue* result, JValue* float_result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 extern JValue EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame& shadow_frame,
diff --git a/src/invoke_arg_array_builder.h b/src/invoke_arg_array_builder.h
index 19c42ac..ecd0fb0 100644
--- a/src/invoke_arg_array_builder.h
+++ b/src/invoke_arg_array_builder.h
@@ -42,170 +42,240 @@
 class ArgArray {
  public:
   explicit ArgArray(const char* shorty, uint32_t shorty_len)
-      : shorty_(shorty), shorty_len_(shorty_len) {
-    if (shorty_len - 1 < kSmallArgArraySize) {
+      : shorty_(shorty), shorty_len_(shorty_len), num_bytes_(0) {
+    // TODO: This sizing is conservative. The multiplication by 2 covers the case where all args
+    // are doubles or longs. We could scan the shorty to use the small arg array more often.
+    if (shorty_len * 2 <= kSmallArgArraySize) {
       arg_array_ = small_arg_array_;
     } else {
-      large_arg_array_.reset(new JValue[shorty_len_ - 1]);
+      large_arg_array_.reset(new uint32_t[shorty_len_ * 2]);
       arg_array_ = large_arg_array_.get();
     }
   }
 
-  JValue* get() {
+  uint32_t* GetArray() {
     return arg_array_;
   }
 
-  void BuildArgArray(const ScopedObjectAccess& soa, va_list ap)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    for (size_t i = 1, offset = 0; i < shorty_len_; ++i, ++offset) {
-      switch (shorty_[i]) {
-        case 'Z':
-          arg_array_[offset].SetZ(va_arg(ap, jint));
-          break;
-        case 'B':
-          arg_array_[offset].SetB(va_arg(ap, jint));
-          break;
-        case 'C':
-          arg_array_[offset].SetC(va_arg(ap, jint));
-          break;
-        case 'S':
-          arg_array_[offset].SetS(va_arg(ap, jint));
-          break;
-        case 'I':
-          arg_array_[offset].SetI(va_arg(ap, jint));
-          break;
-        case 'F':
-          arg_array_[offset].SetF(va_arg(ap, jdouble));
-          break;
-        case 'L':
-          arg_array_[offset].SetL(soa.Decode<mirror::Object*>(va_arg(ap, jobject)));
-          break;
-        case 'D':
-          arg_array_[offset].SetD(va_arg(ap, jdouble));
-          break;
-        case 'J':
-          arg_array_[offset].SetJ(va_arg(ap, jlong));
-          break;
-      }
-    }
+  uint32_t GetNumBytes() {
+    return num_bytes_;
   }
 
-  void BuildArgArray(const ScopedObjectAccess& soa, jvalue* args)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    for (size_t i = 1, offset = 0; i < shorty_len_; ++i, ++offset) {
-      switch (shorty_[i]) {
-        case 'Z':
-          arg_array_[offset].SetZ(args[offset].z);
-          break;
-        case 'B':
-          arg_array_[offset].SetB(args[offset].b);
-          break;
-        case 'C':
-          arg_array_[offset].SetC(args[offset].c);
-          break;
-        case 'S':
-          arg_array_[offset].SetS(args[offset].s);
-          break;
-        case 'I':
-          arg_array_[offset].SetI(args[offset].i);
-          break;
-        case 'F':
-          arg_array_[offset].SetF(args[offset].f);
-          break;
-        case 'L':
-          arg_array_[offset].SetL(soa.Decode<mirror::Object*>(args[offset].l));
-          break;
-        case 'D':
-          arg_array_[offset].SetD(args[offset].d);
-          break;
-        case 'J':
-          arg_array_[offset].SetJ(args[offset].j);
-          break;
-      }
-    }
+  void Append(uint32_t value) {
+    arg_array_[num_bytes_ / 4] = value;
+    num_bytes_ += 4;
   }
 
-  void BuildArgArray(const ShadowFrame& shadow_frame, uint32_t range_start)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    for (size_t i = 1, offset = 0; i < shorty_len_; ++i, ++offset) {
-      switch (shorty_[i]) {
-        case 'Z':
-          arg_array_[i - 1].SetZ(shadow_frame.GetVReg(range_start + offset));
-          break;
-        case 'B':
-          arg_array_[i - 1].SetB(shadow_frame.GetVReg(range_start + offset));
-          break;
-        case 'C':
-          arg_array_[i - 1].SetC(shadow_frame.GetVReg(range_start + offset));
-          break;
-        case 'S':
-          arg_array_[i - 1].SetS(shadow_frame.GetVReg(range_start + offset));
-          break;
-        case 'I':
-          arg_array_[i - 1].SetI(shadow_frame.GetVReg(range_start + offset));
-          break;
-        case 'F':
-          arg_array_[i - 1].SetF(shadow_frame.GetVRegFloat(range_start + offset));
-          break;
-        case 'L':
-          arg_array_[i - 1].SetL(shadow_frame.GetVRegReference(range_start + offset));
-          break;
-        case 'D':
-          arg_array_[i - 1].SetD(shadow_frame.GetVRegDouble(range_start + offset));
-          offset++;
-          break;
-        case 'J':
-          arg_array_[i - 1].SetJ(shadow_frame.GetVRegLong(range_start + offset));
-          offset++;
-          break;
-      }
-    }
+  void AppendWide(uint64_t value) {
+    arg_array_[num_bytes_ / 4] = value;
+    arg_array_[(num_bytes_ / 4) + 1] = value >> 32;
+    num_bytes_ += 8;
   }
 
-  void BuildArgArray(const ShadowFrame& shadow_frame, const uint32_t* arg_regs)
+  void BuildArgArray(const ScopedObjectAccess& soa, mirror::Object* receiver, va_list ap)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    for (size_t i = 1, offset = 0; i < shorty_len_; ++i, ++offset) {
+    // Store the receiver in slot 0 when it is non-null (i.e. the method is not static).
+    size_t offset = 0;
+    if (receiver != NULL) {
+      arg_array_[0] = reinterpret_cast<int32_t>(receiver);
+      offset++;
+    }
+    for (size_t i = 1; i < shorty_len_; ++i, ++offset) {
       switch (shorty_[i]) {
         case 'Z':
-          arg_array_[i - 1].SetZ(shadow_frame.GetVReg(arg_regs[offset]));
+          arg_array_[offset] = va_arg(ap, jint);
           break;
         case 'B':
-          arg_array_[i - 1].SetB(shadow_frame.GetVReg(arg_regs[offset]));
+          arg_array_[offset] = va_arg(ap, jint);
           break;
         case 'C':
-          arg_array_[i - 1].SetC(shadow_frame.GetVReg(arg_regs[offset]));
+          arg_array_[offset] = va_arg(ap, jint);
           break;
         case 'S':
-          arg_array_[i - 1].SetS(shadow_frame.GetVReg(arg_regs[offset]));
+          arg_array_[offset] = va_arg(ap, jint);
           break;
         case 'I':
-          arg_array_[i - 1].SetI(shadow_frame.GetVReg(arg_regs[offset]));
+          arg_array_[offset] = va_arg(ap, jint);
+          break;
+        case 'F': {
+          JValue value;
+          value.SetF(va_arg(ap, jdouble));
+          arg_array_[offset] = value.GetI();
+          break;
+        }
+        case 'L':
+          arg_array_[offset] = reinterpret_cast<int32_t>(soa.Decode<mirror::Object*>(va_arg(ap, jobject)));
+          break;
+        case 'D': {
+          JValue value;
+          value.SetD(va_arg(ap, jdouble));
+          arg_array_[offset] = value.GetJ();
+          arg_array_[offset + 1] = value.GetJ() >> 32;
+          offset++;
+          break;
+        }
+        case 'J': {
+          long long l = va_arg(ap, jlong);
+          arg_array_[offset] = l;
+          arg_array_[offset + 1] = l >> 32;
+          offset++;
+          break;
+        }
+      }
+    }
+    num_bytes_ += 4 * offset;
+  }
+
+  void BuildArgArray(const ScopedObjectAccess& soa, mirror::Object* receiver, jvalue* args)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Store the receiver in slot 0 when it is non-null (i.e. the method is not static).
+    size_t offset = 0;
+    if (receiver != NULL) {
+      arg_array_[0] = reinterpret_cast<int32_t>(receiver);
+      offset++;
+    }
+    for (size_t i = 1, args_offset = 0; i < shorty_len_; ++i, ++offset, ++args_offset) {
+      switch (shorty_[i]) {
+        case 'Z':
+          arg_array_[offset] = args[args_offset].z;
+          break;
+        case 'B':
+          arg_array_[offset] = args[args_offset].b;
+          break;
+        case 'C':
+          arg_array_[offset] = args[args_offset].c;
+          break;
+        case 'S':
+          arg_array_[offset] = args[args_offset].s;
+          break;
+        case 'I':
+          arg_array_[offset] = args[args_offset].i;
           break;
         case 'F':
-          arg_array_[i - 1].SetF(shadow_frame.GetVRegFloat(arg_regs[offset]));
+          arg_array_[offset] = args[args_offset].i;
           break;
         case 'L':
-          arg_array_[i - 1].SetL(shadow_frame.GetVRegReference(arg_regs[offset]));
+          arg_array_[offset] = reinterpret_cast<int32_t>(soa.Decode<mirror::Object*>(args[args_offset].l));
           break;
         case 'D':
-          arg_array_[i - 1].SetD(shadow_frame.GetVRegDouble(arg_regs[offset]));
+          arg_array_[offset] = args[args_offset].j;
+          arg_array_[offset + 1] = args[args_offset].j >> 32;
           offset++;
           break;
         case 'J':
-          arg_array_[i - 1].SetJ(shadow_frame.GetVRegLong(arg_regs[offset]));
+          arg_array_[offset] = args[args_offset].j;
+          arg_array_[offset + 1] = args[args_offset].j >> 32;
           offset++;
           break;
       }
     }
+    num_bytes_ += 4 * offset;
+  }
+
+  void BuildArgArray(const ShadowFrame& shadow_frame, mirror::Object* receiver, uint32_t range_start)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Store the receiver in slot 0 when it is non-null (i.e. the method is not static).
+    size_t offset = 0;
+    if (receiver != NULL) {
+      arg_array_[0] = reinterpret_cast<int32_t>(receiver);
+      offset++;
+    }
+    for (size_t i = 1, reg_offset = 0; i < shorty_len_; ++i, ++offset, ++reg_offset) {
+      switch (shorty_[i]) {
+        case 'Z':
+          arg_array_[offset] = shadow_frame.GetVReg(range_start + reg_offset);
+          break;
+        case 'B':
+          arg_array_[offset] = shadow_frame.GetVReg(range_start + reg_offset);
+          break;
+        case 'C':
+          arg_array_[offset] = shadow_frame.GetVReg(range_start + reg_offset);
+          break;
+        case 'S':
+          arg_array_[offset] = shadow_frame.GetVReg(range_start + reg_offset);
+          break;
+        case 'I':
+          arg_array_[offset] = shadow_frame.GetVReg(range_start + reg_offset);
+          break;
+        case 'F':
+          arg_array_[offset] = shadow_frame.GetVReg(range_start + reg_offset);
+          break;
+        case 'L':
+          arg_array_[offset] = reinterpret_cast<int32_t>(shadow_frame.GetVRegReference(range_start + reg_offset));
+          break;
+        case 'D':
+          arg_array_[offset] = shadow_frame.GetVRegLong(range_start + reg_offset);
+          arg_array_[offset + 1] = shadow_frame.GetVRegLong(range_start + reg_offset) >> 32;
+          reg_offset++;
+          offset++;
+          break;
+        case 'J':
+          arg_array_[offset] = shadow_frame.GetVRegLong(range_start + reg_offset);
+          arg_array_[offset + 1] = shadow_frame.GetVRegLong(range_start + reg_offset) >> 32;
+          reg_offset++;
+          offset++;
+          break;
+      }
+    }
+    num_bytes_ += 4 * offset;
+  }
+
+  void BuildArgArray(const ShadowFrame& shadow_frame, mirror::Object* receiver, const uint32_t* arg_regs)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Store the receiver in slot 0 when it is non-null (i.e. the method is not static).
+    size_t offset = 0;
+    if (receiver != NULL) {
+      arg_array_[0] = reinterpret_cast<int32_t>(receiver);
+      offset++;
+    }
+    for (size_t i = 1, reg_offset = 0; i < shorty_len_; ++i, ++offset, ++reg_offset) {
+      switch (shorty_[i]) {
+        case 'Z':
+          arg_array_[offset] = shadow_frame.GetVReg(arg_regs[reg_offset]);
+          break;
+        case 'B':
+          arg_array_[offset] = shadow_frame.GetVReg(arg_regs[reg_offset]);
+          break;
+        case 'C':
+          arg_array_[offset] = shadow_frame.GetVReg(arg_regs[reg_offset]);
+          break;
+        case 'S':
+          arg_array_[offset] = shadow_frame.GetVReg(arg_regs[reg_offset]);
+          break;
+        case 'I':
+          arg_array_[offset] = shadow_frame.GetVReg(arg_regs[reg_offset]);
+          break;
+        case 'F':
+          arg_array_[offset] = shadow_frame.GetVReg(arg_regs[reg_offset]);
+          break;
+        case 'L':
+          arg_array_[offset] = reinterpret_cast<int32_t>(shadow_frame.GetVRegReference(arg_regs[reg_offset]));
+          break;
+        case 'D':
+          arg_array_[offset] = shadow_frame.GetVRegLong(arg_regs[reg_offset]);
+          arg_array_[offset + 1] = shadow_frame.GetVRegLong(arg_regs[reg_offset]) >> 32;
+          offset++;
+          reg_offset++;
+          break;
+        case 'J':
+          arg_array_[offset] = shadow_frame.GetVRegLong(arg_regs[reg_offset]);
+          arg_array_[offset + 1] = shadow_frame.GetVRegLong(arg_regs[reg_offset]) >> 32;
+          offset++;
+          reg_offset++;
+          break;
+      }
+    }
+    num_bytes_ += 4 * offset;
   }
 
  private:
   enum { kSmallArgArraySize = 16 };
   const char* const shorty_;
   const uint32_t shorty_len_;
-  JValue* arg_array_;
-  JValue small_arg_array_[kSmallArgArraySize];
-  UniquePtr<JValue[]> large_arg_array_;
+  uint32_t num_bytes_;
+  uint32_t* arg_array_;
+  uint32_t small_arg_array_[kSmallArgArraySize];
+  UniquePtr<uint32_t[]> large_arg_array_;
 };
 
 }  // namespace art
diff --git a/src/jni_internal.cc b/src/jni_internal.cc
index 887fcb4..eabce2c 100644
--- a/src/jni_internal.cc
+++ b/src/jni_internal.cc
@@ -85,20 +85,19 @@
   return reinterpret_cast<jweak>(ref);
 }
 
-static bool IsBadJniVersion(int version) {
-  // We don't support JNI_VERSION_1_1. These are the only other valid versions.
-  return version != JNI_VERSION_1_2 && version != JNI_VERSION_1_4 && version != JNI_VERSION_1_6;
-}
-
-static void CheckMethodArguments(AbstractMethod* m, JValue* args)
+static void CheckMethodArguments(AbstractMethod* m, uint32_t* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   MethodHelper mh(m);
   const DexFile::TypeList* params = mh.GetParameterTypeList();
   if (params == NULL) {
     return;  // No arguments so nothing to check.
   }
+  uint32_t offset = 0;
   uint32_t num_params = params->Size();
   size_t error_count = 0;
+  if (!m->IsStatic()) {
+    offset = 1;
+  }
   for (uint32_t i = 0; i < num_params; i++) {
     uint16_t type_idx = params->GetTypeItem(i).type_idx_;
     Class* param_type = mh.GetClassFromTypeIdx(type_idx);
@@ -112,12 +111,14 @@
       ++error_count;
     } else if (!param_type->IsPrimitive()) {
       // TODO: check primitives are in range.
-      Object* argument = args[i].GetL();
+      Object* argument = reinterpret_cast<Object*>(args[i + offset]);
       if (argument != NULL && !argument->InstanceOf(param_type)) {
         LOG(ERROR) << "JNI ERROR (app bug): attempt to pass an instance of "
                    << PrettyTypeOf(argument) << " as argument " << (i + 1) << " to " << PrettyMethod(m);
         ++error_count;
       }
+    } else if (param_type->IsPrimitiveLong() || param_type->IsPrimitiveDouble()) {
+      offset++;
     }
   }
   if (error_count > 0) {
@@ -127,15 +128,13 @@
   }
 }
 
-static JValue InvokeWithArgArray(const ScopedObjectAccess& soa, Object* receiver,
-                                 AbstractMethod* method, JValue* args)
+void InvokeWithArgArray(const ScopedObjectAccess& soa, AbstractMethod* method,
+                        ArgArray* arg_array, JValue* result, JValue* float_result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (UNLIKELY(soa.Env()->check_jni)) {
-    CheckMethodArguments(method, args);
+    CheckMethodArguments(method, arg_array->GetArray());
   }
-  JValue result;
-  method->Invoke(soa.Self(), receiver, args, &result);
-  return result;
+  method->Invoke(soa.Self(), arg_array->GetArray(), arg_array->GetNumBytes(), result, float_result);
 }
 
 static JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj,
@@ -144,9 +143,16 @@
   Object* receiver = soa.Decode<Object*>(obj);
   AbstractMethod* method = soa.DecodeMethod(mid);
   MethodHelper mh(method);
+  JValue result;
+  JValue float_result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
-  arg_array.BuildArgArray(soa, args);
-  return InvokeWithArgArray(soa, receiver, method, arg_array.get());
+  arg_array.BuildArgArray(soa, receiver, args);
+  InvokeWithArgArray(soa, method, &arg_array, &result, &float_result);
+  if (mh.IsReturnFloatOrDouble()) {
+    return float_result;
+  } else {
+    return result;
+  }
 }
 
 static AbstractMethod* FindVirtualMethod(Object* receiver, AbstractMethod* method)
@@ -160,9 +166,16 @@
   Object* receiver = soa.Decode<Object*>(obj);
   AbstractMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   MethodHelper mh(method);
+  JValue result;
+  JValue float_result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
-  arg_array.BuildArgArray(soa, args);
-  return InvokeWithArgArray(soa, receiver, method, arg_array.get());
+  arg_array.BuildArgArray(soa, receiver, args);
+  InvokeWithArgArray(soa, method, &arg_array, &result, &float_result);
+  if (mh.IsReturnFloatOrDouble()) {
+    return float_result;
+  } else {
+    return result;
+  }
 }
 
 static JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccess& soa,
@@ -171,9 +184,16 @@
   Object* receiver = soa.Decode<Object*>(obj);
   AbstractMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   MethodHelper mh(method);
+  JValue result;
+  JValue float_result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
-  arg_array.BuildArgArray(soa, args);
-  return InvokeWithArgArray(soa, receiver, method, arg_array.get());
+  arg_array.BuildArgArray(soa, receiver, args);
+  InvokeWithArgArray(soa, method, &arg_array, &result, &float_result);
+  if (mh.IsReturnFloatOrDouble()) {
+    return float_result;
+  } else {
+    return result;
+  }
 }
 
 // Section 12.3.2 of the JNI spec describes JNI class descriptors. They're
@@ -570,15 +590,16 @@
   Object* receiver = soa.Decode<Object*>(obj);
   AbstractMethod* method = soa.DecodeMethod(mid);
   MethodHelper mh(method);
+  JValue result;
+  JValue float_result;
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
-  arg_array.BuildArgArray(soa, args);
-  return InvokeWithArgArray(soa, receiver, method, arg_array.get());
-}
-
-JValue InvokeWithJValues(const ScopedObjectAccess& soa, Object* receiver, AbstractMethod* m,
-                         JValue* args)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return InvokeWithArgArray(soa, receiver, m, args);
+  arg_array.BuildArgArray(soa, receiver, args);
+  InvokeWithArgArray(soa, method, &arg_array, &result, &float_result);
+  if (mh.IsReturnFloatOrDouble()) {
+    return float_result;
+  } else {
+    return result;
+  }
 }
 
 class JNI {
diff --git a/src/jni_internal.h b/src/jni_internal.h
index 9b773f3..d4fc514 100644
--- a/src/jni_internal.h
+++ b/src/jni_internal.h
@@ -42,6 +42,7 @@
 class ClassLoader;
 class Field;
 }
+class ArgArray;
 union JValue;
 class Libraries;
 class ScopedObjectAccess;
@@ -55,8 +56,8 @@
 
 JValue InvokeWithJValues(const ScopedObjectAccess&, jobject obj, jmethodID mid, jvalue* args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-JValue InvokeWithJValues(const ScopedObjectAccess&, mirror::Object* receiver,
-                         mirror::AbstractMethod* m, JValue* args)
+void InvokeWithArgArray(const ScopedObjectAccess& soa, mirror::AbstractMethod* method,
+                        ArgArray *arg_array, JValue* result, JValue* float_result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause);
diff --git a/src/jni_internal_test.cc b/src/jni_internal_test.cc
index 4b820f9..b67b0fd 100644
--- a/src/jni_internal_test.cc
+++ b/src/jni_internal_test.cc
@@ -20,6 +20,7 @@
 #include <cmath>
 
 #include "common_test.h"
+#include "invoke_arg_array_builder.h"
 #include "mirror/abstract_method-inl.h"
 #include "mirror/object_array-inl.h"
 #include "ScopedLocalRef.h"
@@ -27,6 +28,9 @@
 
 namespace art {
 
+extern "C" void art_quick_invoke_stub(const mirror::AbstractMethod*, uint32_t*, uint32_t,
+                                      Thread*, JValue*, JValue*);
+
 class JniInternalTest : public CommonTest {
  protected:
   virtual void SetUp() {
@@ -72,10 +76,10 @@
     CommonTest::TearDown();
   }
 
-  mirror::AbstractMethod::InvokeStub* DoCompile(mirror::AbstractMethod*& method,
-                                                mirror::Object*& receiver,
-                                                bool is_static, const char* method_name,
-                                                const char* method_signature)
+  void DoCompile(mirror::AbstractMethod*& method,
+                 mirror::Object*& receiver,
+                 bool is_static, const char* method_name,
+                 const char* method_signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const char* class_name = is_static ? "StaticLeafMethods" : "NonStaticLeafMethods";
     jobject jclass_loader(LoadDex(class_name));
@@ -99,48 +103,57 @@
     CHECK(method != NULL);
 
     receiver = (is_static ? NULL : c->AllocObject(self));
-
-    mirror::AbstractMethod::InvokeStub* stub = method->GetInvokeStub();
-    CHECK(stub != NULL);
-
-    return stub;
   }
 
   void InvokeNopMethod(bool is_static) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub = DoCompile(method, receiver, is_static, "nop", "()V");
-    (*stub)(method, receiver, Thread::Current(), NULL, NULL);
+    DoCompile(method, receiver, is_static, "nop", "()V");
+
+    ArgArray arg_array(NULL, 0);
+    JValue result;
+    JValue float_result;
+
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+    }
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
   }
 
   void InvokeIdentityByteMethod(bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "identity", "(B)B");
+    DoCompile(method, receiver, is_static, "identity", "(I)I");
 
-    JValue args[1];
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
     JValue result;
+    JValue float_result;
 
-    args[0].SetB(0);
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    arg_array.Append(0);
     result.SetB(-1);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(0, result.GetB());
 
-    args[0].SetB(-1);
+    args[0] = -1;
     result.SetB(0);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(-1, result.GetB());
 
-    args[0].SetB(SCHAR_MAX);
+    args[0] = SCHAR_MAX;
     result.SetB(0);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(SCHAR_MAX, result.GetB());
 
-    args[0].SetB(SCHAR_MIN);
+    args[0] = (SCHAR_MIN << 24) >> 24;
     result.SetB(0);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(SCHAR_MIN, result.GetB());
   }
 
@@ -148,30 +161,36 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "identity", "(I)I");
+    DoCompile(method, receiver, is_static, "identity", "(I)I");
 
-    JValue args[1];
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
     JValue result;
+    JValue float_result;
 
-    args[0].SetI(0);
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    arg_array.Append(0);
     result.SetI(-1);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(0, result.GetI());
 
-    args[0].SetI(-1);
+    args[0] = -1;
     result.SetI(0);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(-1, result.GetI());
 
-    args[0].SetI(INT_MAX);
+    args[0] = INT_MAX;
     result.SetI(0);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(INT_MAX, result.GetI());
 
-    args[0].SetI(INT_MIN);
+    args[0] = INT_MIN;
     result.SetI(0);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(INT_MIN, result.GetI());
   }
 
@@ -179,70 +198,91 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "identity", "(D)D");
+    DoCompile(method, receiver, is_static, "identity", "(D)D");
 
-    JValue args[1];
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
+    JValue value;
     JValue result;
+    JValue float_result;
 
-    args[0].SetD(0.0);
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    value.SetD(0.0);
+    arg_array.AppendWide(value.GetJ());
     result.SetD(-1.0);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
-    EXPECT_EQ(0.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(0.0, float_result.GetD());
 
-    args[0].SetD(-1.0);
+    value.SetD(-1.0);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
-    EXPECT_EQ(-1.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(-1.0, float_result.GetD());
 
-    args[0].SetD(DBL_MAX);
+    value.SetD(DBL_MAX);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
-    EXPECT_EQ(DBL_MAX, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(DBL_MAX, float_result.GetD());
 
-    args[0].SetD(DBL_MIN);
+    value.SetD(DBL_MIN);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, receiver, Thread::Current(), args, &result);
-    EXPECT_EQ(DBL_MIN, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(DBL_MIN, float_result.GetD());
   }
 
   void InvokeSumIntIntMethod(bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "sum", "(II)I");
+    DoCompile(method, receiver, is_static, "sum", "(II)I");
 
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
     JValue result;
+    JValue float_result;
+
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    arg_array.Append(0);
+    arg_array.Append(0);
     result.SetI(-1);
-    JValue args[2];
-    args[0].SetI(0);
-    args[1].SetI(0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(0, result.GetI());
 
+    args[0] = 1;
+    args[1] = 2;
     result.SetI(0);
-    args[0].SetI(1);
-    args[1].SetI(2);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(3, result.GetI());
 
+    args[0] = -2;
+    args[1] = 5;
     result.SetI(0);
-    args[0].SetI(-2);
-    args[1].SetI(5);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(3, result.GetI());
 
+    args[0] = INT_MAX;
+    args[1] = INT_MIN;
     result.SetI(1234);
-    args[0].SetI(INT_MAX);
-    args[1].SetI(INT_MIN);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(-1, result.GetI());
 
+    args[0] = INT_MAX;
+    args[1] = INT_MAX;
     result.SetI(INT_MIN);
-    args[0].SetI(INT_MAX);
-    args[1].SetI(INT_MAX);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(-2, result.GetI());
   }
 
@@ -250,44 +290,51 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "sum", "(III)I");
+    DoCompile(method, receiver, is_static, "sum", "(III)I");
 
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
     JValue result;
+    JValue float_result;
+
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    arg_array.Append(0);
+    arg_array.Append(0);
+    arg_array.Append(0);
     result.SetI(-1);
-    JValue args[3];
-    args[0].SetI(0);
-    args[1].SetI(0);
-    args[2].SetI(0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(0, result.GetI());
 
+    args[0] = 1;
+    args[1] = 2;
+    args[2] = 3;
     result.SetI(0);
-    args[0].SetI(1);
-    args[1].SetI(2);
-    args[2].SetI(3);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(6, result.GetI());
 
+    args[0] = -1;
+    args[1] = 2;
+    args[2] = -3;
     result.SetI(0);
-    args[0].SetI(-1);
-    args[1].SetI(2);
-    args[2].SetI(-3);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(-2, result.GetI());
 
+    args[0] = INT_MAX;
+    args[1] = INT_MIN;
+    args[2] = INT_MAX;
     result.SetI(1234);
-    args[0].SetI(INT_MAX);
-    args[1].SetI(INT_MIN);
-    args[2].SetI(INT_MAX);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(2147483646, result.GetI());
 
+    args[0] = INT_MAX;
+    args[1] = INT_MAX;
+    args[2] = INT_MAX;
     result.SetI(INT_MIN);
-    args[0].SetI(INT_MAX);
-    args[1].SetI(INT_MAX);
-    args[2].SetI(INT_MAX);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(2147483645, result.GetI());
   }
 
@@ -295,49 +342,56 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "sum", "(IIII)I");
+    DoCompile(method, receiver, is_static, "sum", "(IIII)I");
 
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
     JValue result;
+    JValue float_result;
+
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    arg_array.Append(0);
+    arg_array.Append(0);
+    arg_array.Append(0);
+    arg_array.Append(0);
     result.SetI(-1);
-    JValue args[4];
-    args[0].SetI(0);
-    args[1].SetI(0);
-    args[2].SetI(0);
-    args[3].SetI(0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(0, result.GetI());
 
+    args[0] = 1;
+    args[1] = 2;
+    args[2] = 3;
+    args[3] = 4;
     result.SetI(0);
-    args[0].SetI(1);
-    args[1].SetI(2);
-    args[2].SetI(3);
-    args[3].SetI(4);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(10, result.GetI());
 
+    args[0] = -1;
+    args[1] = 2;
+    args[2] = -3;
+    args[3] = 4;
     result.SetI(0);
-    args[0].SetI(-1);
-    args[1].SetI(2);
-    args[2].SetI(-3);
-    args[3].SetI(4);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(2, result.GetI());
 
+    args[0] = INT_MAX;
+    args[1] = INT_MIN;
+    args[2] = INT_MAX;
+    args[3] = INT_MIN;
     result.SetI(1234);
-    args[0].SetI(INT_MAX);
-    args[1].SetI(INT_MIN);
-    args[2].SetI(INT_MAX);
-    args[3].SetI(INT_MIN);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(-2, result.GetI());
 
+    args[0] = INT_MAX;
+    args[1] = INT_MAX;
+    args[2] = INT_MAX;
+    args[3] = INT_MAX;
     result.SetI(INT_MIN);
-    args[0].SetI(INT_MAX);
-    args[1].SetI(INT_MAX);
-    args[2].SetI(INT_MAX);
-    args[3].SetI(INT_MAX);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(-4, result.GetI());
   }
 
@@ -345,54 +399,61 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "sum", "(IIIII)I");
+    DoCompile(method, receiver, is_static, "sum", "(IIIII)I");
 
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
     JValue result;
+    JValue float_result;
+
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    arg_array.Append(0);
+    arg_array.Append(0);
+    arg_array.Append(0);
+    arg_array.Append(0);
+    arg_array.Append(0);
     result.SetI(-1.0);
-    JValue args[5];
-    args[0].SetI(0);
-    args[1].SetI(0);
-    args[2].SetI(0);
-    args[3].SetI(0);
-    args[4].SetI(0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(0, result.GetI());
 
+    args[0] = 1;
+    args[1] = 2;
+    args[2] = 3;
+    args[3] = 4;
+    args[4] = 5;
     result.SetI(0);
-    args[0].SetI(1);
-    args[1].SetI(2);
-    args[2].SetI(3);
-    args[3].SetI(4);
-    args[4].SetI(5);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(15, result.GetI());
 
+    args[0] = -1;
+    args[1] = 2;
+    args[2] = -3;
+    args[3] = 4;
+    args[4] = -5;
     result.SetI(0);
-    args[0].SetI(-1);
-    args[1].SetI(2);
-    args[2].SetI(-3);
-    args[3].SetI(4);
-    args[4].SetI(-5);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(-3, result.GetI());
 
+    args[0] = INT_MAX;
+    args[1] = INT_MIN;
+    args[2] = INT_MAX;
+    args[3] = INT_MIN;
+    args[4] = INT_MAX;
     result.SetI(1234);
-    args[0].SetI(INT_MAX);
-    args[1].SetI(INT_MIN);
-    args[2].SetI(INT_MAX);
-    args[3].SetI(INT_MIN);
-    args[4].SetI(INT_MAX);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(2147483645, result.GetI());
 
+    args[0] = INT_MAX;
+    args[1] = INT_MAX;
+    args[2] = INT_MAX;
+    args[3] = INT_MAX;
+    args[4] = INT_MAX;
     result.SetI(INT_MIN);
-    args[0].SetI(INT_MAX);
-    args[1].SetI(INT_MAX);
-    args[2].SetI(INT_MAX);
-    args[3].SetI(INT_MAX);
-    args[4].SetI(INT_MAX);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
     EXPECT_EQ(2147483643, result.GetI());
   }
 
@@ -400,146 +461,262 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "sum", "(DD)D");
+    DoCompile(method, receiver, is_static, "sum", "(DD)D");
 
-    JValue args[2];
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
+    JValue value;
+    JValue value2;
     JValue result;
+    JValue float_result;
 
-    args[0].SetD(0.0);
-    args[1].SetD(0.0);
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    value.SetD(0.0);
+    value2.SetD(0.0);
+    arg_array.AppendWide(value.GetJ());
+    arg_array.AppendWide(value2.GetJ());
     result.SetD(-1.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(0.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(0.0, float_result.GetD());
 
-    args[0].SetD(1.0);
-    args[1].SetD(2.0);
+    value.SetD(1.0);
+    value2.SetD(2.0);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
+    args[2] = value2.GetJ();
+    args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(3.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(3.0, float_result.GetD());
 
-    args[0].SetD(1.0);
-    args[1].SetD(-2.0);
+    value.SetD(1.0);
+    value2.SetD(-2.0);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
+    args[2] = value2.GetJ();
+    args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(-1.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(-1.0, float_result.GetD());
 
-    args[0].SetD(DBL_MAX);
-    args[1].SetD(DBL_MIN);
+    value.SetD(DBL_MAX);
+    value2.SetD(DBL_MIN);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
+    args[2] = value2.GetJ();
+    args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(1.7976931348623157e308, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(1.7976931348623157e308, float_result.GetD());
 
-    args[0].SetD(DBL_MAX);
-    args[1].SetD(DBL_MAX);
+    value.SetD(DBL_MAX);
+    value2.SetD(DBL_MAX);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
+    args[2] = value2.GetJ();
+    args[3] = value2.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(INFINITY, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(INFINITY, float_result.GetD());
   }
 
   void InvokeSumDoubleDoubleDoubleMethod(bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "sum", "(DDD)D");
+    DoCompile(method, receiver, is_static, "sum", "(DDD)D");
 
-    JValue args[3];
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
+    JValue value;
+    JValue value2;
+    JValue value3;
     JValue result;
+    JValue float_result;
 
-    args[0].SetD(0.0);
-    args[1].SetD(0.0);
-    args[2].SetD(0.0);
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    value.SetD(0.0);
+    value2.SetD(0.0);
+    value3.SetD(0.0);
+    arg_array.AppendWide(value.GetJ());
+    arg_array.AppendWide(value2.GetJ());
+    arg_array.AppendWide(value3.GetJ());
     result.SetD(-1.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(0.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(0.0, float_result.GetD());
 
-    args[0].SetD(1.0);
-    args[1].SetD(2.0);
-    args[2].SetD(3.0);
+    value.SetD(1.0);
+    value2.SetD(2.0);
+    value3.SetD(3.0);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
+    args[2] = value2.GetJ();
+    args[3] = value2.GetJ() >> 32;
+    args[4] = value3.GetJ();
+    args[5] = value3.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(6.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(6.0, float_result.GetD());
 
-    args[0].SetD(1.0);
-    args[1].SetD(-2.0);
-    args[2].SetD(3.0);
+    value.SetD(1.0);
+    value2.SetD(-2.0);
+    value3.SetD(3.0);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
+    args[2] = value2.GetJ();
+    args[3] = value2.GetJ() >> 32;
+    args[4] = value3.GetJ();
+    args[5] = value3.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(2.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(2.0, float_result.GetD());
   }
 
   void InvokeSumDoubleDoubleDoubleDoubleMethod(bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "sum", "(DDDD)D");
+    DoCompile(method, receiver, is_static, "sum", "(DDDD)D");
 
-    JValue args[4];
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
+    JValue value;
+    JValue value2;
+    JValue value3;
+    JValue value4;
     JValue result;
+    JValue float_result;
 
-    args[0].SetD(0.0);
-    args[1].SetD(0.0);
-    args[2].SetD(0.0);
-    args[3].SetD(0.0);
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    value.SetD(0.0);
+    value2.SetD(0.0);
+    value3.SetD(0.0);
+    value4.SetD(0.0);
+    arg_array.AppendWide(value.GetJ());
+    arg_array.AppendWide(value2.GetJ());
+    arg_array.AppendWide(value3.GetJ());
+    arg_array.AppendWide(value4.GetJ());
     result.SetD(-1.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(0.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(0.0, float_result.GetD());
 
-    args[0].SetD(1.0);
-    args[1].SetD(2.0);
-    args[2].SetD(3.0);
-    args[3].SetD(4.0);
+    value.SetD(1.0);
+    value2.SetD(2.0);
+    value3.SetD(3.0);
+    value4.SetD(4.0);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
+    args[2] = value2.GetJ();
+    args[3] = value2.GetJ() >> 32;
+    args[4] = value3.GetJ();
+    args[5] = value3.GetJ() >> 32;
+    args[6] = value4.GetJ();
+    args[7] = value4.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(10.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(10.0, float_result.GetD());
 
-    args[0].SetD(1.0);
-    args[1].SetD(-2.0);
-    args[2].SetD(3.0);
-    args[3].SetD(-4.0);
+    value.SetD(1.0);
+    value2.SetD(-2.0);
+    value3.SetD(3.0);
+    value4.SetD(-4.0);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
+    args[2] = value2.GetJ();
+    args[3] = value2.GetJ() >> 32;
+    args[4] = value3.GetJ();
+    args[5] = value3.GetJ() >> 32;
+    args[6] = value4.GetJ();
+    args[7] = value4.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(-2.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(-2.0, float_result.GetD());
   }
 
   void InvokeSumDoubleDoubleDoubleDoubleDoubleMethod(bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::AbstractMethod* method;
     mirror::Object* receiver;
-    mirror::AbstractMethod::InvokeStub* stub =
-        DoCompile(method, receiver, is_static, "sum", "(DDDDD)D");
+    DoCompile(method, receiver, is_static, "sum", "(DDDDD)D");
 
-    JValue args[5];
+    ArgArray arg_array(NULL, 0);
+    uint32_t* args = arg_array.GetArray();
+    JValue value;
+    JValue value2;
+    JValue value3;
+    JValue value4;
+    JValue value5;
     JValue result;
+    JValue float_result;
 
-    args[0].SetD(0.0);
-    args[1].SetD(0.0);
-    args[2].SetD(0.0);
-    args[3].SetD(0.0);
-    args[4].SetD(0.0);
+    if (!is_static) {
+      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      args++;
+    }
+
+    value.SetD(0.0);
+    value2.SetD(0.0);
+    value3.SetD(0.0);
+    value4.SetD(0.0);
+    value5.SetD(0.0);
+    arg_array.AppendWide(value.GetJ());
+    arg_array.AppendWide(value2.GetJ());
+    arg_array.AppendWide(value3.GetJ());
+    arg_array.AppendWide(value4.GetJ());
+    arg_array.AppendWide(value5.GetJ());
     result.SetD(-1.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(0.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(0.0, float_result.GetD());
 
-    args[0].SetD(1.0);
-    args[1].SetD(2.0);
-    args[2].SetD(3.0);
-    args[3].SetD(4.0);
-    args[4].SetD(5.0);
+    value.SetD(1.0);
+    value2.SetD(2.0);
+    value3.SetD(3.0);
+    value4.SetD(4.0);
+    value5.SetD(5.0);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
+    args[2] = value2.GetJ();
+    args[3] = value2.GetJ() >> 32;
+    args[4] = value3.GetJ();
+    args[5] = value3.GetJ() >> 32;
+    args[6] = value4.GetJ();
+    args[7] = value4.GetJ() >> 32;
+    args[8] = value5.GetJ();
+    args[9] = value5.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(15.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(15.0, float_result.GetD());
 
-    args[0].SetD(1.0);
-    args[1].SetD(-2.0);
-    args[2].SetD(3.0);
-    args[3].SetD(-4.0);
-    args[4].SetD(5.0);
+    value.SetD(1.0);
+    value2.SetD(-2.0);
+    value3.SetD(3.0);
+    value4.SetD(-4.0);
+    value5.SetD(5.0);
+    args[0] = value.GetJ();
+    args[1] = value.GetJ() >> 32;
+    args[2] = value2.GetJ();
+    args[3] = value2.GetJ() >> 32;
+    args[4] = value3.GetJ();
+    args[5] = value3.GetJ() >> 32;
+    args[6] = value4.GetJ();
+    args[7] = value4.GetJ() >> 32;
+    args[8] = value5.GetJ();
+    args[9] = value5.GetJ() >> 32;
     result.SetD(0.0);
-    (*stub)(method, NULL, Thread::Current(), args, &result);
-    EXPECT_EQ(3.0, result.GetD());
+    (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
+    EXPECT_EQ(3.0, float_result.GetD());
   }
 
   JavaVMExt* vm_;
@@ -1420,12 +1597,12 @@
   mirror::AbstractMethod* method = klass->FindDirectMethod("main", "([Ljava/lang/String;)V");
   ASSERT_TRUE(method != NULL);
 
-  mirror::AbstractMethod::InvokeStub* stub = method->GetInvokeStub();
+  ArgArray arg_array(NULL, 0);
+  arg_array.Append(0);
+  JValue result;
+  JValue float_result;
 
-  JValue args[1];
-  args[0].SetL(NULL);
-
-  (*stub)(method, NULL, Thread::Current(), args, NULL);
+  (*art_quick_invoke_stub)(method, arg_array.GetArray(), arg_array.GetNumBytes(), Thread::Current(), &result, &float_result);
 }
 
 TEST_F(JniInternalTest, StaticNopMethod) {
diff --git a/src/mirror/abstract_method.cc b/src/mirror/abstract_method.cc
index 202fa2f..e185c9c 100644
--- a/src/mirror/abstract_method.cc
+++ b/src/mirror/abstract_method.cc
@@ -31,6 +31,9 @@
 namespace art {
 namespace mirror {
 
+extern "C" void art_quick_invoke_stub(AbstractMethod*, uint32_t*, uint32_t,
+                                      Thread*, JValue*, JValue*);
+
 // TODO: get global references for these
 Class* AbstractMethod::java_lang_reflect_Constructor_ = NULL;
 Class* AbstractMethod::java_lang_reflect_Method_ = NULL;
@@ -276,7 +279,8 @@
   return DexFile::kDexNoIndex;
 }
 
-void AbstractMethod::Invoke(Thread* self, Object* receiver, JValue* args, JValue* result) {
+void AbstractMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
+                            JValue* float_result) {
   if (kIsDebugBuild) {
     self->AssertThreadSuspensionIsAllowable();
     CHECK_EQ(kRunnable, self->GetState());
@@ -294,47 +298,83 @@
     LOG(INFO) << "Not invoking " << PrettyMethod(this) << " for a runtime that isn't started";
     if (result != NULL) {
       result->SetJ(0);
+      float_result->SetJ(0);
     }
   } else {
     bool interpret = self->ReadFlag(kEnterInterpreter) && !IsNative() && !IsProxyMethod();
     const bool kLogInvocationStartAndReturn = false;
-    if (!interpret && GetCode() != NULL && stub != NULL) {
-      if (kLogInvocationStartAndReturn) {
-        LOG(INFO) << StringPrintf("Invoking '%s' code=%p stub=%p",
-                                  PrettyMethod(this).c_str(), GetCode(), stub);
-      }
-      (*stub)(this, receiver, self, args, result);
-      if (UNLIKELY(reinterpret_cast<int32_t>(self->GetException()) == -1)) {
-        // Unusual case where we were running LLVM generated code and an
-        // exception was thrown to force the activations to be removed from the
-        // stack. Continue execution in the interpreter.
-        JValue value;
-        self->ClearException();
-        ShadowFrame* shadow_frame = self->GetAndClearDeoptimizationShadowFrame(&value);
-        self->SetTopOfShadowStack(shadow_frame);
-        interpreter::EnterInterpreterFromLLVM(self, shadow_frame, result);
-      }
-      if (kLogInvocationStartAndReturn) {
-        LOG(INFO) << StringPrintf("Returned '%s' code=%p stub=%p",
-                                  PrettyMethod(this).c_str(), GetCode(), stub);
-      }
-    } else {
-      const bool kInterpretMethodsWithNoCode = false;
-      if (interpret || kInterpretMethodsWithNoCode) {
+    if (GetCode() != NULL) {
+      if (!interpret) {
+        if (kLogInvocationStartAndReturn) {
+          LOG(INFO) << StringPrintf("Invoking '%s' code=%p stub=%p",
+                                    PrettyMethod(this).c_str(), GetCode(), stub);
+        }
+        // TODO: Temporary to keep portable working while stubs are removed from quick.
+#ifdef ART_USE_PORTABLE_COMPILER
+        MethodHelper mh(this);
+        const char* shorty = mh.GetShorty();
+        uint32_t shorty_len = mh.GetShortyLength();
+        UniquePtr<JValue[]> jvalue_args(new JValue[shorty_len - 1]);
+        Object* receiver = NULL;
+        uint32_t* ptr = args;
+        if (!this->IsStatic()) {
+          receiver = reinterpret_cast<Object*>(*ptr);
+          ptr++;
+        }
+        for (uint32_t i = 1; i < shorty_len; i++) {
+          if ((shorty[i] == 'J') || (shorty[i] == 'D')) {
+            jvalue_args[i - 1].SetJ(*((uint64_t*)ptr));
+            ptr++;
+          } else {
+            jvalue_args[i - 1].SetI(*ptr);
+          }
+          ptr++;
+        }
+        if (mh.IsReturnFloatOrDouble()) {
+          (*stub)(this, receiver, self, jvalue_args.get(), float_result);
+        } else {
+          (*stub)(this, receiver, self, jvalue_args.get(), result);
+        }
+#else
+        (*art_quick_invoke_stub)(this, args, args_size, self, result, float_result);
+#endif
+        if (UNLIKELY(reinterpret_cast<int32_t>(self->GetException()) == -1)) {
+          // Unusual case where we were running LLVM generated code and an
+          // exception was thrown to force the activations to be removed from the
+          // stack. Continue execution in the interpreter.
+          JValue value;
+          self->ClearException();
+          ShadowFrame* shadow_frame = self->GetAndClearDeoptimizationShadowFrame(&value);
+          self->SetTopOfShadowStack(shadow_frame);
+          interpreter::EnterInterpreterFromLLVM(self, shadow_frame, result);
+        }
+        if (kLogInvocationStartAndReturn) {
+          LOG(INFO) << StringPrintf("Returned '%s' code=%p stub=%p",
+                                    PrettyMethod(this).c_str(), GetCode(), stub);
+        }
+      } else {
         if (kLogInvocationStartAndReturn) {
           LOG(INFO) << "Interpreting " << PrettyMethod(this) << "'";
         }
-        art::interpreter::EnterInterpreterFromInvoke(self, this, receiver, args, result);
+        if (this->IsStatic()) {
+          art::interpreter::EnterInterpreterFromInvoke(self, this, NULL, args,
+                                                       result, float_result);
+        } else {
+          Object* receiver = reinterpret_cast<Object*>(args[0]);
+          art::interpreter::EnterInterpreterFromInvoke(self, this, receiver, args + 1,
+                                                       result, float_result);
+        }
         if (kLogInvocationStartAndReturn) {
           LOG(INFO) << "Returned '" << PrettyMethod(this) << "'";
         }
-      } else {
-        LOG(INFO) << "Not invoking '" << PrettyMethod(this)
-              << "' code=" << reinterpret_cast<const void*>(GetCode())
-              << " stub=" << reinterpret_cast<void*>(stub);
-        if (result != NULL) {
-          result->SetJ(0);
-        }
+      }
+    } else {
+      LOG(INFO) << "Not invoking '" << PrettyMethod(this)
+          << "' code=" << reinterpret_cast<const void*>(GetCode())
+          << " stub=" << reinterpret_cast<void*>(stub);
+      if (result != NULL) {
+        result->SetJ(0);
+        float_result->SetJ(0);
       }
     }
   }
diff --git a/src/mirror/abstract_method.h b/src/mirror/abstract_method.h
index 1d57abb..a489b1d 100644
--- a/src/mirror/abstract_method.h
+++ b/src/mirror/abstract_method.h
@@ -54,6 +54,10 @@
     return MemberOffset(OFFSETOF_MEMBER(AbstractMethod, declaring_class_));
   }
 
+  static MemberOffset CodeOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(AbstractMethod, code_));
+  }
+
   uint32_t GetAccessFlags() const;
 
   void SetAccessFlags(uint32_t new_access_flags) {
@@ -189,7 +193,8 @@
   // Find the method that this method overrides
   AbstractMethod* FindOverriddenMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void Invoke(Thread* self, Object* receiver, JValue* args, JValue* result)
+  void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
+              JValue* float_result)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const void* GetCode() const {
diff --git a/src/mirror/object_test.cc b/src/mirror/object_test.cc
index 29cf2f1..eed96bd 100644
--- a/src/mirror/object_test.cc
+++ b/src/mirror/object_test.cc
@@ -74,6 +74,8 @@
   ASSERT_EQ(STRING_COUNT_OFFSET, String::CountOffset().Int32Value());
   ASSERT_EQ(STRING_OFFSET_OFFSET, String::OffsetOffset().Int32Value());
   ASSERT_EQ(STRING_DATA_OFFSET, Array::DataOffset(sizeof(uint16_t)).Int32Value());
+
+  ASSERT_EQ(METHOD_CODE_OFFSET, AbstractMethod::CodeOffset().Int32Value());
 }
 
 TEST_F(ObjectTest, IsInSamePackage) {
diff --git a/src/oat/runtime/arm/runtime_support_arm.S b/src/oat/runtime/arm/runtime_support_arm.S
index 6067dd5..bd3f45d 100644
--- a/src/oat/runtime/arm/runtime_support_arm.S
+++ b/src/oat/runtime/arm/runtime_support_arm.S
@@ -103,13 +103,16 @@
     push {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves
     .save {r1-r3, r5-r8, r10-r11, lr}
     .cfi_adjust_cfa_offset 40
-    .cfi_rel_offset r5, 0
-    .cfi_rel_offset r6, 4
-    .cfi_rel_offset r7, 8
-    .cfi_rel_offset r8, 12
-    .cfi_rel_offset r10, 16
-    .cfi_rel_offset r11, 20
-    .cfi_rel_offset lr, 24
+    .cfi_rel_offset r1, 0
+    .cfi_rel_offset r2, 4
+    .cfi_rel_offset r3, 8
+    .cfi_rel_offset r5, 12
+    .cfi_rel_offset r6, 16
+    .cfi_rel_offset r7, 20
+    .cfi_rel_offset r8, 24
+    .cfi_rel_offset r10, 28
+    .cfi_rel_offset r11, 32
+    .cfi_rel_offset lr, 36
     sub sp, #8                        @ 2 words of space, bottom word will hold Method*
     .pad #8
     .cfi_adjust_cfa_offset 8
@@ -244,6 +247,53 @@
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
     /*
+     * Invocation stub.
+     * On entry:
+     *   r0 = method pointer
+     *   r1 = argument array or NULL for no argument methods
+     *   r2 = size of argument array in bytes
+     *   r3 = (managed) thread pointer
+     *   [sp] = JValue* result for non-floating point returns
+     *   [sp + 4] = JValue* result for floating point returns
+     */
+ENTRY art_quick_invoke_stub
+    push   {r0, r4, r5, r9, r11, lr}       @ spill regs
+    .save  {r0, r4, r5, r9, r11, lr}
+    .pad #24
+    .cfi_adjust_cfa_offset 24
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r4, 4
+    .cfi_rel_offset r5, 8
+    .cfi_rel_offset r9, 12
+    .cfi_rel_offset r11, 16
+    .cfi_rel_offset lr, 20
+    mov    r11, sp                         @ save the stack pointer
+    .cfi_def_cfa_register r11
+    mov    r9, r3                          @ move managed thread pointer into r9
+    mov    r4, #SUSPEND_CHECK_INTERVAL     @ reset r4 to suspend check interval
+    add    r5, r2, #16                     @ create space for method pointer in frame
+    and    r5, #0xFFFFFFF8                 @ round frame size down to a multiple of 8 bytes
+    sub    sp, r5                          @ reserve stack space for argument array
+    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
+    bl     memcpy                          @ memcpy (dest, src, bytes)
+    ldr    r0, [r11]                       @ restore method*
+    ldr    r1, [sp, #4]                    @ copy arg value for r1
+    ldr    r2, [sp, #8]                    @ copy arg value for r2
+    ldr    r3, [sp, #12]                   @ copy arg value for r3
+    mov    ip, #0                          @ set ip to 0
+    str    ip, [sp]                        @ store NULL for method* at bottom of frame
+    ldr    ip, [r0, #METHOD_CODE_OFFSET]   @ get pointer to the code
+    blx    ip                              @ call the method
+    add    sp, r5                          @ restore the stack
+    ldr    ip, [sp, #24]                   @ load the result pointer
+    strd   r0, [ip]                        @ store r0/r1 into result pointer
+    ldr    ip, [sp, #28]                   @ load the floating point result pointer
+    strd   r0, [ip]                        @ store r0/r1 into floating point result pointer
+    pop    {r0, r4, r5, r9, r11, lr}       @ restore spill regs
+    .cfi_adjust_cfa_offset -24
+    bx     lr
+END art_quick_invoke_stub
+    /*
      * On entry, r0 and r1 must be preserved, r2 is dex PC
      */
     .extern artUpdateDebuggerFromCode
diff --git a/src/oat/runtime/mips/runtime_support_mips.S b/src/oat/runtime/mips/runtime_support_mips.S
index 56535b2..cc41d14 100644
--- a/src/oat/runtime/mips/runtime_support_mips.S
+++ b/src/oat/runtime/mips/runtime_support_mips.S
@@ -427,6 +427,63 @@
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
     /*
+     * Invocation stub.
+     * On entry:
+     *   a0 = method pointer
+     *   a1 = argument array or NULL for no argument methods
+     *   a2 = size of argument array in bytes
+     *   a3 = (managed) thread pointer
+     *   [sp + 16] = JValue* result for non-floating point returns
+     *   [sp + 20] = JValue* result for floating point returns
+     */
+ENTRY art_quick_invoke_stub
+    GENERATE_GLOBAL_POINTER
+    sw    $a0, 0($sp)           # save out a0
+    addiu $sp, $sp, -16         # spill s0, s1, fp, ra
+    .cfi_adjust_cfa_offset 16
+    sw    $ra, 12($sp)
+    .cfi_rel_offset 31, 12
+    sw    $fp, 8($sp)
+    .cfi_rel_offset 30, 8
+    sw    $s1, 4($sp)
+    .cfi_rel_offset 17, 4
+    sw    $s0, 0($sp)
+    .cfi_rel_offset 16, 0
+    move  $fp, $sp              # save sp in fp
+    .cfi_def_cfa_register 30
+    move  $s1, $a3              # move managed thread pointer into s1
+    addiu $s0, $zero, SUSPEND_CHECK_INTERVAL  # reset s0 to suspend check interval
+    addiu $t0, $a2, 16          # create space for method pointer in frame
+    srl   $t0, $t0, 3           # shift the frame size right 3
+    sll   $t0, $t0, 3           # shift the frame size left 3 to align to 8 bytes
+    subu  $sp, $sp, $t0         # reserve stack space for argument array
+    addiu $a0, $sp, 4           # pass stack pointer + method ptr as dest for memcpy
+    jal   memcpy                # (dest, src, bytes)
+    addiu $sp, $sp, -16         # make space for argument slots for memcpy
+    addiu $sp, $sp, 16          # restore stack after memcpy
+    lw    $a0, 16($fp)          # restore method*
+    lw    $a1, 4($sp)           # copy arg value for a1
+    lw    $a2, 8($sp)           # copy arg value for a2
+    lw    $a3, 12($sp)          # copy arg value for a3
+    lw    $t9, METHOD_CODE_OFFSET($a0)  # get pointer to the code
+    jalr  $t9                   # call the method
+    sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
+    move  $sp, $fp              # restore the stack
+    lw    $s0, 0($sp)
+    lw    $s1, 4($sp)
+    lw    $fp, 8($sp)
+    lw    $ra, 12($sp)
+    addiu $sp, $sp, 16
+    .cfi_adjust_cfa_offset -16
+    lw    $t0, 16($sp)          # get result pointer
+    sw    $v0, 0($t0)           # store the result
+    sw    $v1, 4($t0)           # store the other half of the result
+    lw    $t0, 20($sp)          # get floating point result pointer
+    jr    $ra
+    s.d   $f0, 0($t0)           # store floating point result
+END art_quick_invoke_stub
+
+    /*
      * Entry point of native methods when JNI bug compatibility is enabled.
      */
     .extern artWorkAroundAppJniBugs
diff --git a/src/oat/runtime/x86/runtime_support_x86.S b/src/oat/runtime/x86/runtime_support_x86.S
index 0ff69d9..32d657d 100644
--- a/src/oat/runtime/x86/runtime_support_x86.S
+++ b/src/oat/runtime/x86/runtime_support_x86.S
@@ -301,6 +301,50 @@
 INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
+    /*
+     * Invocation stub: copies the argument array into a new frame and calls the method's code.
+     * On entry:
+     *   [sp] = return address
+     *   [sp + 4] = method pointer
+     *   [sp + 8] = argument array or NULL for no argument methods
+     *   [sp + 12] = size of argument array in bytes
+     *   [sp + 16] = (managed) thread pointer (not read by this stub)
+     *   [sp + 20] = JValue* result for non-floating point returns
+     *   [sp + 24] = JValue* result for floating point returns
+     */
+DEFINE_FUNCTION art_quick_invoke_stub
+    PUSH ebp                      // save ebp
+    PUSH ebx                      // save ebx
+    mov %esp, %ebp                // ebp anchors the frame; entry [sp + N] is now (N + 8)(%ebp)
+    .cfi_def_cfa_register ebp
+    mov 20(%ebp), %ebx            // get arg array size (presumably a multiple of 4 - confirm)
+    addl LITERAL(28), %ebx        // reserve space for return addr, method*, ebx, and ebp in frame
+    andl LITERAL(0xFFFFFFF0), %ebx    // align frame size to 16 bytes (clear the low four bits)
+    subl LITERAL(12), %ebx        // remove space for return address, ebx, and ebp
+    subl %ebx, %esp               // reserve stack space for method* slot and argument array
+    lea  4(%esp), %eax            // use stack pointer + method ptr as dest for memcpy
+    pushl 20(%ebp)                // push size of region to memcpy
+    pushl 16(%ebp)                // push arg array as source of memcpy
+    pushl %eax                    // push stack pointer as destination of memcpy
+    call SYMBOL(memcpy)           // (void*, const void*, size_t)
+    addl LITERAL(12), %esp        // pop arguments to memcpy
+    movl LITERAL(0), (%esp)       // store NULL for method* at the bottom of the new frame
+    mov 12(%ebp), %eax            // move method pointer into eax
+    mov 4(%esp), %ecx             // copy arg1 into ecx
+    mov 8(%esp), %edx             // copy arg2 into edx
+    mov 12(%esp), %ebx            // copy arg3 into ebx
+    call METHOD_CODE_OFFSET(%eax) // call the method through its code pointer field
+    mov %ebp, %esp                // restore stack pointer, dropping the argument frame
+    POP ebx                       // pop ebx
+    POP ebp                       // pop ebp
+    mov 20(%esp), %ecx            // get result pointer
+    mov %eax, (%ecx)              // store the result
+    mov %edx, 4(%ecx)             // store the other half of the result
+    mov 24(%esp), %ecx            // get floating point result pointer
+    movsd %xmm0, (%ecx)           // store the floating point result (written unconditionally)
+    ret
+END_FUNCTION art_quick_invoke_stub
+
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name, 0)
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
diff --git a/src/object_utils.h b/src/object_utils.h
index 18ad312..2c9f7a2 100644
--- a/src/object_utils.h
+++ b/src/object_utils.h
@@ -596,6 +596,11 @@
     return GetParamPrimitiveType(param) == Primitive::kPrimNot;
   }
 
+  bool IsReturnFloatOrDouble() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    const char lead = GetReturnTypeDescriptor()[0];  // first character of the return descriptor
+    return lead == 'F' || lead == 'D';
+  }
+
   bool HasSameNameAndSignature(MethodHelper* other)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (GetDexCache() == other->GetDexCache()) {
diff --git a/src/reflection.cc b/src/reflection.cc
index d678ebd..addb5a3 100644
--- a/src/reflection.cc
+++ b/src/reflection.cc
@@ -17,6 +17,7 @@
 #include "reflection.h"
 
 #include "class_linker.h"
+#include "invoke_arg_array_builder.h"
 #include "jni_internal.h"
 #include "mirror/abstract_method.h"
 #include "mirror/abstract_method-inl.h"
@@ -243,9 +244,18 @@
   if (kIsDebugBuild) {
     CHECK_EQ(soa.Self()->GetState(), kRunnable);
   }
-  JValue args[1] = { value };
+
+  ArgArray arg_array(NULL, 0);
   JValue result;
-  soa.DecodeMethod(m)->Invoke(soa.Self(), NULL, args, &result);
+  JValue float_result;
+  if (src_class == Primitive::kPrimDouble || src_class == Primitive::kPrimLong) {
+    arg_array.AppendWide(value.GetJ());
+  } else {
+    arg_array.Append(value.GetI());
+  }
+
+  soa.DecodeMethod(m)->Invoke(soa.Self(), arg_array.GetArray(), arg_array.GetNumBytes(),
+                              &result, &float_result);
   return result.GetL();
 }
 
diff --git a/src/runtime.cc b/src/runtime.cc
index 9b2dca7..1e7b000 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -39,6 +39,7 @@
 #include "image.h"
 #include "instrumentation.h"
 #include "intern_table.h"
+#include "invoke_arg_array_builder.h"
 #include "jni_internal.h"
 #include "mirror/abstract_method-inl.h"
 #include "mirror/array.h"
@@ -628,8 +629,11 @@
       class_loader_class->FindDirectMethod("getSystemClassLoader", "()Ljava/lang/ClassLoader;");
   CHECK(getSystemClassLoader != NULL);
 
-  mirror::ClassLoader* class_loader =
-    down_cast<mirror::ClassLoader*>(InvokeWithJValues(soa, NULL, getSystemClassLoader, NULL).GetL());
+  JValue result;
+  JValue float_result;
+  ArgArray arg_array(NULL, 0);
+  InvokeWithArgArray(soa, getSystemClassLoader, &arg_array, &result, &float_result);
+  mirror::ClassLoader* class_loader = down_cast<mirror::ClassLoader*>(result.GetL());
   CHECK(class_loader != NULL);
 
   soa.Self()->SetClassLoaderOverride(class_loader);
diff --git a/src/thread.cc b/src/thread.cc
index 394d263..96e7a8f 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -37,6 +37,7 @@
 #include "gc_map.h"
 #include "gc/card_table-inl.h"
 #include "heap.h"
+#include "invoke_arg_array_builder.h"
 #include "jni_internal.h"
 #include "mirror/abstract_method-inl.h"
 #include "mirror/class-inl.h"
@@ -161,7 +162,11 @@
     jmethodID mid = WellKnownClasses::java_lang_Thread_run;
     mirror::AbstractMethod* m =
         receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
-    m->Invoke(self, receiver, NULL, NULL);
+    JValue result;
+    JValue float_result;
+    ArgArray arg_array(NULL, 0);
+    arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+    m->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), &result, &float_result);
   }
   // Detach and delete self.
   Runtime::Current()->GetThreadList()->Unregister(self);
