Change 64 bit ArtMethod fields to be pointer sized
Changed the 64 bit entrypoint and gc map fields in ArtMethod to be
pointer sized. This saves a large amount of memory on 32 bit systems:
ArtMethod shrinks by 16 bytes, since the four formerly 64 bit fields
(interpreter entrypoint, JNI entrypoint, quick code entrypoint, and gc
map) each drop from 8 to 4 bytes.
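A minimal sketch of the access pattern (hypothetical helper, not the
actual mirror::ArtMethod accessors): the load width is chosen from the
target pointer size instead of being hardwired to 64 bits.

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Read a field occupying |pointer_size| bytes at |offset| within an
  // object; zero-extends 4 byte values so callers see one uniform type.
  static uint64_t GetFieldPtrSize(const uint8_t* obj, size_t offset,
                                  size_t pointer_size) {
    if (pointer_size == 4u) {
      uint32_t value;
      std::memcpy(&value, obj + offset, sizeof(value));  // 32 bit target.
      return value;
    }
    uint64_t value;
    std::memcpy(&value, obj + offset, sizeof(value));  // 64 bit target.
    return value;
  }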
Total number of ArtMethods on low memory mako: 169957
Image size: 49203 methods -> 787248 byte image size reduction.
Zygote space size: 1070 methods -> 17120 byte size reduction.
App methods: ~120k -> ~2 MB savings.
Savings per app on low memory mako: 125K+
(fewer active apps -> more image methods per app).
Actual savings depend on how often the shared methods end up on dirty
pages versus staying shared (clean).
TODO (in another CL): delete the gc map field from ArtMethod, since we
should be able to get it from the oat method header.
Bug: 17643507
Change-Id: Ie9508f05907a9f693882d4d32a564460bf273ee8
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 250924a..2481128 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -84,7 +84,9 @@
}
uintptr_t QuickCompiler::GetEntryPointOf(mirror::ArtMethod* method) const {
- return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCode());
+ size_t pointer_size = InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet());
+ return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize(
+ pointer_size));
}
bool QuickCompiler::WriteElf(art::File* file,
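Everything below keys off InstructionSetPointerSize() to pick the target
width. As a simplified sketch of the mapping it performs (covering only
the ISAs relevant here; the real function lives in the runtime):

  // Simplified: map the target ISA to its native pointer width in bytes.
  size_t InstructionSetPointerSize(InstructionSet isa) {
    switch (isa) {
      case kArm64:
      case kX86_64:
        return 8u;  // 64 bit targets.
      default:
        return 4u;  // kArm, kThumb2, kX86: 32 bit targets.
    }
  }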
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 7958886..7d89e19 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -477,9 +477,10 @@
const RegStorage* alt_from,
const CompilationUnit* cu, Mir2Lir* cg) {
if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
+ int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ InstructionSetPointerSize(cu->instruction_set)).Int32Value();
// Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt]
- cg->LoadWordDisp(alt_from == nullptr ? cg->TargetReg(kArg0, kRef) : *alt_from,
- mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
+ cg->LoadWordDisp(alt_from == nullptr ? cg->TargetReg(kArg0, kRef) : *alt_from, offset,
cg->TargetPtrReg(kInvokeTgt));
return true;
}
@@ -1802,8 +1803,9 @@
call_inst =
reinterpret_cast<X86Mir2Lir*>(this)->CallWithLinkerFixup(target_method, info->type);
} else {
- call_inst = OpMem(kOpBlx, TargetReg(kArg0, kRef),
- mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+ int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ InstructionSetPointerSize(cu_->instruction_set)).Int32Value();
+ call_inst = OpMem(kOpBlx, TargetReg(kArg0, kRef), offset);
}
} else {
call_inst = GenInvokeNoInlineCall(this, info->type);
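Note the offset is derived from the CompilationUnit's instruction set
rather than from sizeof(void*): dex2oat may cross-compile, e.g. a 64 bit
compiler producing a 32 bit image. A minimal illustration of the hazard
(offsets illustrative, not the real values):

  size_t ptr_size = InstructionSetPointerSize(cu->instruction_set);
  int32_t off = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
      ptr_size).Int32Value();
  // Using EntryPointFromQuickCompiledCodeOffset(sizeof(void*)) would bake
  // in the *host* width and read the wrong slot when cross-compiling.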
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 871889f..1b66adb 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -81,6 +81,7 @@
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet());
std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
if (oat_file.get() == NULL) {
LOG(ERROR) << "Failed to open oat file " << oat_filename << " for " << oat_location;
@@ -212,7 +213,14 @@
void ImageWriter::AssignImageOffset(mirror::Object* object) {
DCHECK(object != nullptr);
SetImageOffset(object, image_end_);
- image_end_ += RoundUp(object->SizeOf(), 8); // 64-bit alignment
+ size_t object_size;
+ if (object->IsArtMethod()) {
+ // Methods are sized based on the target pointer size.
+ object_size = mirror::ArtMethod::InstanceSize(target_ptr_size_);
+ } else {
+ object_size = object->SizeOf();
+ }
+ image_end_ += RoundUp(object_size, 8); // 64-bit alignment
DCHECK_LT(image_end_, image_->Size());
}
@@ -609,7 +617,14 @@
size_t offset = image_writer->GetImageOffset(obj);
byte* dst = image_writer->image_->Begin() + offset;
const byte* src = reinterpret_cast<const byte*>(obj);
- size_t n = obj->SizeOf();
+ size_t n;
+ if (obj->IsArtMethod()) {
+ // Use the size without pointer fields so we don't overrun the buffer if the target
+ // ArtMethod is 32 bit but the source is 64 bit.
+ n = mirror::ArtMethod::SizeWithoutPointerFields();
+ } else {
+ n = obj->SizeOf();
+ }
DCHECK_LT(offset + n, image_writer->image_->Size());
memcpy(dst, src, n);
Object* copy = reinterpret_cast<Object*>(dst);
@@ -688,6 +703,10 @@
}
if (orig->IsArtMethod<kVerifyNone>()) {
FixupMethod(orig->AsArtMethod<kVerifyNone>(), down_cast<ArtMethod*>(copy));
+ } else if (orig->IsClass() && orig->AsClass()->IsArtMethodClass()) {
+ // Set the right size for the target.
+ size_t size = mirror::ArtMethod::InstanceSize(target_ptr_size_);
+ down_cast<mirror::Class*>(copy)->SetObjectSizeWithoutChecks(size);
}
}
@@ -746,42 +765,63 @@
void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) {
// OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
// oat_begin_
+ // When the target pointer size differs from the runtime's, the pointer sized fields need
+ // to be repacked at the target's offsets.
+ // Copy all of the pointer sized fields from the runtime method to the target method first,
+ // since the earlier bytewise copy deliberately skipped them.
+#if defined(ART_USE_PORTABLE_COMPILER)
+ copy->SetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(
+ orig->GetEntryPointFromPortableCompiledCode(), target_ptr_size_);
+#endif
+ copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>(orig->GetEntryPointFromInterpreter(),
+ target_ptr_size_);
+ copy->SetEntryPointFromJniPtrSize<kVerifyNone>(orig->GetEntryPointFromJni(), target_ptr_size_);
+ copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(
+ orig->GetEntryPointFromQuickCompiledCode(), target_ptr_size_);
+ copy->SetNativeGcMapPtrSize<kVerifyNone>(orig->GetNativeGcMap(), target_ptr_size_);
// The resolution method has a special trampoline to call.
Runtime* runtime = Runtime::Current();
if (UNLIKELY(orig == runtime->GetResolutionMethod())) {
#if defined(ART_USE_PORTABLE_COMPILER)
- copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(GetOatAddress(portable_resolution_trampoline_offset_));
+ copy->SetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(
+ GetOatAddress(portable_resolution_trampoline_offset_), target_ptr_size_);
#endif
- copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_resolution_trampoline_offset_));
+ copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(
+ GetOatAddress(quick_resolution_trampoline_offset_), target_ptr_size_);
} else if (UNLIKELY(orig == runtime->GetImtConflictMethod() ||
orig == runtime->GetImtUnimplementedMethod())) {
#if defined(ART_USE_PORTABLE_COMPILER)
- copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(GetOatAddress(portable_imt_conflict_trampoline_offset_));
+ copy->SetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(
+ GetOatAddress(portable_imt_conflict_trampoline_offset_), target_ptr_size_);
#endif
- copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_imt_conflict_trampoline_offset_));
+ copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(
+ GetOatAddress(quick_imt_conflict_trampoline_offset_), target_ptr_size_);
} else {
// We assume all methods have code. If they don't currently then we set them to use the
// resolution trampoline. Abstract methods never have code and so we need to make sure their
// use results in an AbstractMethodError. We use the interpreter to achieve this.
if (UNLIKELY(orig->IsAbstract())) {
#if defined(ART_USE_PORTABLE_COMPILER)
- copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(GetOatAddress(portable_to_interpreter_bridge_offset_));
+ copy->SetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(
+ GetOatAddress(portable_to_interpreter_bridge_offset_), target_ptr_size_);
#endif
- copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_to_interpreter_bridge_offset_));
- copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*>
- (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_))));
+ copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(
+ GetOatAddress(quick_to_interpreter_bridge_offset_), target_ptr_size_);
+ copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>(
+ reinterpret_cast<EntryPointFromInterpreter*>(const_cast<byte*>(
+ GetOatAddress(interpreter_to_interpreter_bridge_offset_))), target_ptr_size_);
} else {
bool quick_is_interpreted;
const byte* quick_code = GetQuickCode(orig, &quick_is_interpreted);
- copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code);
+ copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(quick_code, target_ptr_size_);
// Portable entrypoint:
bool portable_is_interpreted = false;
#if defined(ART_USE_PORTABLE_COMPILER)
const byte* portable_code = GetOatAddress(orig->GetPortableOatCodeOffset());
- if (portable_code != nullptr &&
- (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) {
+ if (portable_code != nullptr && (!orig->IsStatic() || orig->IsConstructor() ||
+ orig->GetDeclaringClass()->IsInitialized())) {
// We have code for a non-static or initialized method, just use the code.
} else if (portable_code == nullptr && orig->IsNative() &&
(!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) {
@@ -798,18 +838,20 @@
// initialization.
portable_code = GetOatAddress(portable_resolution_trampoline_offset_);
}
- copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(portable_code);
+ copy->SetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(
+ portable_code, target_ptr_size_);
#endif
// JNI entrypoint:
if (orig->IsNative()) {
// The native method's pointer is set to a stub to lookup via dlsym.
// Note this is not the code_ pointer, that is handled above.
- copy->SetNativeMethod<kVerifyNone>(GetOatAddress(jni_dlsym_lookup_offset_));
+ copy->SetEntryPointFromJniPtrSize<kVerifyNone>(GetOatAddress(jni_dlsym_lookup_offset_),
+ target_ptr_size_);
} else {
// Normal (non-abstract non-native) methods have various tables to relocate.
uint32_t native_gc_map_offset = orig->GetOatNativeGcMapOffset();
- const byte* native_gc_map = GetOatAddress(native_gc_map_offset);
- copy->SetNativeGcMap<kVerifyNone>(reinterpret_cast<const uint8_t*>(native_gc_map));
+ const uint8_t* native_gc_map = GetOatAddress(native_gc_map_offset);
+ copy->SetNativeGcMapPtrSize<kVerifyNone>(native_gc_map, target_ptr_size_);
}
// Interpreter entrypoint:
@@ -817,9 +859,11 @@
uint32_t interpreter_code = (quick_is_interpreted && portable_is_interpreted)
? interpreter_to_interpreter_bridge_offset_
: interpreter_to_compiled_code_bridge_offset_;
- copy->SetEntryPointFromInterpreter<kVerifyNone>(
+ EntryPointFromInterpreter* interpreter_entrypoint =
reinterpret_cast<EntryPointFromInterpreter*>(
- const_cast<byte*>(GetOatAddress(interpreter_code))));
+ const_cast<byte*>(GetOatAddress(interpreter_code)));
+ copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>(
+ interpreter_entrypoint, target_ptr_size_);
}
}
}
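The explicit repacking above is needed because the byte copy earlier in
this file only copies SizeWithoutPointerFields() bytes for a method; the
trailing pointer fields have to be re-emitted at the target's packing. A
schematic of the offset arithmetic the Offset(pointer_size) accessors
imply (hypothetical helper, assuming the pointer fields sit back to back
after the non-pointer fields):

  // Offset of the i-th trailing pointer field for a target whose
  // pointers are |pointer_size| bytes wide.
  static size_t PtrSizedFieldOffset(size_t i, size_t pointer_size) {
    return mirror::ArtMethod::SizeWithoutPointerFields() + i * pointer_size;
  }

On a 32 bit target every successive field sits 4 bytes earlier than in a
64 bit runtime, which is why a plain byte copy of the tail would land
each field at the wrong offset.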
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 61365fe..6a9df10 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -204,6 +204,9 @@
uint32_t quick_to_interpreter_bridge_offset_;
bool compile_pic_;
+ // Size of pointers on the target architecture.
+ size_t target_ptr_size_;
+
friend class FixupVisitor;
friend class FixupClassVisitor;
DISALLOW_COPY_AND_ASSIGN(ImageWriter);
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index c38cfaf..35b7294 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -308,7 +308,9 @@
}
// 9. Plant call to native code associated with method.
- __ Call(main_jni_conv->MethodStackOffset(), mirror::ArtMethod::NativeMethodOffset(),
+ MemberOffset jni_entrypoint_offset = mirror::ArtMethod::EntryPointFromJniOffset(
+ InstructionSetPointerSize(instruction_set));
+ __ Call(main_jni_conv->MethodStackOffset(), jni_entrypoint_offset,
mr_conv->InterproceduralScratchRegister());
// 10. Fix differences in result widths.
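The generated JNI stub therefore loads the native code pointer from a
per-target offset. As an illustration (assuming the back-to-back layout
sketched earlier; the concrete values are not from this change):

  MemberOffset off32 = mirror::ArtMethod::EntryPointFromJniOffset(4u);
  MemberOffset off64 = mirror::ArtMethod::EntryPointFromJniOffset(8u);
  // The two generally differ: the non-pointer prefix is shared, but each
  // preceding pointer field is 4 bytes narrower on a 32 bit target.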
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 2c954a0..7822ee5 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -854,8 +854,8 @@
// temp = temp[index_in_cache]
__ ldr(temp, Address(temp, index_in_cache));
// LR = temp[offset_of_quick_compiled_code]
- __ ldr(LR, Address(temp,
- mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
+ __ ldr(LR, Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArmPointerSize).Int32Value()));
// LR()
__ blx(LR);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f544d47..1b6fb6b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -796,7 +796,8 @@
// temp = temp[index_in_cache]
__ movl(temp, Address(temp, index_in_cache));
// (temp + offset_of_quick_compiled_code)()
- __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
+ __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kX86PointerSize).Int32Value()));
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke->GetDexPc());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e1807dc..1ee8271 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -738,7 +738,8 @@
// temp = temp[index_in_cache]
__ movl(temp, Address(temp, index_in_cache));
// (temp + offset_of_quick_compiled_code)()
- __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
+ __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kX86_64PointerSize).SizeValue()));
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke->GetDexPc());
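The codegen changes are mechanical: each backend passes its own pointer
width constant. As a sanity check, the constants relate to the mapping
sketched earlier (assuming kArmPointerSize and friends are plain byte
counts, which matches how this change passes them as size_t):

  static_assert(kArmPointerSize == 4u, "ARM uses 4 byte pointers");
  static_assert(kX86PointerSize == 4u, "x86 uses 4 byte pointers");
  static_assert(kX86_64PointerSize == 8u, "x86-64 uses 8 byte pointers");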