Merge "Implement CFI for Optimizing."
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 0d3ca06..94322a8 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -104,6 +104,7 @@
optimizing/code_generator_arm64.cc \
+ optimizing/code_generator_utils.cc \
optimizing/code_generator_x86.cc \
optimizing/code_generator_x86_64.cc \
optimizing/constant_folding.cc \
optimizing/dead_code_elimination.cc \
optimizing/graph_checker.cc \
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 93d83c6..0850f42 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -403,13 +403,6 @@
DCHECK(bb != nullptr);
return c_unit->mir_graph->EliminateSuspendChecks(bb);
}
-
- void End(PassDataHolder* data) const {
- DCHECK(data != nullptr);
- CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
- DCHECK(c_unit != nullptr);
- c_unit->mir_graph->EliminateSuspendChecksEnd();
- }
};
} // namespace art
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 95cff0a..85b1344 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -1085,7 +1085,6 @@
void EliminateDeadCodeEnd();
bool EliminateSuspendChecksGate();
bool EliminateSuspendChecks(BasicBlock* bb);
- void EliminateSuspendChecksEnd();
uint16_t GetGvnIFieldId(MIR* mir) const {
DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode));
@@ -1408,10 +1407,6 @@
uint16_t* sfield_ids; // Ditto.
GvnDeadCodeElimination* dce;
} gvn;
- // Suspend check elimination.
- struct {
- DexFileMethodInliner* inliner;
- } sce;
} temp_;
static const int kInvalidEntry = -1;
ArenaVector<BasicBlock*> block_list_;
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 831ad42..0c84b82 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -16,6 +16,8 @@
# include "mir_method_info.h"
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex/verified_method.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
@@ -64,6 +66,9 @@
const DexFile* const dex_file = mUnit->GetDexFile();
const bool use_jit = runtime->UseJit();
const VerifiedMethod* const verified_method = mUnit->GetVerifiedMethod();
+ DexFileToMethodInlinerMap* inliner_map = compiler_driver->GetMethodInlinerMap();
+ DexFileMethodInliner* default_inliner =
+ (inliner_map != nullptr) ? inliner_map->GetMethodInliner(dex_file) : nullptr;
for (auto it = method_infos, end = method_infos + count; it != end; ++it) {
// For quickened invokes, the dex method idx is actually the mir offset.
@@ -122,6 +127,7 @@
if (UNLIKELY(resolved_method == nullptr)) {
continue;
}
+
compiler_driver->GetResolvedMethodDexFileLocation(resolved_method,
&it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_method_idx_);
if (!it->IsQuickened()) {
@@ -133,6 +139,7 @@
it->vtable_idx_ =
compiler_driver->GetResolvedMethodVTableIndex(resolved_method, invoke_type);
}
+
MethodReference target_method(it->target_dex_file_, it->target_method_idx_);
int fast_path_flags = compiler_driver->IsFastInvoke(
soa, current_dex_cache, class_loader, mUnit, referrer_class.Get(), resolved_method,
@@ -140,10 +147,23 @@
const bool is_referrers_class = referrer_class.Get() == resolved_method->GetDeclaringClass();
const bool is_class_initialized =
compiler_driver->IsMethodsClassInitialized(referrer_class.Get(), resolved_method);
+
+ // Check if the target method is intrinsic or special.
+ InlineMethodFlags is_intrinsic_or_special = kNoInlineMethodFlags;
+ if (inliner_map != nullptr) {
+ auto* inliner = (target_method.dex_file == dex_file)
+ ? default_inliner
+ : inliner_map->GetMethodInliner(target_method.dex_file);
+ is_intrinsic_or_special = inliner->IsIntrinsicOrSpecial(target_method.dex_method_index);
+ }
+
uint16_t other_flags = it->flags_ &
- ~(kFlagFastPath | kFlagClassIsInitialized | (kInvokeTypeMask << kBitSharpTypeBegin));
+ ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized |
+ (kInvokeTypeMask << kBitSharpTypeBegin));
it->flags_ = other_flags |
(fast_path_flags != 0 ? kFlagFastPath : 0u) |
+ ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) |
+ ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) |
(static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) |
(is_referrers_class ? kFlagIsReferrersClass : 0u) |
(is_class_initialized ? kFlagClassIsInitialized : 0u);
diff --git a/compiler/dex/mir_method_info.h b/compiler/dex/mir_method_info.h
index e131c96..7230c46 100644
--- a/compiler/dex/mir_method_info.h
+++ b/compiler/dex/mir_method_info.h
@@ -127,6 +127,14 @@
return (flags_ & kFlagFastPath) != 0u;
}
+ bool IsIntrinsic() const {
+ return (flags_ & kFlagIsIntrinsic) != 0u;
+ }
+
+ bool IsSpecial() const {
+ return (flags_ & kFlagIsSpecial) != 0u;
+ }
+
bool IsReferrersClass() const {
return (flags_ & kFlagIsReferrersClass) != 0;
}
@@ -188,9 +196,11 @@
private:
enum {
kBitFastPath = kMethodInfoBitEnd,
+ kBitIsIntrinsic,
+ kBitIsSpecial,
kBitInvokeTypeBegin,
kBitInvokeTypeEnd = kBitInvokeTypeBegin + 3, // 3 bits for invoke type.
- kBitSharpTypeBegin,
+ kBitSharpTypeBegin = kBitInvokeTypeEnd,
kBitSharpTypeEnd = kBitSharpTypeBegin + 3, // 3 bits for sharp type.
kBitIsReferrersClass = kBitSharpTypeEnd,
kBitClassIsInitialized,
@@ -199,6 +209,8 @@
};
static_assert(kMethodLoweringInfoBitEnd <= 16, "Too many flags");
static constexpr uint16_t kFlagFastPath = 1u << kBitFastPath;
+ static constexpr uint16_t kFlagIsIntrinsic = 1u << kBitIsIntrinsic;
+ static constexpr uint16_t kFlagIsSpecial = 1u << kBitIsSpecial;
static constexpr uint16_t kFlagIsReferrersClass = 1u << kBitIsReferrersClass;
static constexpr uint16_t kFlagClassIsInitialized = 1u << kBitClassIsInitialized;
static constexpr uint16_t kFlagQuickened = 1u << kBitQuickened;
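
The explicit kBitSharpTypeBegin = kBitInvokeTypeEnd above is deliberate: the *End
enumerators are exclusive bounds, so a plain enumerator written after kBitInvokeTypeEnd
would auto-increment to End + 1 and waste a bit, just as the two new flag bits tighten
the 16-bit budget. A minimal standalone illustration (hypothetical names, not from the
source):

    #include <cstdint>

    enum : uint16_t {
      kFieldABegin = 0,
      kFieldAEnd = kFieldABegin + 3,  // Exclusive end: field A occupies bits 0..2.
      kWasteful,                      // Auto-increment yields kFieldAEnd + 1 == 4.
      kFieldBBegin = kFieldAEnd,      // Packed: field B starts right at bit 3.
    };
    static_assert(kWasteful == 4, "auto-increment skips the bit at the exclusive end");
    static_assert(kFieldBBegin == 3, "explicit assignment reclaims it");

    int main() { return 0; }
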
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 5dcc903..9d7b4b4 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -1517,7 +1517,7 @@
continue;
}
const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir);
- if (!method_info.FastPath()) {
+ if (!method_info.FastPath() || !method_info.IsSpecial()) {
continue;
}
@@ -1659,10 +1659,6 @@
!HasInvokes()) { // No invokes to actually eliminate any suspend checks.
return false;
}
- if (cu_->compiler_driver != nullptr && cu_->compiler_driver->GetMethodInlinerMap() != nullptr) {
- temp_.sce.inliner =
- cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
- }
suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc);
return true;
}
@@ -1680,9 +1676,9 @@
uint32_t suspend_checks_in_loops = (1u << bb->nesting_depth) - 1u; // Start with all loop heads.
bool found_invoke = false;
for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
- if (IsInstructionInvoke(mir->dalvikInsn.opcode) &&
- (temp_.sce.inliner == nullptr ||
- !temp_.sce.inliner->IsIntrinsic(mir->dalvikInsn.vB, nullptr))) {
+ if ((IsInstructionInvoke(mir->dalvikInsn.opcode) ||
+ IsInstructionQuickInvoke(mir->dalvikInsn.opcode)) &&
+ !GetMethodLoweringInfo(mir).IsIntrinsic()) {
// Non-intrinsic invoke, rely on a suspend point in the invoked method.
found_invoke = true;
break;
@@ -1745,10 +1741,6 @@
return true;
}
-void MIRGraph::EliminateSuspendChecksEnd() {
- temp_.sce.inliner = nullptr;
-}
-
bool MIRGraph::CanThrow(MIR* mir) const {
if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) {
return false;
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 9ce5ebb..10a4337 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -474,7 +474,6 @@
for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
change = cu_.mir_graph->EliminateSuspendChecks(bb);
}
- cu_.mir_graph->EliminateSuspendChecksEnd();
}
SuspendCheckEliminationTest()
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 8e3f4ef..4ac6c0c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -413,6 +413,17 @@
return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method);
}
+InlineMethodFlags DexFileMethodInliner::IsIntrinsicOrSpecial(uint32_t method_index) {
+ ReaderMutexLock mu(Thread::Current(), lock_);
+ auto it = inline_methods_.find(method_index);
+ if (it != inline_methods_.end()) {
+ DCHECK_NE(it->second.flags & (kInlineIntrinsic | kInlineSpecial), 0);
+ return it->second.flags;
+ } else {
+ return kNoInlineMethodFlags;
+ }
+}
+
bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) {
ReaderMutexLock mu(Thread::Current(), lock_);
auto it = inline_methods_.find(method_index);
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index cb521da..d1e5621 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -65,6 +65,11 @@
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
/**
+ * Check whether a particular method index corresponds to an intrinsic or special function.
+ */
+ InlineMethodFlags IsIntrinsicOrSpecial(uint32_t method_index) LOCKS_EXCLUDED(lock_);
+
+ /**
* Check whether a particular method index corresponds to an intrinsic function.
*/
bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_);
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index e747239..db7095d 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1435,10 +1435,12 @@
void Mir2Lir::GenInvoke(CallInfo* info) {
DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
- const DexFile* dex_file = info->method_ref.dex_file;
- if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file)
- ->GenIntrinsic(this, info)) {
- return;
+ if (mir_graph_->GetMethodLoweringInfo(info->mir).IsIntrinsic()) {
+ const DexFile* dex_file = info->method_ref.dex_file;
+ auto* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file);
+ if (inliner->GenIntrinsic(this, info)) {
+ return;
+ }
}
GenInvokeNoInline(info);
}
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 487d31c..e779479 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -1156,7 +1156,7 @@
mir_graph_->GetCurrentDexCompilationUnit(), mir->offset)) {
break; // No code generated.
}
- if (!needs_access_check && !use_declaring_class && pc_rel_temp_ != nullptr) {
+ if (!needs_access_check && !use_declaring_class && CanUseOpPcRelDexCacheArrayLoad()) {
uses_pc_rel_load = true; // And ignore method use in slow path.
dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
} else {
@@ -1166,7 +1166,7 @@
}
case Instruction::CONST_CLASS:
- if (pc_rel_temp_ != nullptr &&
+ if (CanUseOpPcRelDexCacheArrayLoad() &&
cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file,
mir->dalvikInsn.vB)) {
uses_pc_rel_load = true; // And ignore method use in slow path.
@@ -1178,7 +1178,7 @@
case Instruction::CONST_STRING:
case Instruction::CONST_STRING_JUMBO:
- if (pc_rel_temp_ != nullptr) {
+ if (CanUseOpPcRelDexCacheArrayLoad()) {
uses_pc_rel_load = true; // And ignore method use in slow path.
dex_cache_array_offset = dex_cache_arrays_layout_.StringOffset(mir->dalvikInsn.vB);
} else {
@@ -1200,11 +1200,13 @@
case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
const MirMethodLoweringInfo& info = mir_graph_->GetMethodLoweringInfo(mir);
InvokeType sharp_type = info.GetSharpType();
- if (!info.FastPath() || (sharp_type != kStatic && sharp_type != kDirect)) {
+ if (info.IsIntrinsic()) {
+ // Nothing to do, if an intrinsic uses ArtMethod* it's in the slow-path - don't count it.
+ } else if (!info.FastPath() || (sharp_type != kStatic && sharp_type != kDirect)) {
// Nothing to do, the generated code or entrypoint uses method from the stack.
} else if (info.DirectCode() != 0 && info.DirectMethod() != 0) {
// Nothing to do, the generated code uses method from the stack.
- } else if (pc_rel_temp_ != nullptr) {
+ } else if (CanUseOpPcRelDexCacheArrayLoad()) {
uses_pc_rel_load = true;
dex_cache_array_offset = dex_cache_arrays_layout_.MethodOffset(mir->dalvikInsn.vB);
} else {
@@ -1245,7 +1247,7 @@
? field_info.FastGet()
: field_info.FastPut();
if (fast && (cu_->enable_debug & (1 << kDebugSlowFieldPath)) == 0) {
- if (!field_info.IsReferrersClass() && pc_rel_temp_ != nullptr) {
+ if (!field_info.IsReferrersClass() && CanUseOpPcRelDexCacheArrayLoad()) {
uses_pc_rel_load = true; // And ignore method use in slow path.
dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex());
} else {
@@ -1264,9 +1266,13 @@
core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count += weight;
}
if (uses_pc_rel_load) {
- core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight;
- DCHECK_NE(dex_cache_array_offset, std::numeric_limits<uint32_t>::max());
- dex_cache_arrays_min_offset_ = std::min(dex_cache_arrays_min_offset_, dex_cache_array_offset);
+ if (pc_rel_temp_ != nullptr) {
+ core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight;
+ DCHECK_NE(dex_cache_array_offset, std::numeric_limits<uint32_t>::max());
+ dex_cache_arrays_min_offset_ = std::min(dex_cache_arrays_min_offset_, dex_cache_array_offset);
+ } else {
+ // Nothing to do, using PC-relative addressing without promoting base PC to register.
+ }
}
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 12eb0c7..5fe8adc 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1706,11 +1706,26 @@
Register lhs = InputRegisterAt(condition, 0);
Operand rhs = InputOperandAt(condition, 1);
Condition arm64_cond = ARM64Condition(condition->GetCondition());
- if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
- if (arm64_cond == eq) {
- __ Cbz(lhs, true_target);
- } else {
- __ Cbnz(lhs, true_target);
+ if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+ switch (arm64_cond) {
+ case eq:
+ __ Cbz(lhs, true_target);
+ break;
+ case ne:
+ __ Cbnz(lhs, true_target);
+ break;
+ case lt:
+ // Test the sign bit and branch accordingly.
+ __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+ break;
+ case ge:
+ // Test the sign bit and branch accordingly.
+ __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+ break;
+ default:
+ // Without the `static_cast` the compiler throws an error for
+ // `-Werror=sign-promo`.
+ LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
}
} else {
__ Cmp(lhs, rhs);
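
The new lt and ge cases above rely on a single identity: a signed value is negative
exactly when its sign bit is set, so one Tbnz/Tbz on bit 31 (bit 63 for X registers)
replaces the former Cmp against zero plus conditional branch. A standalone sketch of the
identity (not generator code):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t values[] = {INT64_MIN, -1, 0, 1, INT64_MAX};
      for (int64_t x : values) {
        // Tbnz tests one bit; bit 63 is the sign bit of an X register.
        bool sign_bit_set = ((static_cast<uint64_t>(x) >> 63) & 1) != 0;
        assert((x < 0) == sign_bit_set);
      }
      return 0;
    }
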
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
new file mode 100644
index 0000000..26cab2f
--- /dev/null
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_utils.h"
+
+#include "base/logging.h"
+
+void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long,
+ int64_t* magic, int* shift) {
+ // It does not make sense to calculate magic and shift for zero divisor.
+ DCHECK_NE(divisor, 0);
+
+ /* Based on the implementation in H. S. Warren's "Hacker's Delight" (Addison-Wesley, 2002),
+ * Chapter 10, and T. Granlund and P. L. Montgomery's "Division by Invariant Integers Using
+ * Multiplication" (PLDI 1994).
+ * The magic number M and shift S can be calculated in the following way:
+ * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
+ * where divisor(d) >= 2.
+ * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
+ * where divisor(d) <= -2.
+ * Thus nc can be calculated like:
+ * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
+ * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
+ *
+ * So the shift p is the smallest p satisfying
+ * 2^p > nc * (d - 2^p % d), where d >= 2
+ * 2^p > nc * (d + 2^p % d), where d <= -2.
+ *
+ * The magic number M is calculated by
+ * M = (2^p + d - 2^p % d) / d, where d >= 2
+ * M = (2^p - d - 2^p % d) / d, where d <= -2.
+ *
+ * Notice that p is always greater than or equal to 32 (resp. 64), so we just return
+ * p - 32 (resp. p - 64) as the shift number S.
+ */
+
+ int64_t p = is_long ? 63 : 31;
+ const uint64_t exp = is_long ? (UINT64_C(1) << 63) : (UINT32_C(1) << 31);
+
+ // Initialize the computations.
+ uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
+ uint64_t tmp = exp + (is_long ? static_cast<uint64_t>(divisor) >> 63 :
+ static_cast<uint32_t>(divisor) >> 31);
+ uint64_t abs_nc = tmp - 1 - tmp % abs_d;
+ uint64_t quotient1 = exp / abs_nc;
+ uint64_t remainder1 = exp % abs_nc;
+ uint64_t quotient2 = exp / abs_d;
+ uint64_t remainder2 = exp % abs_d;
+
+ /*
+ * To avoid handling positive and negative divisors separately, "Hacker's Delight"
+ * gives a formulation that covers the two cases together.
+ */
+ uint64_t delta;
+ do {
+ p++;
+ quotient1 = 2 * quotient1;
+ remainder1 = 2 * remainder1;
+ if (remainder1 >= abs_nc) {
+ quotient1++;
+ remainder1 = remainder1 - abs_nc;
+ }
+ quotient2 = 2 * quotient2;
+ remainder2 = 2 * remainder2;
+ if (remainder2 >= abs_d) {
+ quotient2++;
+ remainder2 = remainder2 - abs_d;
+ }
+ delta = abs_d - remainder2;
+ } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
+
+ *magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
+
+ if (!is_long) {
+ *magic = static_cast<int>(*magic);
+ }
+
+ *shift = is_long ? p - 64 : p - 32;
+}
+
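As a sanity check of the algorithm above: for a 32-bit divisor of 7 the helper produces
magic = 0x92492493 and shift = 2, and the sequence the code generators emit for it reduces
to the following standalone sketch (not part of the patch; it assumes arithmetic right
shift on signed values, as the generated code does):

    #include <cassert>
    #include <cstdint>

    int32_t DivBy7(int32_t n) {
      const int32_t magic = static_cast<int32_t>(0x92492493);  // From the helper, divisor 7.
      const int shift = 2;
      // High 32 bits of magic * n, as imul leaves them in EDX/RDX.
      int32_t t = static_cast<int32_t>((static_cast<int64_t>(magic) * n) >> 32);
      t += n;                                       // imm > 0 && magic < 0: add the numerator.
      t >>= shift;                                  // Arithmetic shift by the computed amount.
      return t + (static_cast<uint32_t>(t) >> 31);  // Add 1 if negative (the shr/add pair).
    }

    int main() {
      for (int32_t n : {-100, -7, -1, 0, 1, 6, 7, 100}) {
        assert(DivBy7(n) == n / 7);
      }
      return 0;
    }
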
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
new file mode 100644
index 0000000..742d675
--- /dev/null
+++ b/compiler/optimizing/code_generator_utils.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
+
+#include <cstdint>
+
+// Computes the magic number and the shift needed in the div/rem by constant algorithm
+void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift);
+
+#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8fbc64e..845c6c2 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -16,6 +16,7 @@
#include "code_generator_x86.h"
+#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
@@ -2297,6 +2298,134 @@
__ addl(ESP, Immediate(2 * elem_size));
}
+
+void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(1).IsConstant());
+
+ Register out_register = locations->Out().AsRegister<Register>();
+ Register input_register = locations->InAt(0).AsRegister<Register>();
+ int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+
+ DCHECK(imm == 1 || imm == -1);
+
+ if (instruction->IsRem()) {
+ __ xorl(out_register, out_register);
+ } else {
+ __ movl(out_register, input_register);
+ if (imm == -1) {
+ __ negl(out_register);
+ }
+ }
+}
+
+void InstructionCodeGeneratorX86::DivByPowerOfTwo(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv());
+
+ LocationSummary* locations = instruction->GetLocations();
+
+ Register out_register = locations->Out().AsRegister<Register>();
+ Register input_register = locations->InAt(0).AsRegister<Register>();
+ int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+
+ DCHECK(instruction->IsDiv() && IsPowerOfTwo(std::abs(imm)));
+ Register num = locations->GetTemp(0).AsRegister<Register>();
+
+ __ leal(num, Address(input_register, std::abs(imm) - 1));
+ __ testl(input_register, input_register);
+ __ cmovl(kGreaterEqual, num, input_register);
+ int shift = CTZ(imm);
+ __ sarl(num, Immediate(shift));
+
+ if (imm < 0) {
+ __ negl(num);
+ }
+
+ __ movl(out_register, num);
+}
+
+void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+
+ Register eax = locations->InAt(0).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+ Register num;
+ Register edx;
+
+ if (instruction->IsDiv()) {
+ edx = locations->GetTemp(0).AsRegister<Register>();
+ num = locations->GetTemp(1).AsRegister<Register>();
+ } else {
+ edx = locations->Out().AsRegister<Register>();
+ num = locations->GetTemp(0).AsRegister<Register>();
+ }
+
+ DCHECK_EQ(EAX, eax);
+ DCHECK_EQ(EDX, edx);
+ if (instruction->IsDiv()) {
+ DCHECK_EQ(EAX, out);
+ } else {
+ DCHECK_EQ(EDX, out);
+ }
+
+ int64_t magic;
+ int shift;
+ CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+
+ Label ndiv;
+ Label end;
+ // If the numerator is 0, the result is 0; no computation is needed.
+ __ testl(eax, eax);
+ __ j(kNotEqual, &ndiv);
+
+ __ xorl(out, out);
+ __ jmp(&end);
+
+ __ Bind(&ndiv);
+
+ // Save the numerator.
+ __ movl(num, eax);
+
+ // EAX = magic
+ __ movl(eax, Immediate(magic));
+
+ // EDX:EAX = magic * numerator
+ __ imull(num);
+
+ if (imm > 0 && magic < 0) {
+ // EDX += num
+ __ addl(edx, num);
+ } else if (imm < 0 && magic > 0) {
+ __ subl(edx, num);
+ }
+
+ // Shift if needed.
+ if (shift != 0) {
+ __ sarl(edx, Immediate(shift));
+ }
+
+ // EDX += 1 if EDX < 0
+ __ movl(eax, edx);
+ __ shrl(edx, Immediate(31));
+ __ addl(edx, eax);
+
+ if (instruction->IsRem()) {
+ __ movl(eax, num);
+ __ imull(edx, Immediate(imm));
+ __ subl(eax, edx);
+ __ movl(edx, eax);
+ } else {
+ __ movl(eax, edx);
+ }
+ __ Bind(&end);
+}
+
void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
@@ -2308,28 +2437,42 @@
switch (instruction->GetResultType()) {
case Primitive::kPrimInt: {
- Register second_reg = second.AsRegister<Register>();
DCHECK_EQ(EAX, first.AsRegister<Register>());
DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
- SlowPathCodeX86* slow_path =
+ if (second.IsConstant()) {
+ int imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+ if (imm == 0) {
+ // Do not generate anything for 0. DivZeroCheck throws first, so this code is never reached.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
+ DivByPowerOfTwo(instruction);
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+ } else {
+ SlowPathCodeX86* slow_path =
new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(),
- is_div);
- codegen_->AddSlowPath(slow_path);
+ is_div);
+ codegen_->AddSlowPath(slow_path);
- // 0x80000000/-1 triggers an arithmetic exception!
- // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
- // it's safe to just use negl instead of more complex comparisons.
+ Register second_reg = second.AsRegister<Register>();
+ // 0x80000000/-1 triggers an arithmetic exception!
+ // Dividing by -1 is actually negation and -0x80000000 = 0x80000000, so
+ // it's safe to just use negl instead of more complex comparisons.
- __ cmpl(second_reg, Immediate(-1));
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ cmpl(second_reg, Immediate(-1));
+ __ j(kEqual, slow_path->GetEntryLabel());
- // edx:eax <- sign-extended of eax
- __ cdq();
- // eax = quotient, edx = remainder
- __ idivl(second_reg);
-
- __ Bind(slow_path->GetExitLabel());
+ // edx:eax <- sign-extended of eax
+ __ cdq();
+ // eax = quotient, edx = remainder
+ __ idivl(second_reg);
+ __ Bind(slow_path->GetExitLabel());
+ }
break;
}
@@ -2369,10 +2512,16 @@
switch (div->GetResultType()) {
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RegisterLocation(EAX));
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
// Intel uses edx:eax as the dividend.
locations->AddTemp(Location::RegisterLocation(EDX));
+ // We need to save the numerator while we tweak EAX and EDX. As we are using imul in a way
+ // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
+ // output and request another temp.
+ if (div->InputAt(1)->IsConstant()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
break;
}
case Primitive::kPrimLong: {
@@ -2430,6 +2579,7 @@
void LocationsBuilderX86::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
+
LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
? LocationSummary::kCall
: LocationSummary::kNoCall;
@@ -2438,8 +2588,14 @@
switch (type) {
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RegisterLocation(EAX));
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
locations->SetOut(Location::RegisterLocation(EDX));
+ // We need to save the numerator while we tweak EAX and EDX. As we are using imul in a way
+ // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
+ // output and request another temp.
+ if (rem->InputAt(1)->IsConstant()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
break;
}
case Primitive::kPrimLong: {
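
DivByPowerOfTwo above implements truncating division by biasing a negative numerator with
|d| - 1 before the arithmetic shift; that is what the leal/testl/cmovl/sarl/negl sequence
computes. A standalone reference version (a sketch assuming arithmetic right shift;
__builtin_ctz stands in for ART's CTZ):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    int32_t DivByPowerOfTwoRef(int32_t n, int32_t d) {
      int32_t abs_d = std::abs(d);                 // |d| is a power of two, |d| >= 2.
      int32_t biased = (n < 0) ? n + (abs_d - 1)   // leal tmp, [n + |d| - 1]
                               : n;                // testl + cmovl(kGreaterEqual) keeps n.
      int32_t q = biased >> __builtin_ctz(abs_d);  // sarl by CTZ.
      return (d < 0) ? -q : q;                     // negl for a negative divisor.
    }

    int main() {
      for (int32_t n : {-9, -8, -1, 0, 1, 7, 8, 9}) {
        assert(DivByPowerOfTwoRef(n, 8) == n / 8);
        assert(DivByPowerOfTwoRef(n, -8) == n / -8);
      }
      return 0;
    }
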
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0cc3c65..20f14fb 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -163,6 +163,9 @@
void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
void HandleBitwiseOperation(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
+ void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+ void DivByPowerOfTwo(HBinaryOperation* instruction);
+ void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateRemFP(HRem *rem);
void HandleShift(HBinaryOperation* instruction);
void GenerateShlLong(const Location& loc, Register shifter);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index b5e038f..c915b0f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -16,6 +16,7 @@
#include "code_generator_x86_64.h"
+#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
@@ -428,7 +429,8 @@
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this),
- isa_features_(isa_features) {
+ isa_features_(isa_features),
+ constant_area_start_(0) {
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -1975,7 +1977,7 @@
case Primitive::kPrimDouble:
case Primitive::kPrimFloat: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2039,12 +2041,30 @@
}
case Primitive::kPrimFloat: {
- __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ addss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ addss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ addsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ addsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2072,7 +2092,7 @@
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2110,12 +2130,30 @@
}
case Primitive::kPrimFloat: {
- __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ subss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ subss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ subsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ subsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2148,7 +2186,7 @@
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2193,13 +2231,31 @@
case Primitive::kPrimFloat: {
DCHECK(first.Equals(locations->Out()));
- __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ mulss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ mulss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
DCHECK(first.Equals(locations->Out()));
- __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ mulsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ mulsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2283,6 +2339,228 @@
__ addq(CpuRegister(RSP), Immediate(2 * elem_size));
}
+void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
+ int64_t imm;
+ if (second.GetConstant()->IsLongConstant()) {
+ imm = second.GetConstant()->AsLongConstant()->GetValue();
+ } else {
+ imm = second.GetConstant()->AsIntConstant()->GetValue();
+ }
+
+ DCHECK(imm == 1 || imm == -1);
+
+ switch (instruction->GetResultType()) {
+ case Primitive::kPrimInt: {
+ if (instruction->IsRem()) {
+ __ xorl(output_register, output_register);
+ } else {
+ __ movl(output_register, input_register);
+ if (imm == -1) {
+ __ negl(output_register);
+ }
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ if (instruction->IsRem()) {
+ __ xorq(output_register, output_register);
+ } else {
+ __ movq(output_register, input_register);
+ if (imm == -1) {
+ __ negq(output_register);
+ }
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unreachable";
+ }
+}
+
+void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv());
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+
+ CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
+
+ int64_t imm;
+ if (instruction->GetResultType() == Primitive::kPrimLong) {
+ imm = second.GetConstant()->AsLongConstant()->GetValue();
+ } else {
+ imm = second.GetConstant()->AsIntConstant()->GetValue();
+ }
+
+ DCHECK(IsPowerOfTwo(std::abs(imm)));
+
+ CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ if (instruction->GetResultType() == Primitive::kPrimInt) {
+ __ leal(tmp, Address(numerator, std::abs(imm) - 1));
+ __ testl(numerator, numerator);
+ __ cmov(kGreaterEqual, tmp, numerator);
+ int shift = CTZ(imm);
+ __ sarl(tmp, Immediate(shift));
+
+ if (imm < 0) {
+ __ negl(tmp);
+ }
+
+ __ movl(output_register, tmp);
+ } else {
+ DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+ CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ __ movq(rdx, Immediate(std::abs(imm) - 1));
+ __ addq(rdx, numerator);
+ __ testq(numerator, numerator);
+ __ cmov(kGreaterEqual, rdx, numerator);
+ int shift = CTZ(imm);
+ __ sarq(rdx, Immediate(shift));
+
+ if (imm < 0) {
+ __ negq(rdx);
+ }
+
+ __ movq(output_register, rdx);
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+
+ CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
+ : locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
+ : locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ DCHECK_EQ(RAX, eax.AsRegister());
+ DCHECK_EQ(RDX, edx.AsRegister());
+ if (instruction->IsDiv()) {
+ DCHECK_EQ(RAX, out.AsRegister());
+ } else {
+ DCHECK_EQ(RDX, out.AsRegister());
+ }
+
+ int64_t magic;
+ int shift;
+
+ // TODO: can these two branches be written as one?
+ if (instruction->GetResultType() == Primitive::kPrimInt) {
+ int imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+ CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+
+ __ movl(numerator, eax);
+
+ Label no_div;
+ Label end;
+ __ testl(eax, eax);
+ __ j(kNotEqual, &no_div);
+
+ __ xorl(out, out);
+ __ jmp(&end);
+
+ __ Bind(&no_div);
+
+ __ movl(eax, Immediate(magic));
+ __ imull(numerator);
+
+ if (imm > 0 && magic < 0) {
+ __ addl(edx, numerator);
+ } else if (imm < 0 && magic > 0) {
+ __ subl(edx, numerator);
+ }
+
+ if (shift != 0) {
+ __ sarl(edx, Immediate(shift));
+ }
+
+ __ movl(eax, edx);
+ __ shrl(edx, Immediate(31));
+ __ addl(edx, eax);
+
+ if (instruction->IsRem()) {
+ __ movl(eax, numerator);
+ __ imull(edx, Immediate(imm));
+ __ subl(eax, edx);
+ __ movl(edx, eax);
+ } else {
+ __ movl(eax, edx);
+ }
+ __ Bind(&end);
+ } else {
+ int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
+
+ DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+
+ CpuRegister rax = eax;
+ CpuRegister rdx = edx;
+
+ CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
+
+ // Save the numerator.
+ __ movq(numerator, rax);
+
+ // RAX = magic
+ __ movq(rax, Immediate(magic));
+
+ // RDX:RAX = magic * numerator
+ __ imulq(numerator);
+
+ if (imm > 0 && magic < 0) {
+ // RDX += numerator
+ __ addq(rdx, numerator);
+ } else if (imm < 0 && magic > 0) {
+ // RDX -= numerator
+ __ subq(rdx, numerator);
+ }
+
+ // Shift if needed.
+ if (shift != 0) {
+ __ sarq(rdx, Immediate(shift));
+ }
+
+ // RDX += 1 if RDX < 0
+ __ movq(rax, rdx);
+ __ shrq(rdx, Immediate(63));
+ __ addq(rdx, rax);
+
+ if (instruction->IsRem()) {
+ __ movq(rax, numerator);
+
+ if (IsInt<32>(imm)) {
+ __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
+ } else {
+ __ movq(numerator, Immediate(imm));
+ __ imulq(rdx, numerator);
+ }
+
+ __ subq(rax, rdx);
+ __ movq(rdx, rax);
+ } else {
+ __ movq(rax, rdx);
+ }
+ }
+}
+
void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
Primitive::Type type = instruction->GetResultType();
@@ -2291,37 +2569,57 @@
bool is_div = instruction->IsDiv();
LocationSummary* locations = instruction->GetLocations();
- CpuRegister out_reg = locations->Out().AsRegister<CpuRegister>();
- CpuRegister second_reg = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ Location second = locations->InAt(1);
DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
- DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister());
+ DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
- SlowPathCodeX86_64* slow_path =
- new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
- out_reg.AsRegister(), type, is_div);
- codegen_->AddSlowPath(slow_path);
+ if (second.IsConstant()) {
+ int64_t imm;
+ if (second.GetConstant()->AsLongConstant()) {
+ imm = second.GetConstant()->AsLongConstant()->GetValue();
+ } else {
+ imm = second.GetConstant()->AsIntConstant()->GetValue();
+ }
- // 0x80000000(00000000)/-1 triggers an arithmetic exception!
- // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000)
- // so it's safe to just use negl instead of more complex comparisons.
- if (type == Primitive::kPrimInt) {
- __ cmpl(second_reg, Immediate(-1));
- __ j(kEqual, slow_path->GetEntryLabel());
- // edx:eax <- sign-extended of eax
- __ cdq();
- // eax = quotient, edx = remainder
- __ idivl(second_reg);
+ if (imm == 0) {
+ // Do not generate anything. DivZeroCheck throws first, so this code is never reached.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) {
+ DivByPowerOfTwo(instruction);
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
} else {
- __ cmpq(second_reg, Immediate(-1));
- __ j(kEqual, slow_path->GetEntryLabel());
- // rdx:rax <- sign-extended of rax
- __ cqo();
- // rax = quotient, rdx = remainder
- __ idivq(second_reg);
- }
+ SlowPathCodeX86_64* slow_path =
+ new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
+ out.AsRegister(), type, is_div);
+ codegen_->AddSlowPath(slow_path);
- __ Bind(slow_path->GetExitLabel());
+ CpuRegister second_reg = second.AsRegister<CpuRegister>();
+ // 0x80000000(00000000)/-1 triggers an arithmetic exception!
+ // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
+ // so it's safe to just use negl instead of more complex comparisons.
+ if (type == Primitive::kPrimInt) {
+ __ cmpl(second_reg, Immediate(-1));
+ __ j(kEqual, slow_path->GetEntryLabel());
+ // edx:eax <- sign-extended of eax
+ __ cdq();
+ // eax = quotient, edx = remainder
+ __ idivl(second_reg);
+ } else {
+ __ cmpq(second_reg, Immediate(-1));
+ __ j(kEqual, slow_path->GetEntryLabel());
+ // rdx:rax <- sign-extended of rax
+ __ cqo();
+ // rax = quotient, rdx = remainder
+ __ idivq(second_reg);
+ }
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
@@ -2331,17 +2629,23 @@
case Primitive::kPrimInt:
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RegisterLocation(RAX));
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
// Intel uses edx:eax as the dividend.
locations->AddTemp(Location::RegisterLocation(RDX));
+ // We need to save the numerator while we tweak RAX and RDX. As we are using imul in a way
+ // which enforces results to be in RAX and RDX, things are simpler if we use RAX also as
+ // output and request another temp.
+ if (div->InputAt(1)->IsConstant()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
break;
}
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2366,12 +2670,30 @@
}
case Primitive::kPrimFloat: {
- __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ divss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ divss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ divsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ divsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2389,9 +2711,15 @@
case Primitive::kPrimInt:
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RegisterLocation(RAX));
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
// Intel uses rdx:rax as the dividend and puts the remainder in rdx
locations->SetOut(Location::RegisterLocation(RDX));
+ // We need to save the numerator while we tweak RAX and RDX. As we are using imul in a way
+ // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
+ // output and request another temp.
+ if (rem->InputAt(1)->IsConstant()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
break;
}
@@ -3904,5 +4232,65 @@
LOG(FATAL) << "Unreachable";
}
+void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
+ // Generate the constant area if needed.
+ if (!__ IsConstantAreaEmpty()) {
+ // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
+ // byte values. If used for vectors at a later time, this will need to be
+ // updated to 16 bytes with the appropriate offset.
+ __ Align(4, 0);
+ constant_area_start_ = __ CodeSize();
+ __ AddConstantArea();
+ }
+
+ // And finish up.
+ CodeGenerator::Finalize(allocator);
+}
+
+/**
+ * Class to handle late fixup of offsets into constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> {
+ public:
+ RIPFixup(CodeGeneratorX86_64& codegen, int offset)
+ : codegen_(codegen), offset_into_constant_area_(offset) {}
+
+ private:
+ void Process(const MemoryRegion& region, int pos) OVERRIDE {
+ // Patch the correct offset for the instruction. We use the address of the
+ // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+ int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
+ int relative_position = constant_offset - pos;
+
+ // Patch in the right value.
+ region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+ }
+
+ CodeGeneratorX86_64& codegen_;
+
+ // Location in constant area that the fixup refers to.
+ int offset_into_constant_area_;
+};
+
+Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
+ return Address::RIP(fixup);
+}
+
} // namespace x86_64
} // namespace art
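
The displacement math in RIPFixup::Process above can be checked with concrete numbers (all
hypothetical): the patched 4-byte field must hold the distance from the end of the load
instruction, which is where RIP points, to the literal inside the constant area.

    #include <cassert>

    int main() {
      int constant_area_start = 0x2000;  // Hypothetical code offset of the constant area.
      int offset_into_area = 0x10;       // Hypothetical value returned by AddDouble().
      int pos = 0x1040;                  // End of the instruction being patched.
      int constant_offset = constant_area_start + offset_into_area;
      int relative_position = constant_offset - pos;  // Stored at pos - 4.
      assert(relative_position == 0xFD0);
      return 0;
    }
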
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 375c0b0..c819eec 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -173,6 +173,9 @@
void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
void HandleBitwiseOperation(HBinaryOperation* operation);
void GenerateRemFP(HRem *rem);
+ void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+ void DivByPowerOfTwo(HBinaryOperation* instruction);
+ void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleShift(HBinaryOperation* operation);
void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -243,6 +246,7 @@
Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ void Finalize(CodeAllocator* allocator) OVERRIDE;
InstructionSet GetInstructionSet() const OVERRIDE {
return InstructionSet::kX86_64;
@@ -274,6 +278,15 @@
return isa_features_;
}
+ int ConstantAreaStart() const {
+ return constant_area_start_;
+ }
+
+ Address LiteralDoubleAddress(double v);
+ Address LiteralFloatAddress(float v);
+ Address LiteralInt32Address(int32_t v);
+ Address LiteralInt64Address(int64_t v);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -284,6 +297,10 @@
X86_64Assembler assembler_;
const X86_64InstructionSetFeatures& isa_features_;
+ // Offset to start of the constant area in the assembled code.
+ // Used for fixups to the constant area.
+ int constant_area_start_;
+
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index b6e4510..aec2d19 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -320,6 +320,27 @@
GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}
+void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
+ CreateLongToLongLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location input = locations->InAt(0);
+ Register input_lo = input.AsRegisterPairLow<Register>();
+ Register input_hi = input.AsRegisterPairHigh<Register>();
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+
+ X86Assembler* assembler = GetAssembler();
+ // Assign the inputs to the outputs, mixing low/high.
+ __ movl(output_lo, input_hi);
+ __ movl(output_hi, input_lo);
+ __ bswapl(output_lo);
+ __ bswapl(output_hi);
+}
+
void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
CreateIntToIntLocations(arena_, invoke);
}
@@ -1330,6 +1351,181 @@
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Offset is a long, but in 32 bit mode, we only need the low word.
+ // Can we update the invoke here to remove a TypeConvert to Long?
+ locations->SetInAt(2, Location::RequiresRegister());
+ // Expected value must be in EAX or EDX:EAX.
+ // For long, new value must be in ECX:EBX.
+ if (type == Primitive::kPrimLong) {
+ locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
+ locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
+ } else {
+ locations->SetInAt(3, Location::RegisterLocation(EAX));
+ locations->SetInAt(4, Location::RequiresRegister());
+ }
+
+ // Force a byte register for the output.
+ locations->SetOut(Location::RegisterLocation(EAX));
+ if (type == Primitive::kPrimNot) {
+ // Need temp registers for card-marking.
+ locations->AddTemp(Location::RequiresRegister());
+ // Need a byte register for marking.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ }
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
+}
+
+static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
+ X86Assembler* assembler =
+ reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register base = locations->InAt(1).AsRegister<Register>();
+ Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
+ Location out = locations->Out();
+ DCHECK_EQ(out.AsRegister<Register>(), EAX);
+
+ if (type == Primitive::kPrimLong) {
+ DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
+ DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
+ DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
+ DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
+ __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
+ } else {
+ // Integer or object.
+ DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
+ Register value = locations->InAt(4).AsRegister<Register>();
+ if (type == Primitive::kPrimNot) {
+ // Mark card for object assuming new value is stored.
+ codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
+ locations->GetTemp(1).AsRegister<Register>(),
+ base,
+ value);
+ }
+
+ __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+ }
+
+ // locked cmpxchg has full barrier semantics, and we don't need scheduling
+ // barriers at this time.
+
+ // Convert ZF into the boolean result.
+ __ setb(kZero, out.AsRegister<Register>());
+ __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimInt, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimLong, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimNot, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
+ X86Assembler* assembler) {
+ Immediate imm_shift(shift);
+ Immediate imm_mask(mask);
+ __ movl(temp, reg);
+ __ shrl(reg, imm_shift);
+ __ andl(temp, imm_mask);
+ __ andl(reg, imm_mask);
+ __ shll(temp, imm_shift);
+ __ orl(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
+ X86Assembler* assembler =
+ reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register reg = locations->InAt(0).AsRegister<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+ /*
+ * Use one bswap instruction to reverse byte order first, then use 3 rounds of bit
+ * swapping to reverse the bits of x. Using bswap saves instructions compared to the
+ * generic luni implementation, which needs 5 rounds of bit swapping.
+ * x = bswap x
+ * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
+ * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
+ * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
+ */
+ __ bswapl(reg);
+ SwapBits(reg, temp, 1, 0x55555555, assembler);
+ SwapBits(reg, temp, 2, 0x33333333, assembler);
+ SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
+ X86Assembler* assembler =
+ reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+ // We want to swap high/low, then bswap each half, and then apply the same
+ // bit swaps as the 32-bit reverse.
+ // Exchange high and low.
+ __ movl(temp, reg_low);
+ __ movl(reg_low, reg_high);
+ __ movl(reg_high, temp);
+
+ // bit-reverse low
+ __ bswapl(reg_low);
+ SwapBits(reg_low, temp, 1, 0x55555555, assembler);
+ SwapBits(reg_low, temp, 2, 0x33333333, assembler);
+ SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
+
+ // bit-reverse high
+ __ bswapl(reg_high);
+ SwapBits(reg_high, temp, 1, 0x55555555, assembler);
+ SwapBits(reg_high, temp, 2, 0x33333333, assembler);
+ SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
+}
+
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -1338,16 +1534,10 @@
void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
} // namespace x86
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index f6fa013..c0c4ff3 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -298,25 +298,23 @@
// TODO: Allow x86 to work with memory. This requires assembler support, see below.
// locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
- locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above.
+ locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask.
}
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations, bool is64bit,
+ X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) {
Location output = locations->Out();
- CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
if (output.IsFpuRegister()) {
// In-register
- XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ // TODO: We can mask directly against the constant area if we align it on 16 bytes.
if (is64bit) {
- __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
- __ movd(xmm_temp, cpu_temp);
+ __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
__ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
} else {
- __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
- __ movd(xmm_temp, cpu_temp);
+ __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
__ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
}
} else {
@@ -341,7 +339,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -349,7 +347,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}
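
In scalar terms, the new constant-area sequence computes abs by clearing the
sign bit; a sketch under that reading (not ART code):

    #include <cstdint>
    #include <cstring>

    float AbsFloatModel(float v) {
      uint32_t bits;
      std::memcpy(&bits, &v, sizeof(bits));
      bits &= UINT32_C(0x7FFFFFFF);  // clear the sign bit; doubles use 0x7FFFFFFFFFFFFFFF
      std::memcpy(&v, &bits, sizeof(v));
      return v;
    }
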
static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -400,7 +398,7 @@
}
static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
- X86_64Assembler* assembler) {
+ X86_64Assembler* assembler, CodeGeneratorX86_64* codegen) {
Location op1_loc = locations->InAt(0);
Location op2_loc = locations->InAt(1);
Location out_loc = locations->Out();
@@ -427,7 +425,7 @@
//
// This removes one jmp, but needs to copy one input (op1) to out.
//
- // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+ // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
@@ -461,14 +459,11 @@
// NaN handling.
__ Bind(&nan);
- CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
if (is_double) {
- __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+ __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
} else {
- __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+ __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
}
- __ movd(out, cpu_temp, is_double);
__ jmp(&done);
// out := op2;
@@ -483,7 +478,7 @@
__ Bind(&done);
}
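
A scalar sketch of the min contract implemented above, assuming Java
Math.min semantics (the NaN case returns the canonical NaN loaded from the
literal pool; equal operands, i.e. signed zeros, are combined bitwise so
that -0.0 wins for min, while max would AND instead of OR):

    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    double ModelMinDouble(double a, double b) {
      if (std::isnan(a) || std::isnan(b)) {
        return std::numeric_limits<double>::quiet_NaN();  // 0x7FF8000000000000
      }
      if (a == b) {
        // Signed zeros compare equal; OR the bit patterns so -0.0 wins.
        uint64_t ab, bb;
        std::memcpy(&ab, &a, sizeof(ab));
        std::memcpy(&bb, &b, sizeof(bb));
        const uint64_t rb = ab | bb;
        std::memcpy(&a, &rb, sizeof(a));
        return a;
      }
      return (a < b) ? a : b;
    }
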
-static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
kIntrinsified);
@@ -492,39 +487,38 @@
// The following is sub-optimal, but all we can do for now. It would be fine to also accept
// the second input as the output (we can simply swap inputs).
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
}
void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}
static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -1202,6 +1196,175 @@
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ // expected value must be in EAX/RAX.
+ locations->SetInAt(3, Location::RegisterLocation(RAX));
+ locations->SetInAt(4, Location::RequiresRegister());
+
+ locations->SetOut(Location::RequiresRegister());
+ if (type == Primitive::kPrimNot) {
+ // Need temp registers for card-marking.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
+}
+
+static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ X86_64Assembler* assembler =
+ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+ CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
+ DCHECK_EQ(expected.AsRegister(), RAX);
+ CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ if (type == Primitive::kPrimLong) {
+ __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
+ } else {
+ // Integer or object.
+ if (type == Primitive::kPrimNot) {
+ // Mark card for object assuming new value is stored.
+ codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
+ locations->GetTemp(1).AsRegister<CpuRegister>(),
+ base,
+ value);
+ }
+
+ __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+ }
+
+ // locked cmpxchg has full barrier semantics, and we don't need scheduling
+ // barriers at this time.
+
+ // Convert ZF into the boolean result.
+ __ setcc(kZero, out);
+ __ movzxb(out, out);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimInt, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimLong, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimNot, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
+ X86_64Assembler* assembler) {
+ Immediate imm_shift(shift);
+ Immediate imm_mask(mask);
+ __ movl(temp, reg);
+ __ shrl(reg, imm_shift);
+ __ andl(temp, imm_mask);
+ __ andl(reg, imm_mask);
+ __ shll(temp, imm_shift);
+ __ orl(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
+ X86_64Assembler* assembler =
+ reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ /*
+ * Use one bswap instruction to reverse byte order first and then use 3 rounds of
+ * bit swapping to reverse the bits in a number x. Using bswap saves instructions
+ * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
+ * x = bswap x
+ * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
+ * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
+ * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
+ */
+ __ bswapl(reg);
+ SwapBits(reg, temp, 1, 0x55555555, assembler);
+ SwapBits(reg, temp, 2, 0x33333333, assembler);
+ SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
+ int32_t shift, int64_t mask, X86_64Assembler* assembler) {
+ Immediate imm_shift(shift);
+ __ movq(temp_mask, Immediate(mask));
+ __ movq(temp, reg);
+ __ shrq(reg, imm_shift);
+ __ andq(temp, temp_mask);
+ __ andq(reg, temp_mask);
+ __ shlq(temp, imm_shift);
+ __ orq(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
+ X86_64Assembler* assembler =
+ reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+
+ /*
+ * Use one bswap instruction to reverse byte order first and then use 3 rounds of
+ * bit swapping to reverse the bits in a long number x. Using bswap saves instructions
+ * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
+ * x = bswap x
+ * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
+ * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
+ * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
+ */
+ __ bswapq(reg);
+ SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
+ SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
+ SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
+}
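
The same algorithm over a full 64-bit register, as a C++ model (illustration
only). The 64-bit masks do not fit in x86-64's sign-extended 32-bit
arithmetic immediates, which is why SwapBits64 above first materializes each
mask in a second temporary register:

    #include <cstdint>

    uint64_t ReverseBits64(uint64_t x) {
      x = __builtin_bswap64(x);
      x = ((x & UINT64_C(0x5555555555555555)) << 1) | ((x >> 1) & UINT64_C(0x5555555555555555));
      x = ((x & UINT64_C(0x3333333333333333)) << 2) | ((x >> 2) & UINT64_C(0x3333333333333333));
      x = ((x & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4) | ((x >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F));
      return x;
    }
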
+
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -1210,14 +1373,9 @@
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
} // namespace x86_64
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 6286b10..3b42f63 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -373,24 +373,34 @@
void Thumb2Assembler::ldrd(Register rd, const Address& ad, Condition cond) {
+ ldrd(rd, Register(rd + 1), ad, cond);
+}
+
+
+void Thumb2Assembler::ldrd(Register rd, Register rd2, const Address& ad, Condition cond) {
CheckCondition(cond);
- CHECK_EQ(rd % 2, 0);
+ // Encoding T1.
// This is different from other loads. The encoding is like ARM.
int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 |
static_cast<int32_t>(rd) << 12 |
- (static_cast<int32_t>(rd) + 1) << 8 |
+ static_cast<int32_t>(rd2) << 8 |
ad.encodingThumbLdrdStrd();
Emit32(encoding);
}
void Thumb2Assembler::strd(Register rd, const Address& ad, Condition cond) {
+ strd(rd, Register(rd + 1), ad, cond);
+}
+
+
+void Thumb2Assembler::strd(Register rd, Register rd2, const Address& ad, Condition cond) {
CheckCondition(cond);
- CHECK_EQ(rd % 2, 0);
+ // Encoding T1.
// This is different from other loads. The encoding is like ARM.
int32_t encoding = B31 | B30 | B29 | B27 | B22 |
static_cast<int32_t>(rd) << 12 |
- (static_cast<int32_t>(rd) + 1) << 8 |
+ static_cast<int32_t>(rd2) << 8 |
ad.encodingThumbLdrdStrd();
Emit32(encoding);
}
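
For reference, the fixed bits assembled here (B31|B30|B29|B27|B22) come to
0xE8400000 for STRD in encoding T1; LDRD additionally sets B20. A sketch,
where addr_encoding stands in for ad.encodingThumbLdrdStrd() (Rn, the P/U/W
addressing bits and the scaled 8-bit offset):

    #include <cstdint>

    uint32_t StrdT1Encoding(uint32_t rd, uint32_t rd2, uint32_t addr_encoding) {
      return UINT32_C(0xE8400000) | (rd << 12) | (rd2 << 8) | addr_encoding;
    }
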
@@ -2613,14 +2623,16 @@
Register tmp_reg = kNoRegister;
if (!Address::CanHoldStoreOffsetThumb(type, offset)) {
CHECK_NE(base, IP);
- if (reg != IP) {
+ if (reg != IP &&
+ (type != kStoreWordPair || reg + 1 != IP)) {
tmp_reg = IP;
} else {
- // Be careful not to use IP twice (for `reg` and to build the
- // Address object used by the store instruction(s) below).
- // Instead, save R5 on the stack (or R6 if R5 is not available),
- // use it as secondary temporary register, and restore it after
- // the store instruction has been emitted.
+ // Be careful not to use IP twice (for `reg` (or `reg` + 1 in
+ // the case of a word-pair store) and to build the Address
+ // object used by the store instruction(s) below). Instead,
+ // save R5 on the stack (or R6 if R5 is not available), use it
+ // as a secondary temporary register, and restore it after the
+ // store instruction has been emitted.
tmp_reg = base != R5 ? R5 : R6;
Push(tmp_reg);
if (base == SP) {
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 81dd138..e33c240 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -135,9 +135,17 @@
void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+ // Load/store register dual instructions using registers `rd` and `rd` + 1.
void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+ // Load/store register dual instructions using registers `rd` and `rd2`.
+ // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
+ // does not require `rd` to be even, nor `rd2` to be equal to `rd` + 1.
+ void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
+ void strd(Register rd, Register rd2, const Address& ad, Condition cond);
+
+
void ldm(BlockAddressMode am, Register base,
RegList regs, Condition cond = AL) OVERRIDE;
void stm(BlockAddressMode am, Register base,
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 62e0b90..5f5561a 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -291,4 +291,59 @@
DriverStr(expected, "StoreWordToNonThumbOffset");
}
+TEST_F(AssemblerThumb2Test, StoreWordPairToThumbOffset) {
+ arm::StoreOperandType type = arm::kStoreWordPair;
+ int32_t offset = 1020;
+ ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+ __ StoreToOffset(type, arm::R0, arm::SP, offset);
+ // We cannot use IP (i.e. R12) as the first source register, as it would
+ // force us to use SP (i.e. R13) as the second source register, which
+ // would have an "unpredictable" effect according to the ARMv7
+ // specification (the T1 encoding describes the result as
+ // UNPREDICTABLE when one of the source registers is R13).
+ //
+ // So we use (R11, IP) (i.e. (R11, R12)) as source registers in the
+ // following instructions.
+ __ StoreToOffset(type, arm::R11, arm::SP, offset);
+ __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+ const char* expected =
+ "strd r0, r1, [sp, #1020]\n"
+ "strd r11, ip, [sp, #1020]\n"
+ "strd r11, ip, [r5, #1020]\n";
+ DriverStr(expected, "StoreWordPairToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) {
+ arm::StoreOperandType type = arm::kStoreWordPair;
+ int32_t offset = 1024;
+ ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+ __ StoreToOffset(type, arm::R0, arm::SP, offset);
+ // Same comment as in AssemblerThumb2Test.StoreWordPairToThumbOffset
+ // regarding the use of (R11, IP) (i.e. (R11, R12)) as source
+ // registers in the following instructions.
+ __ StoreToOffset(type, arm::R11, arm::SP, offset);
+ __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+ const char* expected =
+ "mov ip, #1024\n" // LoadImmediate(ip, 1024)
+ "add ip, ip, sp\n"
+ "strd r0, r1, [ip, #0]\n"
+
+ "str r5, [sp, #-4]!\n" // Push(r5)
+ "movw r5, #1028\n" // LoadImmediate(r5, 1024 + kRegisterSize)
+ "add r5, r5, sp\n"
+ "strd r11, ip, [r5, #0]\n"
+ "ldr r5, [sp], #4\n" // Pop(r5)
+
+ "str r6, [sp, #-4]!\n" // Push(r6)
+ "mov r6, #1024\n" // LoadImmediate(r6, 1024)
+ "add r6, r6, r5\n"
+ "strd r11, ip, [r6, #0]\n"
+ "ldr r6, [sp], #4\n"; // Pop(r6)
+ DriverStr(expected, "StoreWordPairToNonThumbOffset");
+}
+
} // namespace art
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index b13edb6..3fe1a31 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -44,7 +44,9 @@
enum class RegisterView { // private
kUsePrimaryName,
- kUseSecondaryName
+ kUseSecondaryName,
+ kUseTertiaryName,
+ kUseQuaternaryName,
};
template<typename Ass, typename Reg, typename FPReg, typename Imm>
@@ -97,6 +99,15 @@
fmt);
}
+ std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) {
+ return RepeatTemplatedRegisters<Reg, Reg>(f,
+ GetRegisters(),
+ GetRegisters(),
+ &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+ &AssemblerTest::GetRegName<RegisterView::kUseQuaternaryName>,
+ fmt);
+ }
+
std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) {
return RepeatTemplatedRegisters<Reg, Reg>(f,
GetRegisters(),
@@ -240,6 +251,18 @@
UNREACHABLE();
}
+ // Tertiary register names are the tertiary view on registers, e.g., 16b on 64b systems.
+ virtual std::string GetTertiaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
+ UNIMPLEMENTED(FATAL) << "Architecture does not support tertiary registers";
+ UNREACHABLE();
+ }
+
+ // Quaternary register names are the quaternary view on registers, e.g., 8b on 64b systems.
+ virtual std::string GetQuaternaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
+ UNIMPLEMENTED(FATAL) << "Architecture does not support quaternary registers";
+ UNREACHABLE();
+ }
+
std::string GetRegisterName(const Reg& reg) {
return GetRegName<RegisterView::kUsePrimaryName>(reg);
}
@@ -520,6 +543,14 @@
case RegisterView::kUseSecondaryName:
sreg << GetSecondaryRegisterName(reg);
break;
+
+ case RegisterView::kUseTertiaryName:
+ sreg << GetTertiaryRegisterName(reg);
+ break;
+
+ case RegisterView::kUseQuaternaryName:
+ sreg << GetQuaternaryRegisterName(reg);
+ break;
}
return sreg.str();
}
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 8ce9375..51cc7ac 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1466,6 +1466,15 @@
EmitOperand(reg, address);
}
+
+void X86Assembler::cmpxchg8b(const Address& address) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0xC7);
+ EmitOperand(1, address);
+}
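
lock cmpxchg8b (opcode 0F C7 /1, hence EmitOperand(1, address) above) gives
32-bit x86 a 64-bit CAS, with the expected value in EDX:EAX and the
replacement in ECX:EBX. A sketch of the semantics via a GCC/Clang builtin
that compiles to this instruction on x86-32:

    #include <cstdint>

    bool Cas64(volatile int64_t* addr, int64_t expected, int64_t desired) {
      return __sync_bool_compare_and_swap(addr, expected, desired);
    }
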
+
+
void X86Assembler::mfence() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 046df02..f3675ae 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -457,6 +457,7 @@
X86Assembler* lock();
void cmpxchgl(const Address& address, Register reg);
+ void cmpxchg8b(const Address& address);
void mfence();
@@ -476,6 +477,10 @@
lock()->cmpxchgl(address, reg);
}
+ void LockCmpxchg8b(const Address& address) {
+ lock()->cmpxchg8b(address);
+ }
+
//
// Misc. functionality
//
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index fccb510..dba3b6b 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -127,4 +127,49 @@
DriverStr(expected, "LoadLongConstant");
}
+TEST_F(AssemblerX86Test, LockCmpxchgl) {
+ GetAssembler()->LockCmpxchgl(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
+ x86::Register(x86::ESI));
+ GetAssembler()->LockCmpxchgl(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12),
+ x86::Register(x86::ESI));
+ GetAssembler()->LockCmpxchgl(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12),
+ x86::Register(x86::EDI));
+ GetAssembler()->LockCmpxchgl(x86::Address(
+ x86::Register(x86::EBP), 0), x86::Register(x86::ESI));
+ GetAssembler()->LockCmpxchgl(x86::Address(
+ x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0),
+ x86::Register(x86::ESI));
+ const char* expected =
+ "lock cmpxchgl %ESI, 0xc(%EDI,%EBX,4)\n"
+ "lock cmpxchgl %ESI, 0xc(%EDI,%ESI,4)\n"
+ "lock cmpxchgl %EDI, 0xc(%EDI,%ESI,4)\n"
+ "lock cmpxchgl %ESI, (%EBP)\n"
+ "lock cmpxchgl %ESI, (%EBP,%ESI,1)\n";
+
+ DriverStr(expected, "lock_cmpxchgl");
+}
+
+TEST_F(AssemblerX86Test, LockCmpxchg8b) {
+ GetAssembler()->LockCmpxchg8b(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+ GetAssembler()->LockCmpxchg8b(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12));
+ GetAssembler()->LockCmpxchg8b(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12));
+ GetAssembler()->LockCmpxchg8b(x86::Address(x86::Register(x86::EBP), 0));
+ GetAssembler()->LockCmpxchg8b(x86::Address(
+ x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0));
+ const char* expected =
+ "lock cmpxchg8b 0xc(%EDI,%EBX,4)\n"
+ "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n"
+ "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n"
+ "lock cmpxchg8b (%EBP)\n"
+ "lock cmpxchg8b (%EBP,%ESI,1)\n";
+
+ DriverStr(expected, "lock_cmpxchg8b");
+}
+
} // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 780c1b4..cb6d400 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -209,7 +209,9 @@
void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitOptionalByteRegNormalizingRex32(dst, src);
+ // A byte register only occurs in the register-source form, so we don't use
+ // EmitOptionalByteRegNormalizingRex32(dst, src) here.
+ EmitOptionalRex32(dst, src);
EmitUint8(0x0F);
EmitUint8(0xB6);
EmitOperand(dst.LowBits(), src);
@@ -227,7 +229,9 @@
void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitOptionalByteRegNormalizingRex32(dst, src);
+ // A byte register only occurs in the register-source form, so we don't use
+ // EmitOptionalByteRegNormalizingRex32(dst, src) here.
+ EmitOptionalRex32(dst, src);
EmitUint8(0x0F);
EmitUint8(0xBE);
EmitOperand(dst.LowBits(), src);
@@ -1620,6 +1624,14 @@
}
+void X86_64Assembler::imulq(CpuRegister reg) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitRex64(reg);
+ EmitUint8(0xF7);
+ EmitOperand(5, Operand(reg));
+}
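
The one-operand form emitted here is F7 /5 (hence EmitOperand(5, ...)): a
full 64x64 -> 128-bit signed multiply of RAX by the operand, with the result
in RDX:RAX. In C++ terms (GCC/Clang __int128 extension; sketch only):

    #include <cstdint>

    __int128 FullSignedMul(int64_t rax, int64_t src) {
      return static_cast<__int128>(rax) * src;
    }
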
+
+
void X86_64Assembler::imull(const Address& address) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitOptionalRex32(address);
@@ -1853,11 +1865,22 @@
void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOptionalRex32(reg, address);
EmitUint8(0x0F);
EmitUint8(0xB1);
EmitOperand(reg.LowBits(), address);
}
+
+void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitRex64(reg, address);
+ EmitUint8(0x0F);
+ EmitUint8(0xB1);
+ EmitOperand(reg.LowBits(), address);
+}
+
+
void X86_64Assembler::mfence() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
@@ -1956,6 +1979,10 @@
for (int i = 1; i < length; i++) {
EmitUint8(operand.encoding_[i]);
}
+ AssemblerFixup* fixup = operand.GetFixup();
+ if (fixup != nullptr) {
+ EmitFixup(fixup);
+ }
}
@@ -2154,11 +2181,18 @@
}
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
- EmitOptionalRex(true, false, dst.NeedsRex(), false, src.NeedsRex());
+ // For src, SPL, BPL, SIL, DIL need the rex prefix.
+ bool force = src.AsRegister() > 3;
+ EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
}
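
The rationale for the force bit: with no REX prefix at all, byte-register
encodings 4-7 select AH, CH, DH, BH; an empty REX (0x40) remaps them to SPL,
BPL, SIL, DIL. A sketch of the predicate (the names here are hypothetical):

    // A byte register needs some REX prefix if it is R8B..R15B (covered by
    // REX.R/REX.B anyway) or if its low encoding is 4..7 (SPL/BPL/SIL/DIL).
    bool ByteRegNeedsRex(int low_encoding, bool is_extended) {
      return is_extended || low_encoding > 3;
    }
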
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
- uint8_t rex = 0x40 | operand.rex(); // REX.0000
+ uint8_t rex = operand.rex();
+ // For dst, SPL, BPL, SIL, DIL need the rex prefix.
+ bool force = dst.AsRegister() > 3;
+ if (force) {
+ rex |= 0x40; // REX.0000
+ }
if (dst.NeedsRex()) {
rex |= 0x44; // REX.0R00
}
@@ -2706,5 +2740,55 @@
#undef __
}
+void X86_64Assembler::AddConstantArea() {
+ const std::vector<int32_t>& area = constant_area_.GetBuffer();
+ for (size_t i = 0, u = area.size(); i < u; i++) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitInt32(area[i]);
+ }
+}
+
+int ConstantArea::AddInt32(int32_t v) {
+ for (size_t i = 0, u = buffer_.size(); i < u; i++) {
+ if (v == buffer_[i]) {
+ return i * elem_size_;
+ }
+ }
+
+ // Didn't match anything.
+ int result = buffer_.size() * elem_size_;
+ buffer_.push_back(v);
+ return result;
+}
+
+int ConstantArea::AddInt64(int64_t v) {
+ int32_t v_low = v;
+ int32_t v_high = v >> 32;
+ if (buffer_.size() > 1) {
+ // Ensure we don't read past the end of the buffer.
+ for (size_t i = 0, u = buffer_.size() - 1; i < u; i++) {
+ if (v_low == buffer_[i] && v_high == buffer_[i+1]) {
+ return i * elem_size_;
+ }
+ }
+ }
+
+ // Didn't match anything.
+ int result = buffer_.size() * elem_size_;
+ buffer_.push_back(v_low);
+ buffer_.push_back(v_high);
+ return result;
+}
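
The linear scan deduplicates across widths: a 64-bit value can reuse any
adjacent pair of existing 32-bit slots, and a 32-bit value can alias half of
an existing 64-bit one. An illustrative usage sketch (offsets assume a fresh
ConstantArea):

    ConstantArea area;
    int a = area.AddInt32(0x7FC00000);            // 0: appended
    int b = area.AddInt64(INT64_C(0x100000002));  // 4: appends low = 2, high = 1
    int c = area.AddInt32(2);                     // 4: reuses the int64's low word
    int d = area.AddInt64(INT64_C(0x100000002));  // 4: matches the existing pair
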
+
+int ConstantArea::AddDouble(double v) {
+ // Treat the value as a 64-bit integer.
+ return AddInt64(bit_cast<int64_t, double>(v));
+}
+
+int ConstantArea::AddFloat(float v) {
+ // Treat the value as a 32-bit integer.
+ return AddInt32(bit_cast<int32_t, float>(v));
+}
+
} // namespace x86_64
} // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index bcc8e62..ef6205c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -97,9 +97,13 @@
&& (reg.NeedsRex() == ((rex_ & 1) != 0)); // REX.000B bits match.
}
+ AssemblerFixup* GetFixup() const {
+ return fixup_;
+ }
+
protected:
// Operand can be sub classed (e.g: Address).
- Operand() : rex_(0), length_(0) { }
+ Operand() : rex_(0), length_(0), fixup_(nullptr) { }
void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
CHECK_EQ(mod_in & ~3, 0);
@@ -136,12 +140,17 @@
length_ += disp_size;
}
+ void SetFixup(AssemblerFixup* fixup) {
+ fixup_ = fixup;
+ }
+
private:
uint8_t rex_;
uint8_t length_;
uint8_t encoding_[6];
+ AssemblerFixup* fixup_;
- explicit Operand(CpuRegister reg) : rex_(0), length_(0) { SetModRM(3, reg); }
+ explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
// Get the operand encoding byte at the given index.
uint8_t encoding_at(int index_in) const {
@@ -232,6 +241,15 @@
return result;
}
+ // An RIP relative address that will be fixed up later.
+ static Address RIP(AssemblerFixup* fixup) {
+ Address result;
+ result.SetModRM(0, CpuRegister(RBP));
+ result.SetDisp32(0);
+ result.SetFixup(fixup);
+ return result;
+ }
+
// If no_rip is true then the Absolute address isn't RIP relative.
static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) {
return Absolute(addr.Int32Value(), no_rip);
@@ -242,6 +260,55 @@
};
+/**
+ * Class to handle constant area values.
+ */
+class ConstantArea {
+ public:
+ ConstantArea() {}
+
+ /**
+ * Add a double to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddDouble(double v);
+
+ /**
+ * Add a float to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddFloat(float v);
+
+ /**
+ * Add an int32_t to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddInt32(int32_t v);
+
+ /**
+ * Add an int64_t to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddInt64(int64_t v);
+
+ int GetSize() const {
+ return buffer_.size() * elem_size_;
+ }
+
+ const std::vector<int32_t>& GetBuffer() const {
+ return buffer_;
+ }
+
+ private:
+ static constexpr size_t elem_size_ = sizeof(int32_t);
+ std::vector<int32_t> buffer_;
+};
+
+
class X86_64Assembler FINAL : public Assembler {
public:
X86_64Assembler() {}
@@ -468,6 +535,7 @@
void imull(CpuRegister reg, const Immediate& imm);
void imull(CpuRegister reg, const Address& address);
+ void imulq(CpuRegister src);
void imulq(CpuRegister dst, CpuRegister src);
void imulq(CpuRegister reg, const Immediate& imm);
void imulq(CpuRegister reg, const Address& address);
@@ -517,6 +585,7 @@
X86_64Assembler* lock();
void cmpxchgl(const Address& address, CpuRegister reg);
+ void cmpxchgq(const Address& address, CpuRegister reg);
void mfence();
@@ -539,6 +608,10 @@
lock()->cmpxchgl(address, reg);
}
+ void LockCmpxchgq(const Address& address, CpuRegister reg) {
+ lock()->cmpxchgq(address, reg);
+ }
+
//
// Misc. functionality
//
@@ -663,6 +736,45 @@
// and branch to a ExceptionSlowPath if it is.
void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
+ /**
+ * Add a double to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddDouble(double v) { return constant_area_.AddDouble(v); }
+
+ /**
+ * Add a float to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddFloat(float v) { return constant_area_.AddFloat(v); }
+
+ /**
+ * Add an int32_t to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddInt32(int32_t v) { return constant_area_.AddInt32(v); }
+
+ /**
+ * Add an int64_t to the constant area.
+ * @param v literal to be added to the constant area.
+ * @returns the offset in the constant area where the literal resides.
+ */
+ int AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
+
+ /**
+ * Add the contents of the constant area to the assembler buffer.
+ */
+ void AddConstantArea();
+
+ /**
+ * Is the constant area empty?
+ * @returns 'true' if there are no literals in the constant area.
+ */
+ bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
+
private:
void EmitUint8(uint8_t value);
void EmitInt32(int32_t value);
@@ -708,6 +820,8 @@
void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
+ ConstantArea constant_area_;
+
DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
};
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 4402dfc..116190a 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -174,6 +174,40 @@
secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14d");
secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15d");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "ax");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bx");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cx");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dx");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bp");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "sp");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "si");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "di");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15w");
+
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "al");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bl");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cl");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dl");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bpl");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "spl");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "sil");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "dil");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15b");
+
fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM0));
fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM1));
fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2));
@@ -216,9 +250,21 @@
return secondary_register_names_[reg];
}
+ std::string GetTertiaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE {
+ CHECK(tertiary_register_names_.find(reg) != tertiary_register_names_.end());
+ return tertiary_register_names_[reg];
+ }
+
+ std::string GetQuaternaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE {
+ CHECK(quaternary_register_names_.find(reg) != quaternary_register_names_.end());
+ return quaternary_register_names_[reg];
+ }
+
private:
std::vector<x86_64::CpuRegister*> registers_;
std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_;
+ std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_;
+ std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_;
std::vector<x86_64::XmmRegister*> fp_registers_;
};
@@ -269,6 +315,10 @@
DriverStr(Repeatri(&x86_64::X86_64Assembler::addl, 4U, "add ${imm}, %{reg}"), "addli");
}
+TEST_F(AssemblerX86_64Test, ImulqReg1) {
+ DriverStr(RepeatR(&x86_64::X86_64Assembler::imulq, "imulq %{reg}"), "imulq");
+}
+
TEST_F(AssemblerX86_64Test, ImulqRegs) {
DriverStr(RepeatRR(&x86_64::X86_64Assembler::imulq, "imulq %{reg2}, %{reg1}"), "imulq");
}
@@ -539,6 +589,56 @@
// DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl");
}
+TEST_F(AssemblerX86_64Test, LockCmpxchgl) {
+ GetAssembler()->LockCmpxchgl(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgl(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgl(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::R8));
+ GetAssembler()->LockCmpxchgl(x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgl(x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0),
+ x86_64::CpuRegister(x86_64::RSI));
+ const char* expected =
+ "lock cmpxchgl %ESI, 0xc(%RDI,%RBX,4)\n"
+ "lock cmpxchgl %ESI, 0xc(%RDI,%R9,4)\n"
+ "lock cmpxchgl %R8d, 0xc(%RDI,%R9,4)\n"
+ "lock cmpxchgl %ESI, (%R13)\n"
+ "lock cmpxchgl %ESI, (%R13,%R9,1)\n";
+
+ DriverStr(expected, "lock_cmpxchgl");
+}
+
+TEST_F(AssemblerX86_64Test, LockCmpxchgq) {
+ GetAssembler()->LockCmpxchgq(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgq(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgq(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::R8));
+ GetAssembler()->LockCmpxchgq(x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgq(x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0),
+ x86_64::CpuRegister(x86_64::RSI));
+ const char* expected =
+ "lock cmpxchg %RSI, 0xc(%RDI,%RBX,4)\n"
+ "lock cmpxchg %RSI, 0xc(%RDI,%R9,4)\n"
+ "lock cmpxchg %R8, 0xc(%RDI,%R9,4)\n"
+ "lock cmpxchg %RSI, (%R13)\n"
+ "lock cmpxchg %RSI, (%R13,%R9,1)\n";
+
+ DriverStr(expected, "lock_cmpxchg");
+}
+
TEST_F(AssemblerX86_64Test, Movl) {
GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address(
x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
@@ -824,31 +924,12 @@
"l", "ge", "le" };
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
- std::string byte_regs[16];
- byte_regs[x86_64::RAX] = "al";
- byte_regs[x86_64::RBX] = "bl";
- byte_regs[x86_64::RCX] = "cl";
- byte_regs[x86_64::RDX] = "dl";
- byte_regs[x86_64::RBP] = "bpl";
- byte_regs[x86_64::RSP] = "spl";
- byte_regs[x86_64::RSI] = "sil";
- byte_regs[x86_64::RDI] = "dil";
- byte_regs[x86_64::R8] = "r8b";
- byte_regs[x86_64::R9] = "r9b";
- byte_regs[x86_64::R10] = "r10b";
- byte_regs[x86_64::R11] = "r11b";
- byte_regs[x86_64::R12] = "r12b";
- byte_regs[x86_64::R13] = "r13b";
- byte_regs[x86_64::R14] = "r14b";
- byte_regs[x86_64::R15] = "r15b";
-
std::ostringstream str;
for (auto reg : registers) {
for (size_t i = 0; i < 15; ++i) {
assembler->setcc(static_cast<x86_64::Condition>(i), *reg);
- str << "set" << suffixes[i] << " %" << byte_regs[reg->AsRegister()] << "\n";
+ str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(*reg) << "\n";
}
}
@@ -979,4 +1060,12 @@
DriverFn(&decreaseframe_test_fn, "DecreaseFrame");
}
+TEST_F(AssemblerX86_64Test, MovzxbRegs) {
+ DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb");
+}
+
+TEST_F(AssemblerX86_64Test, MovsxbRegs) {
+ DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
+}
+
} // namespace art
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index a1834e1..ba0c0bd 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -942,7 +942,7 @@
opcode1 = "pextrw";
prefix[2] = 0;
has_modrm = true;
- store = true;
+ load = true;
src_reg_file = SSE;
immediate_bytes = 1;
} else {
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 6e7b04f..af00834 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -97,6 +97,7 @@
kAllocTrackerLock,
kDeoptimizationLock,
kProfilerLock,
+ kJdwpShutdownLock,
kJdwpEventListLock,
kJdwpAttachLock,
kJdwpStartLock,
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6759c4d..a909a1a 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -307,7 +307,6 @@
// Runtime JDWP state.
static JDWP::JdwpState* gJdwpState = nullptr;
static bool gDebuggerConnected; // debugger or DDMS is connected.
-static bool gDisposed; // debugger called VirtualMachine.Dispose, so we should drop the connection.
static bool gDdmThreadNotification = false;
@@ -319,6 +318,7 @@
static Dbg::HpsgWhat gDdmNhsgWhat;
bool Dbg::gDebuggerActive = false;
+bool Dbg::gDisposed = false;
ObjectRegistry* Dbg::gRegistry = nullptr;
// Recent allocation tracking.
@@ -551,7 +551,7 @@
gJdwpState->PostVMDeath();
}
// Prevent the JDWP thread from processing JDWP incoming packets after we close the connection.
- Disposed();
+ Dispose();
delete gJdwpState;
gJdwpState = nullptr;
delete gRegistry;
@@ -599,14 +599,6 @@
gDisposed = false;
}
-void Dbg::Disposed() {
- gDisposed = true;
-}
-
-bool Dbg::IsDisposed() {
- return gDisposed;
-}
-
bool Dbg::RequiresDeoptimization() {
// We don't need deoptimization if everything runs with interpreter after
// enabling -Xint mode.
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 5898784..dd7f9c5 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -239,7 +239,9 @@
static void GoActive()
LOCKS_EXCLUDED(Locks::breakpoint_lock_, Locks::deoptimization_lock_, Locks::mutator_lock_);
static void Disconnected() LOCKS_EXCLUDED(Locks::deoptimization_lock_, Locks::mutator_lock_);
- static void Disposed();
+ static void Dispose() {
+ gDisposed = true;
+ }
// Returns true if we're actually debugging with a real debugger, false if it's
// just DDMS (or nothing at all).
@@ -255,9 +257,12 @@
// Returns true if a method has any breakpoints.
static bool MethodHasAnyBreakpoints(mirror::ArtMethod* method)
- SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::breakpoint_lock_);
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+ LOCKS_EXCLUDED(Locks::breakpoint_lock_);
- static bool IsDisposed();
+ static bool IsDisposed() {
+ return gDisposed;
+ }
/*
* Time, in milliseconds, since the last debugger activity. Does not
@@ -756,6 +761,10 @@
// Indicates whether the debugger is making requests.
static bool gDebuggerActive;
+ // Indicates whether we should drop the JDWP connection because the runtime is shutting down or
+ // the debugger called VirtualMachine.Dispose.
+ static bool gDisposed;
+
// The registry mapping objects to JDWP ids.
static ObjectRegistry* gRegistry;
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index cd59365..5012965 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -64,7 +64,8 @@
}
IndirectReferenceTable::IndirectReferenceTable(size_t initialCount,
- size_t maxCount, IndirectRefKind desiredKind)
+ size_t maxCount, IndirectRefKind desiredKind,
+ bool abort_on_error)
: kind_(desiredKind),
max_entries_(maxCount) {
CHECK_GT(initialCount, 0U);
@@ -75,16 +76,28 @@
const size_t table_bytes = maxCount * sizeof(IrtEntry);
table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes,
PROT_READ | PROT_WRITE, false, false, &error_str));
- CHECK(table_mem_map_.get() != nullptr) << error_str;
- CHECK_EQ(table_mem_map_->Size(), table_bytes);
+ if (abort_on_error) {
+ CHECK(table_mem_map_.get() != nullptr) << error_str;
+ CHECK_EQ(table_mem_map_->Size(), table_bytes);
+ CHECK(table_mem_map_->Begin() != nullptr);
+ } else if (table_mem_map_.get() == nullptr ||
+ table_mem_map_->Size() != table_bytes ||
+ table_mem_map_->Begin() == nullptr) {
+ table_mem_map_.reset();
+ LOG(ERROR) << error_str;
+ return;
+ }
table_ = reinterpret_cast<IrtEntry*>(table_mem_map_->Begin());
- CHECK(table_ != nullptr);
segment_state_.all = IRT_FIRST_SEGMENT;
}
IndirectReferenceTable::~IndirectReferenceTable() {
}
+bool IndirectReferenceTable::IsValid() const {
+ return table_mem_map_.get() != nullptr;
+}
+
IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) {
IRTSegmentState prevState;
prevState.all = cookie;
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 25b0281..0072184 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -258,10 +258,15 @@
class IndirectReferenceTable {
public:
- IndirectReferenceTable(size_t initialCount, size_t maxCount, IndirectRefKind kind);
+ // WARNING: When used with abort_on_error = false, the object may be in a partially
+ // initialized state. Use IsValid() to check.
+ IndirectReferenceTable(size_t initialCount, size_t maxCount, IndirectRefKind kind,
+ bool abort_on_error = true);
~IndirectReferenceTable();
+ bool IsValid() const;
+
/*
* Add a new entry. "obj" must be a valid non-NULL object reference.
*
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index e16221c..31c9a0b 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -403,6 +403,14 @@
// Used for VirtualMachine.Exit command handling.
bool should_exit_;
int exit_status_;
+
+ // Used to synchronize runtime shutdown with the JDWP command handler thread.
+ // When the runtime shuts down, it needs to stop the JDWP command handler thread by closing the
+ // JDWP connection. However, if the JDWP thread is processing a command, it needs to wait
+ // for the command to finish so we can send its reply before closing the connection.
+ Mutex shutdown_lock_ ACQUIRED_AFTER(event_list_lock_);
+ ConditionVariable shutdown_cond_ GUARDED_BY(shutdown_lock_);
+ bool processing_request_ GUARDED_BY(shutdown_lock_);
};
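
A minimal sketch of this shutdown handshake, using std::mutex and
std::condition_variable in place of ART's Mutex/ConditionVariable (names
abbreviated; not the actual JdwpState code):

    #include <condition_variable>
    #include <mutex>

    std::mutex shutdown_lock;
    std::condition_variable shutdown_cond;
    bool processing_request = false;

    void HandlePacket() {  // JDWP command handler thread
      { std::lock_guard<std::mutex> l(shutdown_lock); processing_request = true; }
      // ... decode the command, run it, write the reply ...
      { std::lock_guard<std::mutex> l(shutdown_lock); processing_request = false; }
      shutdown_cond.notify_all();
    }

    void Shutdown() {  // runtime shutdown path
      std::unique_lock<std::mutex> l(shutdown_lock);
      shutdown_cond.wait(l, [] { return !processing_request; });
      // No reply is in flight; it is now safe to close the connection.
    }
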
std::string DescribeField(const FieldId& field_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 0d161bc..d0ca214 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -271,7 +271,7 @@
static JdwpError VM_Dispose(JdwpState*, Request*, ExpandBuf*)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- Dbg::Disposed();
+ Dbg::Dispose();
return ERR_NONE;
}
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index e2b88a5..5b30f0c 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -126,6 +126,7 @@
*/
ssize_t JdwpNetStateBase::WritePacket(ExpandBuf* pReply, size_t length) {
MutexLock mu(Thread::Current(), socket_lock_);
+ DCHECK(IsConnected()) << "Connection with debugger is closed";
DCHECK_LE(length, expandBufGetLength(pReply));
return TEMP_FAILURE_RETRY(write(clientSock, expandBufGetBuffer(pReply), length));
}
@@ -140,6 +141,7 @@
ssize_t JdwpNetStateBase::WriteBufferedPacketLocked(const std::vector<iovec>& iov) {
socket_lock_.AssertHeld(Thread::Current());
+ DCHECK(IsConnected()) << "Connection with debugger is closed";
return TEMP_FAILURE_RETRY(writev(clientSock, &iov[0], iov.size()));
}
@@ -225,7 +227,10 @@
jdwp_token_owner_thread_id_(0),
ddm_is_active_(false),
should_exit_(false),
- exit_status_(0) {
+ exit_status_(0),
+ shutdown_lock_("JDWP shutdown lock", kJdwpShutdownLock),
+ shutdown_cond_("JDWP shutdown condition variable", shutdown_lock_),
+ processing_request_(false) {
}
/*
@@ -338,10 +343,20 @@
JdwpState::~JdwpState() {
if (netState != nullptr) {
/*
- * Close down the network to inspire the thread to halt.
+ * Close down the network to inspire the thread to halt. If a request is being processed,
+ * we need to wait for it to finish first.
*/
- VLOG(jdwp) << "JDWP shutting down net...";
- netState->Shutdown();
+ {
+ Thread* self = Thread::Current();
+ MutexLock mu(self, shutdown_lock_);
+ while (processing_request_) {
+ VLOG(jdwp) << "JDWP command in progress: wait for it to finish ...";
+ shutdown_cond_.Wait(self);
+ }
+
+ VLOG(jdwp) << "JDWP shutting down net...";
+ netState->Shutdown();
+ }
if (debug_thread_started_) {
run = false;
@@ -369,7 +384,13 @@
// Returns "false" if we encounter a connection-fatal error.
bool JdwpState::HandlePacket() {
- JdwpNetStateBase* netStateBase = reinterpret_cast<JdwpNetStateBase*>(netState);
+ Thread* const self = Thread::Current();
+ {
+ MutexLock mu(self, shutdown_lock_);
+ processing_request_ = true;
+ }
+ JdwpNetStateBase* netStateBase = netState;
+ CHECK(netStateBase != nullptr) << "Connection has been closed";
JDWP::Request request(netStateBase->input_buffer_, netStateBase->input_count_);
ExpandBuf* pReply = expandBufAlloc();
@@ -388,6 +409,11 @@
}
expandBufFree(pReply);
netStateBase->ConsumeBytes(request.GetLength());
+ {
+ MutexLock mu(self, shutdown_lock_);
+ processing_request_ = false;
+ shutdown_cond_.Broadcast(self);
+ }
return true;
}
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index b2d3835..84fc404 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -28,11 +28,29 @@
static constexpr size_t kLocalsInitial = 64; // Arbitrary.
+// Checking "locals" requires the mutator lock, but at creation time we're really only interested
+// in validity, which isn't changing. To avoid grabbing the mutator lock, this check is factored
+// out into a helper tagged with NO_THREAD_SAFETY_ANALYSIS.
+static bool CheckLocalsValid(JNIEnvExt* in) NO_THREAD_SAFETY_ANALYSIS {
+ if (in == nullptr) {
+ return false;
+ }
+ return in->locals.IsValid();
+}
+
+JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in) {
+ std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in));
+ if (CheckLocalsValid(ret.get())) {
+ return ret.release();
+ }
+ return nullptr;
+}
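
A hypothetical call-site sketch (caller and error handling are illustrative):
Create() replaces direct construction so that a failed local reference table
mapping surfaces as nullptr rather than a CHECK abort:

    JNIEnvExt* env = JNIEnvExt::Create(self, vm);
    if (env == nullptr) {
      // Could not map the local reference table; report instead of aborting.
      LOG(ERROR) << "Failed to create JNIEnvExt";
      return false;
    }
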
+
JNIEnvExt::JNIEnvExt(Thread* self_in, JavaVMExt* vm_in)
: self(self_in),
vm(vm_in),
local_ref_cookie(IRT_FIRST_SEGMENT),
- locals(kLocalsInitial, kLocalsMax, kLocal),
+ locals(kLocalsInitial, kLocalsMax, kLocal, false),
check_jni(false),
critical(0),
monitors("monitors", kMonitorsInitial, kMonitorsMax) {
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index af87cb4..29d912c 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -34,7 +34,8 @@
static constexpr size_t kLocalsMax = 512;
struct JNIEnvExt : public JNIEnv {
- JNIEnvExt(Thread* self, JavaVMExt* vm);
+ static JNIEnvExt* Create(Thread* self, JavaVMExt* vm);
+
~JNIEnvExt();
void DumpReferenceTables(std::ostream& os)
@@ -87,6 +88,11 @@
// Used by -Xcheck:jni.
const JNINativeInterface* unchecked_functions;
+
+ private:
+ // The constructor should not be called directly. It may leave the object in an erroneous state,
+ // and the result needs to be checked.
+ JNIEnvExt(Thread* self, JavaVMExt* vm);
};
// Used to save and restore the JNIEnvExt state when not going through code created by the JNI
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index c182a4d..87ae64d 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -297,22 +297,15 @@
return result;
}
-// Java: dalvik.system.DexFile.UP_TO_DATE
-static const jbyte kUpToDate = 0;
-// Java: dalvik.system.DexFile.DEXOPT_NEEDED
-static const jbyte kPatchoatNeeded = 1;
-// Java: dalvik.system.DexFile.PATCHOAT_NEEDED
-static const jbyte kDexoptNeeded = 2;
-
-static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename,
+static jint GetDexOptNeeded(JNIEnv* env, const char* filename,
const char* pkgname, const char* instruction_set, const jboolean defer) {
if ((filename == nullptr) || !OS::FileExists(filename)) {
- LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist";
+ LOG(ERROR) << "DexFile_getDexOptNeeded file '" << filename << "' does not exist";
ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
const char* message = (filename == nullptr) ? "<empty file name>" : filename;
env->ThrowNew(fnfe.get(), message);
- return kUpToDate;
+ return OatFileAssistant::kNoDexOptNeeded;
}
const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set);
@@ -330,7 +323,7 @@
// Always treat elements of the bootclasspath as up-to-date.
if (oat_file_assistant.IsInBootClassPath()) {
- return kUpToDate;
+ return OatFileAssistant::kNoDexOptNeeded;
}
// TODO: Checking the profile should probably be done in the GetStatus()
@@ -343,7 +336,7 @@
if (!defer) {
oat_file_assistant.CopyProfileFile();
}
- return kDexoptNeeded;
+ return OatFileAssistant::kDex2OatNeeded;
} else if (oat_file_assistant.ProfileExists()
&& !oat_file_assistant.OldProfileExists()) {
if (!defer) {
@@ -353,16 +346,10 @@
}
}
- OatFileAssistant::Status status = oat_file_assistant.GetStatus();
- switch (status) {
- case OatFileAssistant::kUpToDate: return kUpToDate;
- case OatFileAssistant::kNeedsRelocation: return kPatchoatNeeded;
- case OatFileAssistant::kOutOfDate: return kDexoptNeeded;
- }
- UNREACHABLE();
+ return oat_file_assistant.GetDexOptNeeded();
}
-static jbyte DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename,
+static jint DexFile_getDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename,
jstring javaPkgname, jstring javaInstructionSet, jboolean defer) {
ScopedUtfChars filename(env, javaFilename);
if (env->ExceptionCheck()) {
@@ -376,25 +363,25 @@
return 0;
}
- return IsDexOptNeededInternal(env, filename.c_str(), pkgname.c_str(),
- instruction_set.c_str(), defer);
+ return GetDexOptNeeded(env, filename.c_str(), pkgname.c_str(),
+ instruction_set.c_str(), defer);
}
// public API, NULL pkgname
static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) {
const char* instruction_set = GetInstructionSetString(kRuntimeISA);
ScopedUtfChars filename(env, javaFilename);
- return kUpToDate != IsDexOptNeededInternal(env, filename.c_str(), nullptr /* pkgname */,
- instruction_set, false /* defer */);
+ jint status = GetDexOptNeeded(env, filename.c_str(), nullptr /* pkgname */,
+ instruction_set, false /* defer */);
+ return (status != OatFileAssistant::kNoDexOptNeeded) ? JNI_TRUE : JNI_FALSE;
}
-
static JNINativeMethod gMethods[] = {
NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)V"),
NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/Object;)Ljava/lang/Class;"),
NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"),
NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
- NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)B"),
+ NATIVE_METHOD(DexFile, getDexOptNeeded, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"),
NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)Ljava/lang/Object;"),
};
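The registered native method now returns a jint that carries the OatFileAssistant::DexOptNeeded value straight through to Java. A minimal sketch of how a native caller could dispatch on that value; the HandleDexOptNeeded helper is hypothetical and not part of this change:

    void HandleDexOptNeeded(jint status) {
      switch (status) {
        case OatFileAssistant::kNoDexOptNeeded:
          break;  // Compiled code is usable as-is.
        case OatFileAssistant::kDex2OatNeeded:
          // Run dex2oat on the dex file.
          break;
        case OatFileAssistant::kPatchOatNeeded:
          // Run patchoat on the odex file.
          break;
        case OatFileAssistant::kSelfPatchOatNeeded:
          // Run patchoat on the oat file itself.
          break;
      }
    }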
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index d92f59b..e5c27b2 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -142,31 +142,31 @@
return true;
}
-OatFileAssistant::Status OatFileAssistant::GetStatus() {
+OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded() {
// TODO: If the profiling code is ever restored, it's worth considering
// whether we should check to see if the profile is out of date here.
- if (OdexFileIsOutOfDate()) {
- // The DEX file is not pre-compiled.
- // TODO: What if the oat file is not out of date? Could we relocate it
- // from itself?
- return OatFileIsUpToDate() ? kUpToDate : kOutOfDate;
- } else {
- // The DEX file is pre-compiled. If the oat file isn't up to date, we can
- // patch the pre-compiled version rather than recompiling.
- if (OatFileIsUpToDate() || OdexFileIsUpToDate()) {
- return kUpToDate;
- } else {
- return kNeedsRelocation;
- }
+ if (OatFileIsUpToDate() || OdexFileIsUpToDate()) {
+ return kNoDexOptNeeded;
}
+
+ if (OdexFileNeedsRelocation()) {
+ return kPatchOatNeeded;
+ }
+
+ if (OatFileNeedsRelocation()) {
+ return kSelfPatchOatNeeded;
+ }
+
+ return kDex2OatNeeded;
}
bool OatFileAssistant::MakeUpToDate(std::string* error_msg) {
- switch (GetStatus()) {
- case kUpToDate: return true;
- case kNeedsRelocation: return RelocateOatFile(error_msg);
- case kOutOfDate: return GenerateOatFile(error_msg);
+ switch (GetDexOptNeeded()) {
+ case kNoDexOptNeeded: return true;
+ case kDex2OatNeeded: return GenerateOatFile(error_msg);
+ case kPatchOatNeeded: return RelocateOatFile(OdexFileName(), error_msg);
+ case kSelfPatchOatNeeded: return RelocateOatFile(OatFileName(), error_msg);
}
UNREACHABLE();
}
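GetDexOptNeeded() reports the required action and MakeUpToDate() carries it out, so a typical client needs only the pair. A sketch of that call sequence, modeled on the usage in the tests later in this patch:

    OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA,
                                        /* load_executable */ true);
    if (oat_file_assistant.GetDexOptNeeded() != OatFileAssistant::kNoDexOptNeeded) {
      std::string error_msg;
      if (!oat_file_assistant.MakeUpToDate(&error_msg)) {
        LOG(WARNING) << "Could not make oat file up to date: " << error_msg;
      }
    }
    std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();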
@@ -269,14 +269,14 @@
return GetOdexFile() != nullptr;
}
-OatFileAssistant::Status OatFileAssistant::OdexFileStatus() {
+OatFileAssistant::OatStatus OatFileAssistant::OdexFileStatus() {
if (OdexFileIsOutOfDate()) {
- return kOutOfDate;
+ return kOatOutOfDate;
}
if (OdexFileIsUpToDate()) {
- return kUpToDate;
+ return kOatUpToDate;
}
- return kNeedsRelocation;
+ return kOatNeedsRelocation;
}
bool OatFileAssistant::OdexFileIsOutOfDate() {
@@ -293,7 +293,7 @@
}
bool OatFileAssistant::OdexFileNeedsRelocation() {
- return OdexFileStatus() == kNeedsRelocation;
+ return OdexFileStatus() == kOatNeedsRelocation;
}
bool OatFileAssistant::OdexFileIsUpToDate() {
@@ -338,14 +338,14 @@
return GetOatFile() != nullptr;
}
-OatFileAssistant::Status OatFileAssistant::OatFileStatus() {
+OatFileAssistant::OatStatus OatFileAssistant::OatFileStatus() {
if (OatFileIsOutOfDate()) {
- return kOutOfDate;
+ return kOatOutOfDate;
}
if (OatFileIsUpToDate()) {
- return kUpToDate;
+ return kOatUpToDate;
}
- return kNeedsRelocation;
+ return kOatNeedsRelocation;
}
bool OatFileAssistant::OatFileIsOutOfDate() {
@@ -362,7 +362,7 @@
}
bool OatFileAssistant::OatFileNeedsRelocation() {
- return OatFileStatus() == kNeedsRelocation;
+ return OatFileStatus() == kOatNeedsRelocation;
}
bool OatFileAssistant::OatFileIsUpToDate() {
@@ -378,17 +378,17 @@
return cached_oat_file_is_up_to_date_;
}
-OatFileAssistant::Status OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
+OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
// TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which
// is more work than we need to do. If performance becomes a concern, and
// this method is actually called, this should be fixed.
if (GivenOatFileIsOutOfDate(file)) {
- return kOutOfDate;
+ return kOatOutOfDate;
}
if (GivenOatFileIsUpToDate(file)) {
- return kUpToDate;
+ return kOatUpToDate;
}
- return kNeedsRelocation;
+ return kOatNeedsRelocation;
}
bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) {
@@ -451,7 +451,7 @@
}
bool OatFileAssistant::GivenOatFileNeedsRelocation(const OatFile& file) {
- return GivenOatFileStatus(file) == kNeedsRelocation;
+ return GivenOatFileStatus(file) == kOatNeedsRelocation;
}
bool OatFileAssistant::GivenOatFileIsUpToDate(const OatFile& file) {
@@ -592,16 +592,17 @@
}
}
-bool OatFileAssistant::RelocateOatFile(std::string* error_msg) {
+bool OatFileAssistant::RelocateOatFile(const std::string* input_file,
+ std::string* error_msg) {
CHECK(error_msg != nullptr);
- if (OdexFileName() == nullptr) {
+ if (input_file == nullptr) {
*error_msg = "Patching of oat file for dex location "
+ std::string(dex_location_)
- + " not attempted because the odex file name could not be determined.";
+ + " not attempted because the input file name could not be determined.";
return false;
}
- const std::string& odex_file_name = *OdexFileName();
+ const std::string& input_file_name = *input_file;
if (OatFileName() == nullptr) {
*error_msg = "Patching of oat file for dex location "
@@ -628,7 +629,7 @@
std::vector<std::string> argv;
argv.push_back(runtime->GetPatchoatExecutable());
argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(isa_)));
- argv.push_back("--input-oat-file=" + odex_file_name);
+ argv.push_back("--input-oat-file=" + input_file_name);
argv.push_back("--output-oat-file=" + oat_file_name);
argv.push_back("--patched-image-location=" + image_info->location);
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index f2abcf9..9e7c2ef 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -43,20 +43,43 @@
// be restored and tested, or removed.
class OatFileAssistant {
public:
- enum Status {
- // kOutOfDate - An oat file is said to be out of date if the file does not
- // exist, or is out of date with respect to the dex file or boot image.
- kOutOfDate,
+ enum DexOptNeeded {
+ // kNoDexOptNeeded - The code for this dex location is up to date and can
+ // be used as is.
+ // Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0
+ kNoDexOptNeeded = 0,
- // kNeedsRelocation - An oat file is said to need relocation if the code
- // is up to date, but not yet properly relocated for address space layout
- // randomization (ASLR). In this case, the oat file is neither "out of
- // date" nor "up to date".
- kNeedsRelocation,
+ // kDex2OatNeeded - In order to make the code for this dex location up to
+ // date, dex2oat must be run on the dex file.
+ // Matches Java: dalvik.system.DexFile.DEX2OAT_NEEDED = 1
+ kDex2OatNeeded = 1,
- // kUpToDate - An oat file is said to be up to date if it is not out of
+ // kPatchOatNeeded - In order to make the code for this dex location up to
+ // date, patchoat must be run on the odex file.
+ // Matches Java: dalvik.system.DexFile.PATCHOAT_NEEDED = 2
+ kPatchOatNeeded = 2,
+
+ // kSelfPatchOatNeeded - In order to make the code for this dex location
+ // up to date, patchoat must be run on the oat file.
+ // Matches Java: dalvik.system.DexFile.SELF_PATCHOAT_NEEDED = 3
+ kSelfPatchOatNeeded = 3,
+ };
+
+ enum OatStatus {
+ // kOatOutOfDate - An oat file is said to be out of date if the file does
+ // not exist, or is out of date with respect to the dex file or boot
+ // image.
+ kOatOutOfDate,
+
+ // kOatNeedsRelocation - An oat file is said to need relocation if the
+ // code is up to date, but not yet properly relocated for address space
+ // layout randomization (ASLR). In this case, the oat file is neither
+ // "out of date" nor "up to date".
+ kOatNeedsRelocation,
+
+ // kOatUpToDate - An oat file is said to be up to date if it is not out of
// date and has been properly relocated for the purposes of ASLR.
- kUpToDate,
+ kOatUpToDate,
};
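Splitting DexOptNeeded (the action to take) from OatStatus (the state of one file) makes GetDexOptNeeded() a simple function of the two per-file statuses. A standalone restatement of the priority order implemented in oat_file_assistant.cc above; this is a sketch, not the actual member function:

    OatFileAssistant::DexOptNeeded Combine(OatFileAssistant::OatStatus odex,
                                           OatFileAssistant::OatStatus oat) {
      if (odex == OatFileAssistant::kOatUpToDate ||
          oat == OatFileAssistant::kOatUpToDate) {
        return OatFileAssistant::kNoDexOptNeeded;  // Nothing to do.
      }
      if (odex == OatFileAssistant::kOatNeedsRelocation) {
        return OatFileAssistant::kPatchOatNeeded;  // patchoat the odex file.
      }
      if (oat == OatFileAssistant::kOatNeedsRelocation) {
        return OatFileAssistant::kSelfPatchOatNeeded;  // patchoat the oat file.
      }
      return OatFileAssistant::kDex2OatNeeded;  // Recompile from the dex file.
    }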
// Constructs an OatFileAssistant object to assist the oat file
@@ -67,7 +90,6 @@
// Typically the dex_location is the absolute path to the original,
// un-optimized dex file.
//
- //
// Note: Currently the dex_location must have an extension.
// TODO: Relax this restriction?
//
@@ -121,8 +143,9 @@
// file.
bool Lock(std::string* error_msg);
- // Returns the overall compilation status for the given dex location.
- Status GetStatus();
+ // Return what action needs to be taken to produce up-to-date code for this
+ // dex location.
+ DexOptNeeded GetDexOptNeeded();
// Attempts to generate or relocate the oat file as needed to make it up to
// date.
@@ -164,7 +187,7 @@
// determined.
const std::string* OdexFileName();
bool OdexFileExists();
- Status OdexFileStatus();
+ OatStatus OdexFileStatus();
bool OdexFileIsOutOfDate();
bool OdexFileNeedsRelocation();
bool OdexFileIsUpToDate();
@@ -176,20 +199,18 @@
// the dex location.
//
// Notes:
- // * To get the overall status of the compiled code for this dex_location,
- // use the GetStatus() method, not the OatFileStatus() method.
// * OatFileName may return null if the oat file name could not be
// determined.
const std::string* OatFileName();
bool OatFileExists();
- Status OatFileStatus();
+ OatStatus OatFileStatus();
bool OatFileIsOutOfDate();
bool OatFileNeedsRelocation();
bool OatFileIsUpToDate();
// These methods return the status for a given opened oat file with respect
// to the dex location.
- Status GivenOatFileStatus(const OatFile& file);
+ OatStatus GivenOatFileStatus(const OatFile& file);
bool GivenOatFileIsOutOfDate(const OatFile& file);
bool GivenOatFileNeedsRelocation(const OatFile& file);
bool GivenOatFileIsUpToDate(const OatFile& file);
@@ -216,7 +237,7 @@
// Copy the current profile to the old profile location.
void CopyProfileFile();
- // Generates the oat file by relocation from the odex file.
+ // Generates the oat file by relocation from the named input file.
// This does not check the current status before attempting to relocate the
// oat file.
// Returns true on success.
@@ -224,7 +245,7 @@
//
// If there is a failure, the value of error_msg will be set to a string
// describing why there was failure. error_msg must not be nullptr.
- bool RelocateOatFile(std::string* error_msg);
+ bool RelocateOatFile(const std::string* input_file, std::string* error_msg);
// Generate the oat file from the dex file.
// This does not check the current status before attempting to generate the
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index a198824..d2362a2 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -29,7 +29,9 @@
#include "common_runtime_test.h"
#include "compiler_callbacks.h"
#include "mem_map.h"
+#include "mirror/art_field-inl.h"
#include "os.h"
+#include "scoped_thread_state_change.h"
#include "thread-inl.h"
#include "utils.h"
@@ -267,42 +269,42 @@
}
// Case: We have a DEX file, but no OAT file for it.
-// Expect: The oat file status is kOutOfDate.
+// Expect: The status is kDex2OatNeeded.
TEST_F(OatFileAssistantTest, DexNoOat) {
std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
Copy(GetDexSrc1(), dex_location);
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
- EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_FALSE(oat_file_assistant.OdexFileExists());
EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
- EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.OdexFileStatus());
+ EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OdexFileStatus());
EXPECT_FALSE(oat_file_assistant.OatFileExists());
EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
- EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.OatFileStatus());
+ EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OatFileStatus());
}
// Case: We have no DEX file and no OAT file.
-// Expect: Status is out of date. Loading should fail, but not crash.
+// Expect: Status is kDex2OatNeeded. Loading should fail, but not crash.
TEST_F(OatFileAssistantTest, NoDexNoOat) {
std::string dex_location = GetScratchDir() + "/NoDexNoOat.jar";
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
- EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
EXPECT_EQ(nullptr, oat_file.get());
}
// Case: We have a DEX file and up-to-date OAT file for it.
-// Expect: The oat file status is kUpToDate.
+// Expect: The status is kNoDexOptNeeded.
TEST_F(OatFileAssistantTest, OatUpToDate) {
std::string dex_location = GetScratchDir() + "/OatUpToDate.jar";
Copy(GetDexSrc1(), dex_location);
@@ -310,7 +312,7 @@
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
- EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_FALSE(oat_file_assistant.OdexFileExists());
EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -319,18 +321,20 @@
EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate());
- EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.OatFileStatus());
+ EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
}
// Case: We have a MultiDEX file and up-to-date OAT file for it.
-// Expect: The oat file status is kUpToDate.
+// Expect: The status is kNoDexOptNeeded and we load all dex files.
TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) {
std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar";
Copy(GetMultiDexSrc1(), dex_location);
GenerateOatForTest(dex_location.c_str());
- // Verify we can load both dex files.
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+ EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+
+ // Verify we can load both dex files.
std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
ASSERT_TRUE(oat_file.get() != nullptr);
EXPECT_TRUE(oat_file->IsExecutable());
@@ -341,7 +345,7 @@
// Case: We have a MultiDEX file and up-to-date OAT file for it with relative
// encoded dex locations.
-// Expect: The oat file status is kUpToDate.
+// Expect: The status is kNoDexOptNeeded.
TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) {
std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar";
std::string oat_location = GetOdexDir() + "/RelativeEncodedDexLocation.oat";
@@ -370,8 +374,8 @@
EXPECT_EQ(2u, dex_files.size());
}
-// Case: We have a DEX file and out of date OAT file.
-// Expect: The oat file status is kOutOfDate.
+// Case: We have a DEX file and out-of-date OAT file.
+// Expect: The status is kDex2OatNeeded.
TEST_F(OatFileAssistantTest, OatOutOfDate) {
std::string dex_location = GetScratchDir() + "/OatOutOfDate.jar";
@@ -382,7 +386,7 @@
Copy(GetDexSrc2(), dex_location);
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
- EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -394,7 +398,7 @@
}
// Case: We have a DEX file and an ODEX file, but no OAT file.
-// Expect: The oat file status is kNeedsRelocation.
+// Expect: The status is kPatchOatNeeded.
TEST_F(OatFileAssistantTest, DexOdexNoOat) {
std::string dex_location = GetScratchDir() + "/DexOdexNoOat.jar";
std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
@@ -406,21 +410,20 @@
// Verify the status.
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
- EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_TRUE(oat_file_assistant.OdexFileExists());
EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
EXPECT_TRUE(oat_file_assistant.OdexFileNeedsRelocation());
- EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.OdexFileNeedsRelocation());
EXPECT_FALSE(oat_file_assistant.OatFileExists());
EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
}
// Case: We have a stripped DEX file and an ODEX file, but no OAT file.
-// Expect: The oat file status is kNeedsRelocation.
+// Expect: The status is kPatchOatNeeded.
TEST_F(OatFileAssistantTest, StrippedDexOdexNoOat) {
std::string dex_location = GetScratchDir() + "/StrippedDexOdexNoOat.jar";
std::string odex_location = GetOdexDir() + "/StrippedDexOdexNoOat.odex";
@@ -435,7 +438,7 @@
// Verify the status.
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
- EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -449,7 +452,7 @@
std::string error_msg;
ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
- EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -468,8 +471,8 @@
EXPECT_EQ(1u, dex_files.size());
}
-// Case: We have a stripped DEX file, an ODEX file, and an out of date OAT file.
-// Expect: The oat file status is kNeedsRelocation.
+// Case: We have a stripped DEX file, an ODEX file, and an out-of-date OAT file.
+// Expect: The status is kPatchOatNeeded.
TEST_F(OatFileAssistantTest, StrippedDexOdexOat) {
std::string dex_location = GetScratchDir() + "/StrippedDexOdexOat.jar";
std::string odex_location = GetOdexDir() + "/StrippedDexOdexOat.odex";
@@ -488,7 +491,7 @@
// Verify the status.
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
- EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -503,7 +506,7 @@
std::string error_msg;
ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
- EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -524,9 +527,59 @@
EXPECT_EQ(1u, dex_files.size());
}
+// Case: We have a DEX file, no ODEX file and an OAT file that needs
+// relocation.
+// Expect: The status is kSelfPatchOatNeeded.
+TEST_F(OatFileAssistantTest, SelfRelocation) {
+ std::string dex_location = GetScratchDir() + "/SelfRelocation.jar";
+ std::string oat_location = GetOdexDir() + "/SelfRelocation.oat";
+
+ // Create the dex file and an oat file that needs relocation.
+ Copy(GetDexSrc1(), dex_location);
+ GenerateOdexForTest(dex_location, oat_location);
+
+ OatFileAssistant oat_file_assistant(dex_location.c_str(),
+ oat_location.c_str(), kRuntimeISA, true);
+
+ EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
+
+ EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+ EXPECT_FALSE(oat_file_assistant.OdexFileExists());
+ EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
+ EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
+ EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
+ EXPECT_TRUE(oat_file_assistant.OatFileExists());
+ EXPECT_TRUE(oat_file_assistant.OatFileNeedsRelocation());
+ EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
+ EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+
+ // Make the oat file up to date.
+ std::string error_msg;
+ ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+
+ EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+
+ EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+ EXPECT_FALSE(oat_file_assistant.OdexFileExists());
+ EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
+ EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
+ EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
+ EXPECT_TRUE(oat_file_assistant.OatFileExists());
+ EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
+ EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
+ EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate());
+
+ std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+ ASSERT_TRUE(oat_file.get() != nullptr);
+ EXPECT_TRUE(oat_file->IsExecutable());
+ std::vector<std::unique_ptr<const DexFile>> dex_files;
+ dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
+ EXPECT_EQ(1u, dex_files.size());
+}
+
// Case: We have a DEX file, an ODEX file and an OAT file, where the ODEX and
// OAT files both have patch delta of 0.
-// Expect: It shouldn't crash.
+// Expect: It shouldn't crash, and the status is kPatchOatNeeded.
TEST_F(OatFileAssistantTest, OdexOatOverlap) {
std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar";
std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex";
@@ -544,7 +597,7 @@
OatFileAssistant oat_file_assistant(dex_location.c_str(),
oat_location.c_str(), kRuntimeISA, true);
- EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -564,7 +617,7 @@
}
// Case: We have a DEX file and a PIC ODEX file, but no OAT file.
-// Expect: The oat file status is kUpToDate, because PIC needs no relocation.
+// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation.
TEST_F(OatFileAssistantTest, DexPicOdexNoOat) {
std::string dex_location = GetScratchDir() + "/DexPicOdexNoOat.jar";
std::string odex_location = GetOdexDir() + "/DexPicOdexNoOat.odex";
@@ -576,7 +629,7 @@
// Verify the status.
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
- EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -661,7 +714,7 @@
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
- EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.OdexFileExists());
EXPECT_FALSE(oat_file_assistant.OatFileExists());
EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -720,7 +773,7 @@
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
- EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.OdexFileExists());
EXPECT_FALSE(oat_file_assistant.OatFileExists());
EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -737,7 +790,7 @@
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
- EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.OdexFileExists());
EXPECT_FALSE(oat_file_assistant.OatFileExists());
EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -751,14 +804,14 @@
}
// Case: Non-standard extension for dex file.
-// Expect: The oat file status is kOutOfDate.
+// Expect: The status is kDex2OatNeeded.
TEST_F(OatFileAssistantTest, LongDexExtension) {
std::string dex_location = GetScratchDir() + "/LongDexExtension.jarx";
Copy(GetDexSrc1(), dex_location);
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
- EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+ EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -895,6 +948,41 @@
"/foo/bar/baz_noext", kArm, &odex_file, &error_msg));
}
+// Verify the dexopt status values from dalvik.system.DexFile
+// match the OatFileAssistant::DexOptNeeded values.
+TEST_F(OatFileAssistantTest, DexOptStatusValues) {
+ ScopedObjectAccess soa(Thread::Current());
+ StackHandleScope<1> hs(soa.Self());
+ ClassLinker* linker = Runtime::Current()->GetClassLinker();
+ Handle<mirror::Class> dexfile(
+ hs.NewHandle(linker->FindSystemClass(soa.Self(), "Ldalvik/system/DexFile;")));
+ ASSERT_FALSE(dexfile.Get() == nullptr);
+ linker->EnsureInitialized(soa.Self(), dexfile, true, true);
+
+ mirror::ArtField* no_dexopt_needed = mirror::Class::FindStaticField(
+ soa.Self(), dexfile, "NO_DEXOPT_NEEDED", "I");
+ ASSERT_FALSE(no_dexopt_needed == nullptr);
+ EXPECT_EQ(no_dexopt_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+ EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, no_dexopt_needed->GetInt(dexfile.Get()));
+
+ mirror::ArtField* dex2oat_needed = mirror::Class::FindStaticField(
+ soa.Self(), dexfile, "DEX2OAT_NEEDED", "I");
+ ASSERT_FALSE(dex2oat_needed == nullptr);
+ EXPECT_EQ(dex2oat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+ EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, dex2oat_needed->GetInt(dexfile.Get()));
+
+ mirror::ArtField* patchoat_needed = mirror::Class::FindStaticField(
+ soa.Self(), dexfile, "PATCHOAT_NEEDED", "I");
+ ASSERT_FALSE(patchoat_needed == nullptr);
+ EXPECT_EQ(patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+ EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, patchoat_needed->GetInt(dexfile.Get()));
+
+ mirror::ArtField* self_patchoat_needed = mirror::Class::FindStaticField(
+ soa.Self(), dexfile, "SELF_PATCHOAT_NEEDED", "I");
+ ASSERT_FALSE(self_patchoat_needed == nullptr);
+ EXPECT_EQ(self_patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+ EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, self_patchoat_needed->GetInt(dexfile.Get()));
+}
// TODO: More Tests:
// * Test class linker falls back to unquickened dex for DexNoOat
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d1b0464..89fc00e 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -377,7 +377,11 @@
tls32_.thin_lock_thread_id = thread_list->AllocThreadId(this);
- tlsPtr_.jni_env = new JNIEnvExt(this, java_vm);
+ tlsPtr_.jni_env = JNIEnvExt::Create(this, java_vm);
+ if (tlsPtr_.jni_env == nullptr) {
+ return false;
+ }
+
thread_list->Register(this);
return true;
}
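Thread::Init now handles a null JNIEnvExt, which presumes a fallible factory in place of the bare constructor. A sketch of the assumed shape of that factory; the signature comes from the call site above, while the body and the validity check are illustrative assumptions:

    JNIEnvExt* JNIEnvExt::Create(Thread* self, JavaVMExt* vm) {
      std::unique_ptr<JNIEnvExt> env(new JNIEnvExt(self, vm));
      if (!env->locals_valid) {  // Hypothetical flag, e.g. locals table allocation failed.
        return nullptr;          // Thread::Init then fails the attach cleanly.
      }
      return env.release();
    }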
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index 3d0f074..708f61f 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -104,6 +104,16 @@
if (!unsafe.compareAndSwapInt(t, intOffset, 0, 1)) {
System.out.println("Unexpectedly not succeeding compareAndSwap...");
}
+
+ if (unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
+ System.out.println("Unexpectedly succeeding compareAndSwapLong...");
+ }
+ if (!unsafe.compareAndSwapLong(t, longOffset, longValue, 0)) {
+ System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+ }
+ if (!unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
+ System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+ }
}
private static class TestClass {
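The three added cases pin down compareAndSwapLong's contract on a 64-bit field: fail when the expected value is stale, succeed on a match, and leave the new value visible to the next swap. The same contract restated in C++ with std::atomic, purely for reference (42 stands in for the test's longValue; this is not ART code):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    int main() {
      std::atomic<int64_t> v(42);  // The field initially holds longValue.
      int64_t expected = 0;
      assert(!v.compare_exchange_strong(expected, 1));  // Stale expected: must fail.
      expected = 42;
      assert(v.compare_exchange_strong(expected, 0));   // Matching expected: succeeds.
      expected = 0;
      assert(v.compare_exchange_strong(expected, 1));   // Sees the value just written.
      return 0;
    }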
diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java
index f0fe934..6a6227c 100644
--- a/test/107-int-math2/src/Main.java
+++ b/test/107-int-math2/src/Main.java
@@ -379,7 +379,7 @@
*/
static int lit16Test(int x) {
- int[] results = new int[8];
+ int[] results = new int[10];
/* try to generate op-int/lit16 instructions */
results[0] = x + 1000;
@@ -390,6 +390,9 @@
results[5] = x & 1000;
results[6] = x | -1000;
results[7] = x ^ -1000;
+ /* use a 16-bit constant that has its MSB (bit-15) set */
+ results[8] = x / 32769;
+ results[9] = x / -32769;
if (results[0] != 78777) { return 1; }
if (results[1] != -76777) { return 2; }
@@ -399,6 +402,8 @@
if (results[5] != 960) { return 6; }
if (results[6] != -39) { return 7; }
if (results[7] != -76855) { return 8; }
+ if (results[8] != 2) { return 9; }
+ if (results[9] != -2) { return 10; }
return 0;
}
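The two new cases exercise division by a constant whose low 16 bits have the sign bit set (32769 == 0x8001), an edge case for constant-division strength reduction. With x == 77777, implied by the existing results[0] == 78777 check, truncating division gives exactly the expected values; a quick standalone check:

    #include <cassert>

    int main() {
      int x = 77777;             // From results[0] == x + 1000 == 78777.
      assert(x / 32769 == 2);    // 2 * 32769 = 65538 <= 77777 < 3 * 32769 = 98307.
      assert(x / -32769 == -2);  // Java and C/C++ both truncate toward zero.
      return 0;
    }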