blob: 8d5e4f375b3220af997c51af75c42127e6b05302 [file] [log] [blame]
xueliang.zhongf7caf682017-03-01 16:07:02 +00001/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
18#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
19
20#include "code_generator_arm_vixl.h"
21#include "scheduler.h"
22
23namespace art {
24namespace arm {
25#ifdef ART_USE_OLD_ARM_BACKEND
26typedef CodeGeneratorARM CodeGeneratorARMType;
27#else
28typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
29#endif
30
// AArch32 instruction latencies.
// For now, every ARM CPU is assumed to share this single latency list.
// The values below were tuned through performance experiments and automatic
// tuning (a differential evolution approach) on various benchmarks.

// Integer, floating-point and shifter-operand data processing.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;

// Multiplication and division.
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;

// Type conversion.
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;

// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;

// Branches and calls.
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;

// Remaining special cases.
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
54
55class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
56 public:
57 explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
58 : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
59
60 // Default visitor for instructions not handled specifically below.
61 void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
62 last_visited_latency_ = kArmIntegerOpLatency;
63 }
64
65// We add a second unused parameter to be able to use this macro like the others
66// defined in `nodes.h`.
67#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
68 M(ArrayGet , unused) \
69 M(ArrayLength , unused) \
70 M(ArraySet , unused) \
71 M(Add , unused) \
72 M(Sub , unused) \
73 M(And , unused) \
74 M(Or , unused) \
75 M(Ror , unused) \
76 M(Xor , unused) \
77 M(Shl , unused) \
78 M(Shr , unused) \
79 M(UShr , unused) \
80 M(Mul , unused) \
81 M(Div , unused) \
82 M(Condition , unused) \
83 M(Compare , unused) \
84 M(BoundsCheck , unused) \
85 M(InstanceFieldGet , unused) \
86 M(InstanceFieldSet , unused) \
87 M(InstanceOf , unused) \
88 M(Invoke , unused) \
89 M(LoadString , unused) \
90 M(NewArray , unused) \
91 M(NewInstance , unused) \
92 M(Rem , unused) \
93 M(StaticFieldGet , unused) \
94 M(StaticFieldSet , unused) \
95 M(SuspendCheck , unused) \
96 M(TypeConversion , unused)
97
98#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
99 M(BitwiseNegatedRight, unused) \
100 M(MultiplyAccumulate, unused) \
101 M(IntermediateAddress, unused) \
102 M(DataProcWithShifterOp, unused)
103
104#define DECLARE_VISIT_INSTRUCTION(type, unused) \
105 void Visit##type(H##type* instruction) OVERRIDE;
106
107 FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
108 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
109 FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
110
111#undef DECLARE_VISIT_INSTRUCTION
112
113 private:
114 void HandleBinaryOperationLantencies(HBinaryOperation* instr);
115 void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
116 void HandleShiftLatencies(HBinaryOperation* instr);
117 void HandleDivRemConstantIntegralLatencies(int32_t imm);
118 void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
119 void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
120 void HandleGenerateDataProcInstruction(bool internal_latency = false);
121 void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
122 void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
123
124 // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
125 // latency visitors may query CodeGenerator for such information for accurate latency settings.
126 CodeGeneratorARMType* codegen_;
127};
128
129class HSchedulerARM : public HScheduler {
130 public:
131 HSchedulerARM(ArenaAllocator* arena,
132 SchedulingNodeSelector* selector,
133 SchedulingLatencyVisitorARM* arm_latency_visitor)
134 : HScheduler(arena, arm_latency_visitor, selector) {}
135 ~HSchedulerARM() OVERRIDE {}
136
137 bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
138#define CASE_INSTRUCTION_KIND(type, unused) case \
139 HInstruction::InstructionKind::k##type:
140 switch (instruction->GetKind()) {
141 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
142 return true;
143 FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
144 return true;
145 default:
146 return HScheduler::IsSchedulable(instruction);
147 }
148#undef CASE_INSTRUCTION_KIND
149 }
150
151 private:
152 DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
153};
154
155} // namespace arm
156} // namespace art
157
158#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_