Disambiguate memory accesses in instruction scheduling
Based on aliasing information from heap location collector,
instruction scheduling can further eliminate side-effect
dependencies between memory accesses to different locations,
and perform better scheduling on memory loads and stores.
Performance improvements of this CL, measured on Cortex-A53:
| benchmarks | ARM64 backend | ARM backend |
|----------------+---------------|-------------|
| algorithm | 0.1 % | 0.1 % |
| benchmarksgame | 0.5 % | 1.3 % |
| caffeinemark | 0.0 % | 0.0 % |
| math | 5.1 % | 5.0 % |
| stanford | 1.1 % | 0.6 % |
| testsimd | 0.4 % | 0.1 % |
Compilation time impact is negligible, because this
heap location load store analysis is only performed
on loop basic blocks that get instruction scheduled.
Test: m test-art-host
Test: m test-art-target
Test: 706-checker-scheduler
Change-Id: I43d7003c09bfab9d3a1814715df666aea9a7360b
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index d87600a..cc7222d 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -18,6 +18,7 @@
#include "builder.h"
#include "codegen_test_utils.h"
#include "common_compiler_test.h"
+#include "load_store_analysis.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "pc_relative_fixups_x86.h"
@@ -193,6 +194,147 @@
}
}
+ void TestDependencyGraphOnAliasingArrayAccesses(HScheduler* scheduler) {
+ HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(entry);
+ graph_->SetEntryBlock(entry);
+ graph_->BuildDominatorTree();
+
+ HInstruction* arr = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
+ 0,
+ Primitive::kPrimNot);
+ HInstruction* i = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(1),
+ 1,
+ Primitive::kPrimInt);
+ HInstruction* j = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(1),
+ 1,
+ Primitive::kPrimInt);
+ HInstruction* object = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
+ 0,
+ Primitive::kPrimNot);
+ HInstruction* c0 = graph_->GetIntConstant(0);
+ HInstruction* c1 = graph_->GetIntConstant(1);
+ HInstruction* add0 = new (&allocator_) HAdd(Primitive::kPrimInt, i, c0);
+ HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, i, c1);
+ HInstruction* sub0 = new (&allocator_) HSub(Primitive::kPrimInt, i, c0);
+ HInstruction* sub1 = new (&allocator_) HSub(Primitive::kPrimInt, i, c1);
+ HInstruction* arr_set_0 = new (&allocator_) HArraySet(arr, c0, c0, Primitive::kPrimInt, 0);
+ HInstruction* arr_set_1 = new (&allocator_) HArraySet(arr, c1, c0, Primitive::kPrimInt, 0);
+ HInstruction* arr_set_i = new (&allocator_) HArraySet(arr, i, c0, Primitive::kPrimInt, 0);
+ HInstruction* arr_set_add0 = new (&allocator_) HArraySet(arr, add0, c0, Primitive::kPrimInt, 0);
+ HInstruction* arr_set_add1 = new (&allocator_) HArraySet(arr, add1, c0, Primitive::kPrimInt, 0);
+ HInstruction* arr_set_sub0 = new (&allocator_) HArraySet(arr, sub0, c0, Primitive::kPrimInt, 0);
+ HInstruction* arr_set_sub1 = new (&allocator_) HArraySet(arr, sub1, c0, Primitive::kPrimInt, 0);
+ HInstruction* arr_set_j = new (&allocator_) HArraySet(arr, j, c0, Primitive::kPrimInt, 0);
+ HInstanceFieldSet* set_field10 = new (&allocator_) HInstanceFieldSet(object,
+ c1,
+ nullptr,
+ Primitive::kPrimInt,
+ MemberOffset(10),
+ false,
+ kUnknownFieldIndex,
+ kUnknownClassDefIndex,
+ graph_->GetDexFile(),
+ 0);
+
+ HInstruction* block_instructions[] = {arr,
+ i,
+ j,
+ object,
+ add0,
+ add1,
+ sub0,
+ sub1,
+ arr_set_0,
+ arr_set_1,
+ arr_set_i,
+ arr_set_add0,
+ arr_set_add1,
+ arr_set_sub0,
+ arr_set_sub1,
+ arr_set_j,
+ set_field10};
+
+ for (HInstruction* instr : block_instructions) {
+ entry->AddInstruction(instr);
+ }
+
+ SchedulingGraph scheduling_graph(scheduler, graph_->GetArena());
+ HeapLocationCollector heap_location_collector(graph_);
+ heap_location_collector.VisitBasicBlock(entry);
+ heap_location_collector.BuildAliasingMatrix();
+ scheduling_graph.SetHeapLocationCollector(heap_location_collector);
+
+ for (HInstruction* instr : ReverseRange(block_instructions)) {
+ // Build scheduling graph with memory access aliasing information
+ // from LSA/heap_location_collector.
+ scheduling_graph.AddNode(instr);
+ }
+
+ // LSA/HeapLocationCollector should see those ArraySet instructions.
+ ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 9U);
+ ASSERT_TRUE(heap_location_collector.HasHeapStores());
+
+ // Test queries on HeapLocationCollector's aliasing matrix after load store analysis.
+ // HeapLocationCollector and SchedulingGraph should report consistent relationships.
+ size_t loc1 = HeapLocationCollector::kHeapLocationNotFound;
+ size_t loc2 = HeapLocationCollector::kHeapLocationNotFound;
+
+ // Test side effect dependency: array[0] and array[1]
+ loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, c0);
+ loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, c1);
+ ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_1, arr_set_0));
+
+ // Test side effect dependency based on LSA analysis: array[i] and array[j]
+ loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i);
+ loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, j);
+ ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i));
+
+ // Test side effect dependency based on LSA analysis: array[i] and array[i+0]
+ loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i);
+ loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, add0);
+ ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_add0, arr_set_i));
+
+ // Test side effect dependency based on LSA analysis: array[i] and array[i-0]
+ loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i);
+ loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, sub0);
+ ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_i));
+
+ // Test side effect dependency based on LSA analysis: array[i] and array[i+1]
+ loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i);
+ loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, add1);
+ ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_add1, arr_set_i));
+
+ // Test side effect dependency based on LSA analysis: array[i+1] and array[i-1]
+ loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, add1);
+ loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, sub1);
+ ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub1, arr_set_add1));
+
+ // Test side effect dependency based on LSA analysis: array[j] and all others array accesses
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add0));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_sub0));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_sub1));
+
+ // Test that ArraySet and FieldSet should not have side effect dependency
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_i, set_field10));
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, set_field10));
+
+ // Exercise target specific scheduler and SchedulingLatencyVisitor.
+ scheduler->Schedule(graph_);
+ }
+
ArenaPool pool_;
ArenaAllocator allocator_;
HGraph* graph_;
@@ -204,15 +346,28 @@
arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector);
TestBuildDependencyGraphAndSchedule(&scheduler);
}
+
+TEST_F(SchedulerTest, ArrayAccessAliasingARM64) {
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector);
+ TestDependencyGraphOnAliasingArrayAccesses(&scheduler);
+}
#endif
#if defined(ART_ENABLE_CODEGEN_arm)
-TEST_F(SchedulerTest, DependencyGrapAndSchedulerARM) {
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) {
CriticalPathSchedulingNodeSelector critical_path_selector;
arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor);
TestBuildDependencyGraphAndSchedule(&scheduler);
}
+
+TEST_F(SchedulerTest, ArrayAccessAliasingARM) {
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
+ arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor);
+ TestDependencyGraphOnAliasingArrayAccesses(&scheduler);
+}
#endif
TEST_F(SchedulerTest, RandomScheduling) {