Alignment optimizations in vectorizer.
Rationale:
Since aligned data access is generally better (enables more efficient
aligned moves and prevents nasty cache line splits), computing and/or
enforcing alignment has been added to the vectorizer:
(1) If the initial alignment is known completely and suffices,
then a static peeling factor enforces proper alignment.
(2) If (1) fails, but the base alignment allows, dynamically peeling
until total offset is aligned forces proper aligned access patterns.
By using ART conventions only, any forced alignment is preserved
over suspends checks where data may move.
Note 1:
Current allocation convention is just 8 byte alignment on arrays/strings,
so only ARM32 benefits. However, all optimizations are implemented in
a general way, so moving to a 16 byte alignment will immediately
take advantage of any new convention!!
Note 2:
This CL also exposes how bad the choice of 12 byte offset of arrays
really is. Even though the new optimizations fix the misaligned, it
requires peeling for the most common case: 0 indexed loops. Therefore,
we may even consider moving to a 16 byte offset. Again the optimizations
in this CL will immediately take advantage of that new convention!!
Test: test-art-host test-art-target
Change-Id: Ib6cc0fb68c9433d3771bee573603e64a3a9423ee
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 768fe55..51e0a98 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -94,20 +94,24 @@
* Representation of a unit-stride array reference.
*/
struct ArrayReference {
- ArrayReference(HInstruction* b, HInstruction* o, DataType::Type t, bool l)
- : base(b), offset(o), type(t), lhs(l) { }
+ ArrayReference(HInstruction* b, HInstruction* o, DataType::Type t, bool l, bool c = false)
+ : base(b), offset(o), type(t), lhs(l), is_string_char_at(c) { }
bool operator<(const ArrayReference& other) const {
return
(base < other.base) ||
(base == other.base &&
(offset < other.offset || (offset == other.offset &&
(type < other.type ||
- (type == other.type && lhs < other.lhs)))));
+ (type == other.type &&
+ (lhs < other.lhs ||
+ (lhs == other.lhs &&
+ is_string_char_at < other.is_string_char_at)))))));
}
- HInstruction* base; // base address
- HInstruction* offset; // offset + i
- DataType::Type type; // component type
- bool lhs; // def/use
+ HInstruction* base; // base address
+ HInstruction* offset; // offset + i
+ DataType::Type type; // component type
+ bool lhs; // def/use
+ bool is_string_char_at; // compressed string read
};
//
@@ -152,6 +156,7 @@
bool generate_code,
DataType::Type type,
uint64_t restrictions);
+ uint32_t GetVectorSizeInBytes();
bool TrySetVectorType(DataType::Type type, /*out*/ uint64_t* restrictions);
bool TrySetVectorLength(uint32_t length);
void GenerateVecInv(HInstruction* org, DataType::Type type);
@@ -183,8 +188,14 @@
uint64_t restrictions);
// Vectorization heuristics.
+ Alignment ComputeAlignment(HInstruction* offset,
+ DataType::Type type,
+ bool is_string_char_at,
+ uint32_t peeling = 0);
+ void SetAlignmentStrategy(uint32_t peeling_votes[],
+ const ArrayReference* peeling_candidate);
+ uint32_t MaxNumberPeeled();
bool IsVectorizationProfitable(int64_t trip_count);
- void SetPeelingCandidate(const ArrayReference* candidate, int64_t trip_count);
uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
//
@@ -202,7 +213,7 @@
bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
bool collect_loop_uses,
- /*out*/ int32_t* use_count);
+ /*out*/ uint32_t* use_count);
bool IsUsedOutsideLoop(HLoopInformation* loop_info,
HInstruction* instruction);
bool TryReplaceWithLastValue(HLoopInformation* loop_info,
@@ -254,8 +265,9 @@
// Contents reside in phase-local heap memory.
ScopedArenaSet<ArrayReference>* vector_refs_;
- // Dynamic loop peeling candidate for alignment.
- const ArrayReference* vector_peeling_candidate_;
+ // Static or dynamic loop peeling for alignment.
+ uint32_t vector_static_peeling_factor_;
+ const ArrayReference* vector_dynamic_peeling_candidate_;
// Dynamic data dependence test of the form a != b.
HInstruction* vector_runtime_test_a_;