blob: 8f6920b6417a6611c33205f41c421915532182db [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
2
3#ifndef ART_SRC_DEX_VERIFY_H_
4#define ART_SRC_DEX_VERIFY_H_
5
Ian Rogersd81871c2011-10-03 13:57:23 -07006#include "casts.h"
Elliott Hughes90a33692011-08-30 13:27:07 -07007#include "dex_file.h"
8#include "dex_instruction.h"
Brian Carlstrom578bbdc2011-07-21 14:07:47 -07009#include "macros.h"
10#include "object.h"
Ian Rogersd81871c2011-10-03 13:57:23 -070011#include "stl_util.h"
Elliott Hughes5fe594f2011-09-08 12:33:17 -070012#include "UniquePtr.h"
Carl Shapiro0e5d75d2011-07-06 18:28:37 -070013
Ian Rogers55d249f2011-11-02 16:48:09 -070014#include <deque>
Ian Rogers84fa0742011-10-25 18:13:30 -070015#include <limits>
Ian Rogersd81871c2011-10-03 13:57:23 -070016#include <map>
Ian Rogersd81871c2011-10-03 13:57:23 -070017#include <vector>
Carl Shapiro0e5d75d2011-07-06 18:28:37 -070018
Ian Rogersd81871c2011-10-03 13:57:23 -070019namespace art {
20namespace verifier {
21
22class DexVerifier;
23class PcToReferenceMap;
24class RegTypeCache;
jeffhaobdb76512011-09-07 11:43:16 -070025
// DEAD_CODE_SCAN enables dead code scanning. Scanning is not required, but it is very useful when
// testing changes to the verifier (to make sure we're not skipping over stuff). The only reason
// not to do it is that it slightly increases the time required to perform verification, so it is
// switched on only when NDEBUG is not defined.
#ifdef NDEBUG
# define DEAD_CODE_SCAN false
#else
# define DEAD_CODE_SCAN true
#endif
36
37/*
Ian Rogers84fa0742011-10-25 18:13:30 -070038 * RegType holds information about the "type" of data held in a register.
jeffhaobdb76512011-09-07 11:43:16 -070039 */
Ian Rogersd81871c2011-10-03 13:57:23 -070040class RegType {
Carl Shapiro0e5d75d2011-07-06 18:28:37 -070041 public:
Ian Rogersd81871c2011-10-03 13:57:23 -070042 enum Type {
43 kRegTypeUnknown = 0, /* initial state */
jeffhaobdb76512011-09-07 11:43:16 -070044 kRegTypeConflict, /* merge clash makes this reg's type unknowable */
Ian Rogers84fa0742011-10-25 18:13:30 -070045 kRegTypeBoolean, /* Z */
46 kRegType1nrSTART = kRegTypeBoolean,
47 kRegTypeIntegralSTART = kRegTypeBoolean,
Ian Rogersd81871c2011-10-03 13:57:23 -070048 kRegTypeByte, /* B */
Ian Rogersd81871c2011-10-03 13:57:23 -070049 kRegTypeShort, /* S */
50 kRegTypeChar, /* C */
51 kRegTypeInteger, /* I */
52 kRegTypeIntegralEND = kRegTypeInteger,
53 kRegTypeFloat, /* F */
54 kRegType1nrEND = kRegTypeFloat,
Ian Rogers84fa0742011-10-25 18:13:30 -070055 kRegTypeLongLo, /* J - lower-numbered register; endian-independent */
56 kRegTypeLongHi,
57 kRegTypeDoubleLo, /* D */
58 kRegTypeDoubleHi,
Ian Rogersd81871c2011-10-03 13:57:23 -070059 kRegTypeConstLo, /* const derived wide, lower half - could be long or double */
60 kRegTypeConstHi, /* const derived wide, upper half - could be long or double */
Ian Rogers84fa0742011-10-25 18:13:30 -070061 kRegTypeLastFixedLocation = kRegTypeConstHi,
62 kRegTypeConst, /* 32-bit constant derived value - could be float or int */
63 kRegTypeUnresolvedReference, // Reference type that couldn't be resolved
64 kRegTypeUninitializedReference, // Freshly allocated reference type
65 kRegTypeUninitializedThisReference, // Freshly allocated reference passed as "this"
66 kRegTypeUnresolvedAndUninitializedReference, // Freshly allocated unresolved reference type
67 kRegTypeReference, // Reference type
jeffhaobdb76512011-09-07 11:43:16 -070068 };
69
Ian Rogersd81871c2011-10-03 13:57:23 -070070 Type GetType() const {
71 return type_;
jeffhaobdb76512011-09-07 11:43:16 -070072 }
73
Ian Rogersd81871c2011-10-03 13:57:23 -070074 bool IsUnknown() const { return type_ == kRegTypeUnknown; }
75 bool IsConflict() const { return type_ == kRegTypeConflict; }
Ian Rogersd81871c2011-10-03 13:57:23 -070076 bool IsBoolean() const { return type_ == kRegTypeBoolean; }
77 bool IsByte() const { return type_ == kRegTypeByte; }
78 bool IsChar() const { return type_ == kRegTypeChar; }
79 bool IsShort() const { return type_ == kRegTypeShort; }
80 bool IsInteger() const { return type_ == kRegTypeInteger; }
81 bool IsLong() const { return type_ == kRegTypeLongLo; }
82 bool IsFloat() const { return type_ == kRegTypeFloat; }
83 bool IsDouble() const { return type_ == kRegTypeDoubleLo; }
Ian Rogers84fa0742011-10-25 18:13:30 -070084 bool IsUnresolvedReference() const { return type_ == kRegTypeUnresolvedReference; }
85 bool IsUninitializedReference() const { return type_ == kRegTypeUninitializedReference; }
86 bool IsUninitializedThisReference() const { return type_ == kRegTypeUninitializedThisReference; }
87 bool IsUnresolvedAndUninitializedReference() const {
88 return type_ == kRegTypeUnresolvedAndUninitializedReference;
89 }
Ian Rogersd81871c2011-10-03 13:57:23 -070090 bool IsReference() const { return type_ == kRegTypeReference; }
Ian Rogers84fa0742011-10-25 18:13:30 -070091 bool IsUninitializedTypes() const {
92 return IsUninitializedReference() || IsUninitializedThisReference() ||
93 IsUnresolvedAndUninitializedReference();
94 }
Ian Rogers9074b992011-10-26 17:41:55 -070095 bool IsUnresolvedTypes() const {
96 return IsUnresolvedReference() || IsUnresolvedAndUninitializedReference();
97 }
Ian Rogersd81871c2011-10-03 13:57:23 -070098 bool IsLowHalf() const { return type_ == kRegTypeLongLo ||
99 type_ == kRegTypeDoubleLo ||
100 type_ == kRegTypeConstLo; }
101 bool IsHighHalf() const { return type_ == kRegTypeLongHi ||
102 type_ == kRegTypeDoubleHi ||
103 type_ == kRegTypeConstHi; }
104
Ian Rogers84fa0742011-10-25 18:13:30 -0700105 bool IsLongOrDoubleTypes() const { return IsLowHalf(); }
Ian Rogersd81871c2011-10-03 13:57:23 -0700106
Ian Rogers84fa0742011-10-25 18:13:30 -0700107 // Check this is the low half, and that type_h is its matching high-half
Ian Rogersd81871c2011-10-03 13:57:23 -0700108 bool CheckWidePair(const RegType& type_h) const {
109 return IsLowHalf() && (type_h.type_ == type_ + 1);
jeffhaobdb76512011-09-07 11:43:16 -0700110 }
111
Ian Rogers84fa0742011-10-25 18:13:30 -0700112 // The high half that corresponds to this low half
113 const RegType& HighHalf(RegTypeCache* cache) const;
114
115 bool IsConstant() const { return type_ == kRegTypeConst; }
116 bool IsLongConstant() const { return type_ == kRegTypeConstLo; }
117 bool IsLongConstantHigh() const { return type_ == kRegTypeConstHi; }
118
119 // If this is a 32-bit constant, what is the value? This value may just
120 // approximate to the actual constant value by virtue of merging.
121 int32_t ConstantValue() const {
122 DCHECK(IsConstant());
123 return allocation_pc_or_constant_;
124 }
125
126 bool IsZero() const { return IsConstant() && ConstantValue() == 0; }
127 bool IsOne() const { return IsConstant() && ConstantValue() == 1; }
128 bool IsConstantBoolean() const { return IsZero() || IsOne(); }
129 bool IsConstantByte() const {
130 return IsConstant() &&
131 ConstantValue() >= std::numeric_limits<jbyte>::min() &&
132 ConstantValue() <= std::numeric_limits<jbyte>::max();
133 }
134 bool IsConstantShort() const {
135 return IsConstant() &&
136 ConstantValue() >= std::numeric_limits<jshort>::min() &&
137 ConstantValue() <= std::numeric_limits<jshort>::max();
138 }
139 bool IsConstantChar() const {
140 return IsConstant() && ConstantValue() >= 0 &&
141 ConstantValue() <= std::numeric_limits<jchar>::max();
142 }
143
144 bool IsReferenceTypes() const {
145 return IsReference() || IsUnresolvedReference() || IsUninitializedReference() ||
Ian Rogers28ad40d2011-10-27 15:19:26 -0700146 IsUninitializedThisReference() || IsUnresolvedAndUninitializedReference() || IsZero();
Ian Rogers84fa0742011-10-25 18:13:30 -0700147 }
148 bool IsNonZeroReferenceTypes() const {
149 return IsReference() || IsUnresolvedReference() || IsUninitializedReference() ||
150 IsUninitializedThisReference();
151 }
152 bool IsCategory1Types() const {
153 return (type_ >= kRegType1nrSTART && type_ <= kRegType1nrEND) || IsConstant();
154 }
155 bool IsCategory2Types() const {
156 return IsLowHalf(); // Don't expect explicit testing of high halves
157 }
158
159 bool IsBooleanTypes() const { return IsBoolean() || IsConstantBoolean(); }
160 bool IsByteTypes() const { return IsByte() || IsBoolean() || IsConstantByte(); }
161 bool IsShortTypes() const { return IsShort() || IsByte() || IsBoolean() || IsConstantShort(); }
162 bool IsCharTypes() const { return IsChar() || IsBooleanTypes() || IsConstantChar(); }
163 bool IsIntegralTypes() const {
164 return (type_ >= kRegTypeIntegralSTART && type_ <= kRegTypeIntegralEND) || IsConstant();
165 }
166 bool IsArrayIndexTypes() const { return IsIntegralTypes(); }
167
168 // Float type may be derived from any constant type
169 bool IsFloatTypes() const { return IsFloat() || IsConstant(); }
170
171 bool IsLongTypes() const {
172 return IsLong() || IsLongConstant();
173 }
174 bool IsLongHighTypes() const {
175 return type_ == kRegTypeLongHi || type_ == kRegTypeConstHi;
176 }
177 bool IsDoubleTypes() const {
178 return IsDouble() || IsLongConstant();
179 }
180 bool IsDoubleHighTypes() const {
181 return type_ == kRegTypeDoubleHi || type_ == kRegTypeConstHi;
182 }
183
184 uint32_t GetAllocationPc() const {
Ian Rogers28ad40d2011-10-27 15:19:26 -0700185 DCHECK(IsUninitializedTypes());
Ian Rogers84fa0742011-10-25 18:13:30 -0700186 return allocation_pc_or_constant_;
187 }
188
189 Class* GetClass() const {
190 DCHECK(!IsUnresolvedReference());
191 DCHECK(klass_or_descriptor_ != NULL);
192 DCHECK(klass_or_descriptor_->IsClass());
193 return down_cast<Class*>(klass_or_descriptor_);
194 }
195
Ian Rogers9074b992011-10-26 17:41:55 -0700196 bool IsJavaLangObject() const {
197 return IsReference() && GetClass()->IsObjectClass();
198 }
Ian Rogers28ad40d2011-10-27 15:19:26 -0700199 bool IsInstantiableTypes() const {
200 return IsUnresolvedTypes() || (IsNonZeroReferenceTypes() && GetClass()->IsInstantiable());
201 }
Ian Rogers84fa0742011-10-25 18:13:30 -0700202 String* GetDescriptor() const {
Ian Rogers28ad40d2011-10-27 15:19:26 -0700203 DCHECK(IsUnresolvedTypes());
Ian Rogers84fa0742011-10-25 18:13:30 -0700204 DCHECK(klass_or_descriptor_ != NULL);
Elliott Hughesdbb40792011-11-18 17:05:22 -0800205 DCHECK(klass_or_descriptor_->GetClass()->IsStringClass());
Ian Rogers84fa0742011-10-25 18:13:30 -0700206 return down_cast<String*>(klass_or_descriptor_);
207 }
Ian Rogers28ad40d2011-10-27 15:19:26 -0700208 bool IsArrayClass() const {
209 if (IsUnresolvedTypes()) {
210 return GetDescriptor()->CharAt(0) == '[';
211 } else if (!IsConstant()) {
212 return GetClass()->IsArrayClass();
213 } else {
214 return false;
215 }
216 }
Ian Rogers84fa0742011-10-25 18:13:30 -0700217
Ian Rogersd81871c2011-10-03 13:57:23 -0700218 uint16_t GetId() const {
219 return cache_id_;
jeffhaobdb76512011-09-07 11:43:16 -0700220 }
221
Ian Rogers84fa0742011-10-25 18:13:30 -0700222 std::string Dump() const;
Ian Rogersd81871c2011-10-03 13:57:23 -0700223
Ian Rogersb5e95b92011-10-25 23:28:55 -0700224 bool IsAssignableFrom(const RegType& src) const;
Ian Rogersd81871c2011-10-03 13:57:23 -0700225
226 const RegType& Merge(const RegType& incoming_type, RegTypeCache* reg_types) const;
227
Ian Rogers84fa0742011-10-25 18:13:30 -0700228 bool Equals(const RegType& other) const { return GetId() == other.GetId(); }
jeffhaobdb76512011-09-07 11:43:16 -0700229
230 /*
Ian Rogersd81871c2011-10-03 13:57:23 -0700231 * A basic Join operation on classes. For a pair of types S and T the Join, written S v T = J, is
232 * S <: J, T <: J and for-all U such that S <: U, T <: U then J <: U. That is J is the parent of
233 * S and T such that there isn't a parent of both S and T that isn't also the parent of J (ie J
234 * is the deepest (lowest upper bound) parent of S and T).
235 *
236 * This operation applies for regular classes and arrays, however, for interface types there needn't
237 * be a partial ordering on the types. We could solve the problem of a lack of a partial order by
238 * introducing sets of types, however, the only operation permissible on an interface is
239 * invoke-interface. In the tradition of Java verifiers [1] we defer the verification of interface
240 * types until an invoke-interface call on the interface typed reference at runtime and allow
241 * the perversion of Object being assignable to an interface type (note, however, that we don't
242 * allow assignment of Object or Interface to any concrete class and are therefore type safe).
243 *
Elliott Hughes91250e02011-12-13 22:30:35 -0800244 * [1] Java bytecode verification: algorithms and formalizations, Xavier Leroy
jeffhaobdb76512011-09-07 11:43:16 -0700245 */
Ian Rogersd81871c2011-10-03 13:57:23 -0700246 static Class* ClassJoin(Class* s, Class* t);
jeffhaoe23d93c2011-09-15 14:48:43 -0700247
248 private:
Ian Rogersd81871c2011-10-03 13:57:23 -0700249 friend class RegTypeCache;
250
Ian Rogers84fa0742011-10-25 18:13:30 -0700251 RegType(Type type, Object* klass_or_descriptor, uint32_t allocation_pc_or_constant, uint16_t cache_id) :
252 type_(type), klass_or_descriptor_(klass_or_descriptor), allocation_pc_or_constant_(allocation_pc_or_constant),
253 cache_id_(cache_id) {
Ian Rogers28ad40d2011-10-27 15:19:26 -0700254 DCHECK(IsConstant() || IsUninitializedTypes() || allocation_pc_or_constant == 0);
Ian Rogers84fa0742011-10-25 18:13:30 -0700255 if (!IsConstant() && !IsLongConstant() && !IsLongConstantHigh() && !IsUnknown() &&
256 !IsConflict()) {
257 DCHECK(klass_or_descriptor != NULL);
Ian Rogers28ad40d2011-10-27 15:19:26 -0700258 DCHECK(IsUnresolvedTypes() || klass_or_descriptor_->IsClass());
Elliott Hughesdbb40792011-11-18 17:05:22 -0800259 DCHECK(!IsUnresolvedTypes() || klass_or_descriptor_->GetClass()->IsStringClass());
Ian Rogers84fa0742011-10-25 18:13:30 -0700260 }
Ian Rogersd81871c2011-10-03 13:57:23 -0700261 }
262
263 const Type type_; // The current type of the register
264
Ian Rogers84fa0742011-10-25 18:13:30 -0700265 // If known the type of the register, else a String for the descriptor
266 Object* klass_or_descriptor_;
Ian Rogersd81871c2011-10-03 13:57:23 -0700267
Ian Rogers84fa0742011-10-25 18:13:30 -0700268 // Overloaded field that:
269 // - if IsConstant() holds a 32bit constant value
270 // - is IsReference() holds the allocation_pc or kInitArgAddr for an initialized reference or
271 // kUninitThisArgAddr for an uninitialized this ptr
272 const uint32_t allocation_pc_or_constant_;
Ian Rogersd81871c2011-10-03 13:57:23 -0700273
274 // A RegType cache densely encodes types, this is the location in the cache for this type
275 const uint16_t cache_id_;
276
Ian Rogersd81871c2011-10-03 13:57:23 -0700277 DISALLOW_COPY_AND_ASSIGN(RegType);
278};
279std::ostream& operator<<(std::ostream& os, const RegType& rhs);
280
281class RegTypeCache {
282 public:
Ian Rogers84fa0742011-10-25 18:13:30 -0700283 explicit RegTypeCache() : entries_(RegType::kRegTypeLastFixedLocation + 1) {
Ian Rogersd81871c2011-10-03 13:57:23 -0700284 Unknown(); // ensure Unknown is initialized
285 }
286 ~RegTypeCache() {
287 STLDeleteElements(&entries_);
288 }
289
290 const RegType& GetFromId(uint16_t id) {
291 DCHECK_LT(id, entries_.size());
292 RegType* result = entries_[id];
293 DCHECK(result != NULL);
294 return *result;
295 }
296
297 const RegType& From(RegType::Type type, const ClassLoader* loader, const std::string& descriptor);
298 const RegType& FromClass(Class* klass);
299 const RegType& FromCat1Const(int32_t value);
300 const RegType& FromDescriptor(const ClassLoader* loader, const std::string& descriptor);
301 const RegType& FromType(RegType::Type);
302
303 const RegType& Boolean() { return FromType(RegType::kRegTypeBoolean); }
304 const RegType& Byte() { return FromType(RegType::kRegTypeByte); }
305 const RegType& Char() { return FromType(RegType::kRegTypeChar); }
306 const RegType& Short() { return FromType(RegType::kRegTypeShort); }
307 const RegType& Integer() { return FromType(RegType::kRegTypeInteger); }
308 const RegType& Float() { return FromType(RegType::kRegTypeFloat); }
309 const RegType& Long() { return FromType(RegType::kRegTypeLongLo); }
310 const RegType& Double() { return FromType(RegType::kRegTypeDoubleLo); }
311
312 const RegType& JavaLangClass() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Class;"); }
313 const RegType& JavaLangObject() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Object;"); }
314 const RegType& JavaLangString() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/String;"); }
Ian Rogers28ad40d2011-10-27 15:19:26 -0700315 const RegType& JavaLangThrowable() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Throwable;"); }
Ian Rogersd81871c2011-10-03 13:57:23 -0700316
317 const RegType& Unknown() { return FromType(RegType::kRegTypeUnknown); }
318 const RegType& Conflict() { return FromType(RegType::kRegTypeConflict); }
Ian Rogersd81871c2011-10-03 13:57:23 -0700319 const RegType& ConstLo() { return FromType(RegType::kRegTypeConstLo); }
Ian Rogers84fa0742011-10-25 18:13:30 -0700320 const RegType& Zero() { return FromCat1Const(0); }
Ian Rogersd81871c2011-10-03 13:57:23 -0700321
Ian Rogers28ad40d2011-10-27 15:19:26 -0700322 const RegType& Uninitialized(const RegType& type, uint32_t allocation_pc);
Ian Rogersd81871c2011-10-03 13:57:23 -0700323 const RegType& UninitializedThisArgument(Class* klass);
Ian Rogers28ad40d2011-10-27 15:19:26 -0700324 const RegType& FromUninitialized(const RegType& uninit_type);
Ian Rogersd81871c2011-10-03 13:57:23 -0700325
Ian Rogers84fa0742011-10-25 18:13:30 -0700326 // Representatives of various constant types. When merging constants we can't infer a type,
327 // (an int may later be used as a float) so we select these representative values meaning future
328 // merges won't know the exact constant value but have some notion of its size.
329 const RegType& ByteConstant() { return FromCat1Const(std::numeric_limits<jbyte>::min()); }
330 const RegType& ShortConstant() { return FromCat1Const(std::numeric_limits<jshort>::min()); }
331 const RegType& IntConstant() { return FromCat1Const(std::numeric_limits<jint>::max()); }
Ian Rogers28ad40d2011-10-27 15:19:26 -0700332
333 const RegType& GetComponentType(const RegType& array, const ClassLoader* loader);
Ian Rogersd81871c2011-10-03 13:57:23 -0700334 private:
335 // The allocated entries
336 std::vector<RegType*> entries_;
337
338 DISALLOW_COPY_AND_ASSIGN(RegTypeCache);
339};
340
341class InsnFlags {
342 public:
343 InsnFlags() : length_(0), flags_(0) {}
344
345 void SetLengthInCodeUnits(size_t length) {
346 CHECK_LT(length, 65536u);
347 length_ = length;
348 }
349 size_t GetLengthInCodeUnits() {
350 return length_;
351 }
352 bool IsOpcode() const {
353 return length_ != 0;
354 }
355
356 void SetInTry() {
357 flags_ |= 1 << kInsnFlagInTry;
358 }
359 void ClearInTry() {
360 flags_ &= ~(1 << kInsnFlagInTry);
361 }
362 bool IsInTry() const {
363 return (flags_ & (1 << kInsnFlagInTry)) != 0;
364 }
365
366 void SetBranchTarget() {
367 flags_ |= 1 << kInsnFlagBranchTarget;
368 }
369 void ClearBranchTarget() {
370 flags_ &= ~(1 << kInsnFlagBranchTarget);
371 }
372 bool IsBranchTarget() const {
373 return (flags_ & (1 << kInsnFlagBranchTarget)) != 0;
374 }
375
376 void SetGcPoint() {
377 flags_ |= 1 << kInsnFlagGcPoint;
378 }
379 void ClearGcPoint() {
380 flags_ &= ~(1 << kInsnFlagGcPoint);
381 }
382 bool IsGcPoint() const {
383 return (flags_ & (1 << kInsnFlagGcPoint)) != 0;
384 }
385
386 void SetVisited() {
387 flags_ |= 1 << kInsnFlagVisited;
388 }
389 void ClearVisited() {
390 flags_ &= ~(1 << kInsnFlagVisited);
391 }
392 bool IsVisited() const {
393 return (flags_ & (1 << kInsnFlagVisited)) != 0;
394 }
395
396 void SetChanged() {
397 flags_ |= 1 << kInsnFlagChanged;
398 }
399 void ClearChanged() {
400 flags_ &= ~(1 << kInsnFlagChanged);
401 }
402 bool IsChanged() const {
403 return (flags_ & (1 << kInsnFlagChanged)) != 0;
404 }
405
406 bool IsVisitedOrChanged() const {
407 return IsVisited() || IsChanged();
408 }
409
Ian Rogers2c8a8572011-10-24 17:11:36 -0700410 std::string Dump() {
Ian Rogersd81871c2011-10-03 13:57:23 -0700411 char encoding[6];
412 if (!IsOpcode()) {
413 strncpy(encoding, "XXXXX", sizeof(encoding));
414 } else {
415 strncpy(encoding, "-----", sizeof(encoding));
416 if (IsInTry()) encoding[kInsnFlagInTry] = 'T';
417 if (IsBranchTarget()) encoding[kInsnFlagBranchTarget] = 'B';
418 if (IsGcPoint()) encoding[kInsnFlagGcPoint] = 'G';
419 if (IsVisited()) encoding[kInsnFlagVisited] = 'V';
420 if (IsChanged()) encoding[kInsnFlagChanged] = 'C';
421 }
Ian Rogers2c8a8572011-10-24 17:11:36 -0700422 return std::string(encoding);
Ian Rogersd81871c2011-10-03 13:57:23 -0700423 }
424 private:
425 enum InsnFlag {
426 kInsnFlagInTry,
427 kInsnFlagBranchTarget,
428 kInsnFlagGcPoint,
429 kInsnFlagVisited,
430 kInsnFlagChanged,
431 };
432
433 // Size of instruction in code units
434 uint16_t length_;
435 uint8_t flags_;
436};
437
// "Direct" and "virtual" methods are stored independently. The type of call used to invoke the
// method determines which list we search, and whether we travel up into superclasses.
//
// (<clinit>, <init>, and methods declared "private" or "static" are stored in the "direct" list.
// All others are stored in the "virtual" list.)
enum MethodType {
  METHOD_UNKNOWN   = 0,
  METHOD_DIRECT    = 1,  // <init>, private
  METHOD_STATIC    = 2,  // static
  METHOD_VIRTUAL   = 3,  // virtual, super
  METHOD_INTERFACE = 4   // interface
};
452
// NOTE(review): no use of these two constants is visible in this part of the file; judging by
// the names they describe an 8-bit field used for uninitialized register types - confirm against
// the users before changing either value.
const int kRegTypeUninitShift = 8;
const int kRegTypeUninitMask = 0xff;
455
// Register type categories, for type checking.
//
// The spec says category 1 includes boolean, byte, char, short, int, float, reference, and
// returnAddress. Category 2 includes long and double.
//
// We treat object references separately, so we have "category1nr". We don't support jsr/ret, so
// there is no "returnAddress" type.
enum TypeCategory {
  kTypeCategoryUnknown = 0,
  kTypeCategory1nr,  // = 1: boolean, byte, char, short, int, float
  kTypeCategory2,    // = 2: long, double
  kTypeCategoryRef,  // = 3: object reference
};
471
// An enumeration of problems that can turn up during verification.
// VERIFY_ERROR_GENERIC denotes a failure that causes the entire class to be rejected. Other errors
// denote verification errors that cause bytecode to be rewritten to fail at runtime.
enum VerifyError {
  VERIFY_ERROR_NONE          = 0,  // no error; must be zero
  VERIFY_ERROR_GENERIC       = 1,  // VerifyError

  VERIFY_ERROR_NO_CLASS      = 2,  // NoClassDefFoundError
  VERIFY_ERROR_NO_FIELD      = 3,  // NoSuchFieldError
  VERIFY_ERROR_NO_METHOD     = 4,  // NoSuchMethodError
  VERIFY_ERROR_ACCESS_CLASS  = 5,  // IllegalAccessError
  VERIFY_ERROR_ACCESS_FIELD  = 6,  // IllegalAccessError
  VERIFY_ERROR_ACCESS_METHOD = 7,  // IllegalAccessError
  VERIFY_ERROR_CLASS_CHANGE  = 8,  // IncompatibleClassChangeError
  VERIFY_ERROR_INSTANTIATION = 9,  // InstantiationError
};
// Stream insertion for VerifyError values; defined outside this header.
std::ostream& operator<<(std::ostream& os, const VerifyError& rhs);
491
// Identifies the type of reference in the instruction that generated the verify error
// (e.g. VERIFY_ERROR_ACCESS_CLASS could come from a method, field, or class reference).
//
// This must fit in two bits.
enum VerifyErrorRefType {
  VERIFY_ERROR_REF_CLASS = 0,
  VERIFY_ERROR_REF_FIELD,   // = 1
  VERIFY_ERROR_REF_METHOD,  // = 2
};
// NOTE(review): presumably the bit position at which a VerifyErrorRefType is packed next to a
// VerifyError; the packing code is not in this part of the file - confirm there.
const int kVerifyErrorRefTypeShift = 6;
504
// Format enumeration for RegisterMap data area.
enum RegisterMapFormat {
  kRegMapFormatUnknown   = 0,
  kRegMapFormatNone      = 1,  // indicates no map data follows
  kRegMapFormatCompact8  = 2,  // compact layout, 8-bit addresses
  kRegMapFormatCompact16 = 3,  // compact layout, 16-bit addresses
};
514
515// During verification, we associate one of these with every "interesting" instruction. We track
516// the status of all registers, and (if the method has any monitor-enter instructions) maintain a
517// stack of entered monitors (identified by code unit offset).
518// If live-precise register maps are enabled, the "liveRegs" vector will be populated. Unlike the
519// other lists of registers here, we do not track the liveness of the method result register
520// (which is not visible to the GC).
521class RegisterLine {
522 public:
523 RegisterLine(size_t num_regs, DexVerifier* verifier) :
524 line_(new uint16_t[num_regs]), verifier_(verifier), num_regs_(num_regs) {
525 memset(line_.get(), 0, num_regs_ * sizeof(uint16_t));
526 result_[0] = RegType::kRegTypeUnknown;
527 result_[1] = RegType::kRegTypeUnknown;
528 }
529
530 // Implement category-1 "move" instructions. Copy a 32-bit value from "vsrc" to "vdst".
531 void CopyRegister1(uint32_t vdst, uint32_t vsrc, TypeCategory cat);
532
533 // Implement category-2 "move" instructions. Copy a 64-bit value from "vsrc" to "vdst". This
534 // copies both halves of the register.
535 void CopyRegister2(uint32_t vdst, uint32_t vsrc);
536
537 // Implement "move-result". Copy the category-1 value from the result register to another
538 // register, and reset the result register.
539 void CopyResultRegister1(uint32_t vdst, bool is_reference);
540
541 // Implement "move-result-wide". Copy the category-2 value from the result register to another
542 // register, and reset the result register.
543 void CopyResultRegister2(uint32_t vdst);
544
545 // Set the invisible result register to unknown
546 void SetResultTypeToUnknown();
547
548 // Set the type of register N, verifying that the register is valid. If "newType" is the "Lo"
549 // part of a 64-bit value, register N+1 will be set to "newType+1".
550 // The register index was validated during the static pass, so we don't need to check it here.
551 void SetRegisterType(uint32_t vdst, const RegType& new_type);
552
553 /* Set the type of the "result" register. */
554 void SetResultRegisterType(const RegType& new_type);
555
556 // Get the type of register vsrc.
557 const RegType& GetRegisterType(uint32_t vsrc) const;
558
559 bool VerifyRegisterType(uint32_t vsrc, const RegType& check_type);
560
561 void CopyFromLine(const RegisterLine* src) {
562 DCHECK_EQ(num_regs_, src->num_regs_);
563 memcpy(line_.get(), src->line_.get(), num_regs_ * sizeof(uint16_t));
564 monitors_ = src->monitors_;
565 reg_to_lock_depths_ = src->reg_to_lock_depths_;
566 }
567
Ian Rogers2c8a8572011-10-24 17:11:36 -0700568 std::string Dump() const {
569 std::string result;
Ian Rogersd81871c2011-10-03 13:57:23 -0700570 for (size_t i = 0; i < num_regs_; i++) {
Elliott Hughes92cb4982011-12-16 16:57:28 -0800571 result += StringPrintf("%zd:[", i);
Ian Rogers2c8a8572011-10-24 17:11:36 -0700572 result += GetRegisterType(i).Dump();
Ian Rogers84fa0742011-10-25 18:13:30 -0700573 result += "],";
Ian Rogersd81871c2011-10-03 13:57:23 -0700574 }
Ian Rogers55d249f2011-11-02 16:48:09 -0700575 typedef std::deque<uint32_t>::const_iterator It; // TODO: C++0x auto
576 for (It it = monitors_.begin(), end = monitors_.end(); it != end ; ++it) {
577 result += StringPrintf("{%d},", *it);
578 }
Ian Rogers2c8a8572011-10-24 17:11:36 -0700579 return result;
Ian Rogersd81871c2011-10-03 13:57:23 -0700580 }
581
582 void FillWithGarbage() {
583 memset(line_.get(), 0xf1, num_regs_ * sizeof(uint16_t));
584 while (!monitors_.empty()) {
Ian Rogers55d249f2011-11-02 16:48:09 -0700585 monitors_.pop_back();
Ian Rogersd81871c2011-10-03 13:57:23 -0700586 }
587 reg_to_lock_depths_.clear();
588 }
589
590 /*
591 * We're creating a new instance of class C at address A. Any registers holding instances
592 * previously created at address A must be initialized by now. If not, we mark them as "conflict"
593 * to prevent them from being used (otherwise, MarkRefsAsInitialized would mark the old ones and
594 * the new ones at the same time).
595 */
596 void MarkUninitRefsAsInvalid(const RegType& uninit_type);
597
598 /*
599 * Update all registers holding "uninit_type" to instead hold the corresponding initialized
600 * reference type. This is called when an appropriate constructor is invoked -- all copies of
601 * the reference must be marked as initialized.
602 */
603 void MarkRefsAsInitialized(const RegType& uninit_type);
604
605 /*
606 * Check constraints on constructor return. Specifically, make sure that the "this" argument got
607 * initialized.
608 * The "this" argument to <init> uses code offset kUninitThisArgAddr, which puts it at the start
609 * of the list in slot 0. If we see a register with an uninitialized slot 0 reference, we know it
610 * somehow didn't get initialized.
611 */
612 bool CheckConstructorReturn() const;
613
614 // Compare two register lines. Returns 0 if they match.
615 // Using this for a sort is unwise, since the value can change based on machine endianness.
616 int CompareLine(const RegisterLine* line2) const {
617 DCHECK(monitors_ == line2->monitors_);
618 // TODO: DCHECK(reg_to_lock_depths_ == line2->reg_to_lock_depths_);
619 return memcmp(line_.get(), line2->line_.get(), num_regs_ * sizeof(uint16_t));
620 }
621
622 size_t NumRegs() const {
623 return num_regs_;
624 }
625
626 /*
627 * Get the "this" pointer from a non-static method invocation. This returns the RegType so the
628 * caller can decide whether it needs the reference to be initialized or not. (Can also return
629 * kRegTypeZero if the reference can only be zero at this point.)
630 *
631 * The argument count is in vA, and the first argument is in vC, for both "simple" and "range"
632 * versions. We just need to make sure vA is >= 1 and then return vC.
633 */
634 const RegType& GetInvocationThis(const Instruction::DecodedInstruction& dec_insn);
635
636 /*
637 * Get the value from a register, and cast it to a Class. Sets "*failure" if something fails.
638 * This fails if the register holds an uninitialized class.
639 * If the register holds kRegTypeZero, this returns a NULL pointer.
640 */
641 Class* GetClassFromRegister(uint32_t vsrc) const;
642
643 /*
644 * Verify types for a simple two-register instruction (e.g. "neg-int").
645 * "dst_type" is stored into vA, and "src_type" is verified against vB.
646 */
647 void CheckUnaryOp(const Instruction::DecodedInstruction& dec_insn,
648 const RegType& dst_type, const RegType& src_type);
649
650 /*
651 * Verify types for a simple three-register instruction (e.g. "add-int").
652 * "dst_type" is stored into vA, and "src_type1"/"src_type2" are verified
653 * against vB/vC.
654 */
655 void CheckBinaryOp(const Instruction::DecodedInstruction& dec_insn,
656 const RegType& dst_type, const RegType& src_type1, const RegType& src_type2,
657 bool check_boolean_op);
658
659 /*
660 * Verify types for a binary "2addr" operation. "src_type1"/"src_type2"
661 * are verified against vA/vB, then "dst_type" is stored into vA.
662 */
663 void CheckBinaryOp2addr(const Instruction::DecodedInstruction& dec_insn,
664 const RegType& dst_type,
665 const RegType& src_type1, const RegType& src_type2,
666 bool check_boolean_op);
667
668 /*
669 * Verify types for A two-register instruction with a literal constant (e.g. "add-int/lit8").
670 * "dst_type" is stored into vA, and "src_type" is verified against vB.
671 *
672 * If "check_boolean_op" is set, we use the constant value in vC.
673 */
674 void CheckLiteralOp(const Instruction::DecodedInstruction& dec_insn,
675 const RegType& dst_type, const RegType& src_type, bool check_boolean_op);
676
677 // Verify/push monitor onto the monitor stack, locking the value in reg_idx at location insn_idx.
678 void PushMonitor(uint32_t reg_idx, int32_t insn_idx);
679
680 // Verify/pop monitor from monitor stack ensuring that we believe the monitor is locked
681 void PopMonitor(uint32_t reg_idx);
682
683 // Stack of currently held monitors and where they were locked
684 size_t MonitorStackDepth() const {
685 return monitors_.size();
686 }
687
688 // We expect no monitors to be held at certain points, such a method returns. Verify the stack
689 // is empty, failing and returning false if not.
690 bool VerifyMonitorStackEmpty();
691
692 bool MergeRegisters(const RegisterLine* incoming_line);
693
Ian Rogers84fa0742011-10-25 18:13:30 -0700694 size_t GetMaxNonZeroReferenceReg(size_t max_ref_reg) {
Ian Rogersd81871c2011-10-03 13:57:23 -0700695 size_t i = static_cast<int>(max_ref_reg) < 0 ? 0 : max_ref_reg;
696 for(; i < num_regs_; i++) {
Ian Rogers84fa0742011-10-25 18:13:30 -0700697 if (GetRegisterType(i).IsNonZeroReferenceTypes()) {
Ian Rogersd81871c2011-10-03 13:57:23 -0700698 max_ref_reg = i;
699 }
700 }
701 return max_ref_reg;
702 }
703
// Writes a bit into "data" at each register position that holds a reference.
// NOTE(review): "max_bytes" is presumably the capacity of "data" in bytes -- confirm against the
// definition.
void WriteReferenceBitMap(int8_t* data,
                          size_t max_bytes);
706 private:
707
708 void CopyRegToLockDepth(size_t dst, size_t src) {
709 if (reg_to_lock_depths_.count(src) > 0) {
Elliott Hughesfbef9462011-12-14 14:24:40 -0800710 uint32_t depths = reg_to_lock_depths_[src];
Ian Rogersd81871c2011-10-03 13:57:23 -0700711 reg_to_lock_depths_[dst] = depths;
712 }
713 }
714
715 bool IsSetLockDepth(size_t reg, size_t depth) {
716 if (reg_to_lock_depths_.count(reg) > 0) {
Elliott Hughesfbef9462011-12-14 14:24:40 -0800717 uint32_t depths = reg_to_lock_depths_[reg];
Ian Rogersd81871c2011-10-03 13:57:23 -0700718 return (depths & (1 << depth)) != 0;
719 } else {
720 return false;
721 }
722 }
723
724 void SetRegToLockDepth(size_t reg, size_t depth) {
Elliott Hughesfbef9462011-12-14 14:24:40 -0800725 CHECK_LT(depth, 32u);
Ian Rogersd81871c2011-10-03 13:57:23 -0700726 DCHECK(!IsSetLockDepth(reg, depth));
Elliott Hughesfbef9462011-12-14 14:24:40 -0800727 uint32_t depths;
Ian Rogersd81871c2011-10-03 13:57:23 -0700728 if (reg_to_lock_depths_.count(reg) > 0) {
729 depths = reg_to_lock_depths_[reg];
730 depths = depths | (1 << depth);
731 } else {
732 depths = 1 << depth;
733 }
734 reg_to_lock_depths_[reg] = depths;
735 }
736
737 void ClearRegToLockDepth(size_t reg, size_t depth) {
Elliott Hughesfbef9462011-12-14 14:24:40 -0800738 CHECK_LT(depth, 32u);
Ian Rogersd81871c2011-10-03 13:57:23 -0700739 DCHECK(IsSetLockDepth(reg, depth));
Elliott Hughesfbef9462011-12-14 14:24:40 -0800740 uint32_t depths = reg_to_lock_depths_[reg];
Ian Rogersd81871c2011-10-03 13:57:23 -0700741 depths = depths ^ (1 << depth);
742 if (depths != 0) {
743 reg_to_lock_depths_[reg] = depths;
744 } else {
745 reg_to_lock_depths_.erase(reg);
746 }
747 }
748
749 void ClearAllRegToLockDepths(size_t reg) {
750 reg_to_lock_depths_.erase(reg);
751 }
752
753 // Storage for the result register's type, valid after an invocation
754 uint16_t result_[2];
755
756 // An array of RegType Ids associated with each dex register
757 UniquePtr<uint16_t[]> line_;
758
759 // Back link to the verifier
760 DexVerifier* verifier_;
761
762 // Length of reg_types_
763 const size_t num_regs_;
764 // A stack of monitor enter locations
Ian Rogers55d249f2011-11-02 16:48:09 -0700765 std::deque<uint32_t> monitors_;
Ian Rogersd81871c2011-10-03 13:57:23 -0700766 // A map from register to a bit vector of indices into the monitors_ stack. As we pop the monitor
767 // stack we verify that monitor-enter/exit are correctly nested. That is, if there was a
768 // monitor-enter on v5 and then on v6, we expect the monitor-exit to be on v6 then on v5
Elliott Hughesfbef9462011-12-14 14:24:40 -0800769 std::map<uint32_t, uint32_t> reg_to_lock_depths_;
Ian Rogersd81871c2011-10-03 13:57:23 -0700770};
771std::ostream& operator<<(std::ostream& os, const RegisterLine& rhs);
772
773class PcToRegisterLineTable {
774 public:
775 // We don't need to store the register data for many instructions, because we either only need
776 // it at branch points (for verification) or GC points and branches (for verification +
777 // type-precise register analysis).
778 enum RegisterTrackingMode {
779 kTrackRegsBranches,
780 kTrackRegsGcPoints,
781 kTrackRegsAll,
782 };
783 PcToRegisterLineTable() {}
784 ~PcToRegisterLineTable() {
785 STLDeleteValues(&pc_to_register_line_);
786 }
787
788 // Initialize the RegisterTable. Every instruction address can have a different set of information
789 // about what's in which register, but for verification purposes we only need to store it at
790 // branch target addresses (because we merge into that).
791 void Init(RegisterTrackingMode mode, InsnFlags* flags, uint32_t insns_size,
792 uint16_t registers_size, DexVerifier* verifier);
793
794 RegisterLine* GetLine(size_t idx) {
795 return pc_to_register_line_[idx];
796 }
797
798 private:
799 // Map from a dex pc to the register status associated with it
800 std::map<int32_t, RegisterLine*> pc_to_register_line_;
801
802 // Number of registers we track for each instruction. This is equal to the method's declared
803 // "registersSize" plus kExtraRegs (2).
804 size_t insn_reg_count_plus_;
805};
806
807
808
809// The verifier
810class DexVerifier {
811 public:
812 /* Verify a class. Returns "true" on success. */
813 static bool VerifyClass(const Class* klass);
jeffhaobdb76512011-09-07 11:43:16 -0700814 /*
815 * Perform verification on a single method.
816 *
817 * We do this in three passes:
818 * (1) Walk through all code units, determining instruction locations,
819 * widths, and other characteristics.
820 * (2) Walk through all code units, performing static checks on
821 * operands.
822 * (3) Iterate through the method, checking type safety and looking
823 * for code flow problems.
824 *
jeffhaod1f0fde2011-09-08 17:25:33 -0700825 * Some checks may be bypassed depending on the verification mode. We can't
jeffhaobdb76512011-09-07 11:43:16 -0700826 * turn this stuff off completely if we want to do "exact" GC.
827 *
828 * Confirmed here:
829 * - code array must not be empty
830 * Confirmed by ComputeWidthsAndCountOps():
831 * - opcode of first instruction begins at index 0
832 * - only documented instructions may appear
833 * - each instruction follows the last
834 * - last byte of last instruction is at (code_length-1)
835 */
Carl Shapiro0e5d75d2011-07-06 18:28:37 -0700836 static bool VerifyMethod(Method* method);
Shih-wei Liao371814f2011-10-27 16:52:10 -0700837 static void VerifyMethodAndDump(Method* method);
Carl Shapiro0e5d75d2011-07-06 18:28:37 -0700838
Ian Rogersd81871c2011-10-03 13:57:23 -0700839 uint8_t EncodePcToReferenceMapData() const;
840
841 uint32_t DexFileVersion() const {
842 return dex_file_->GetVersion();
843 }
844
845 RegTypeCache* GetRegTypeCache() {
846 return &reg_types_;
847 }
848
849 // Verification failed
850 std::ostream& Fail(VerifyError error) {
851 CHECK_EQ(failure_, VERIFY_ERROR_NONE);
852 failure_ = error;
853 return fail_messages_ << "VFY: " << PrettyMethod(method_)
854 << '[' << (void*)work_insn_idx_ << "] : ";
855 }
856
857 // Log for verification information
858 std::ostream& LogVerifyInfo() {
859 return info_messages_ << "VFY: " << PrettyMethod(method_)
860 << '[' << (void*)work_insn_idx_ << "] : ";
861 }
862
863 // Dump the state of the verifier, namely each instruction, what flags are set on it, register
864 // information
865 void Dump(std::ostream& os);
866
867 private:
868
869 explicit DexVerifier(Method* method);
870
871 bool Verify();
872
873 /*
874 * Compute the width of the instruction at each address in the instruction stream, and store it in
875 * insn_flags_. Addresses that are in the middle of an instruction, or that are part of switch
876 * table data, are not touched (so the caller should probably initialize "insn_flags" to zero).
877 *
878 * The "new_instance_count_" and "monitor_enter_count_" fields in vdata are also set.
879 *
880 * Performs some static checks, notably:
881 * - opcode of first instruction begins at index 0
882 * - only documented instructions may appear
883 * - each instruction follows the last
884 * - last byte of last instruction is at (code_length-1)
885 *
886 * Logs an error and returns "false" on failure.
887 */
888 bool ComputeWidthsAndCountOps();
889
890 /*
891 * Set the "in try" flags for all instructions protected by "try" statements. Also sets the
892 * "branch target" flags for exception handlers.
893 *
894 * Call this after widths have been set in "insn_flags".
895 *
896 * Returns "false" if something in the exception table looks fishy, but we're expecting the
897 * exception table to be somewhat sane.
898 */
899 bool ScanTryCatchBlocks();
900
jeffhaobdb76512011-09-07 11:43:16 -0700901 /*
902 * Perform static verification on all instructions in a method.
903 *
904 * Walks through instructions in a method calling VerifyInstruction on each.
905 */
Ian Rogersd81871c2011-10-03 13:57:23 -0700906 bool VerifyInstructions();
jeffhaobdb76512011-09-07 11:43:16 -0700907
908 /*
909 * Perform static verification on an instruction.
910 *
911 * As a side effect, this sets the "branch target" flags in InsnFlags.
912 *
913 * "(CF)" items are handled during code-flow analysis.
914 *
915 * v3 4.10.1
916 * - target of each jump and branch instruction must be valid
917 * - targets of switch statements must be valid
918 * - operands referencing constant pool entries must be valid
919 * - (CF) operands of getfield, putfield, getstatic, putstatic must be valid
920 * - (CF) operands of method invocation instructions must be valid
921 * - (CF) only invoke-direct can call a method starting with '<'
922 * - (CF) <clinit> must never be called explicitly
923 * - operands of instanceof, checkcast, new (and variants) must be valid
924 * - new-array[-type] limited to 255 dimensions
925 * - can't use "new" on an array class
926 * - (?) limit dimensions in multi-array creation
927 * - local variable load/store register values must be in valid range
928 *
929 * v3 4.11.1.2
930 * - branches must be within the bounds of the code array
931 * - targets of all control-flow instructions are the start of an instruction
932 * - register accesses fall within range of allocated registers
933 * - (N/A) access to constant pool must be of appropriate type
934 * - code does not end in the middle of an instruction
935 * - execution cannot fall off the end of the code
936 * - (earlier) for each exception handler, the "try" area must begin and
937 * end at the start of an instruction (end can be at the end of the code)
938 * - (earlier) for each exception handler, the handler must start at a valid
939 * instruction
940 */
Ian Rogersd81871c2011-10-03 13:57:23 -0700941 bool VerifyInstruction(const Instruction* inst, uint32_t code_offset);
jeffhaobdb76512011-09-07 11:43:16 -0700942
943 /* Ensure that the register index is valid for this code item. */
Ian Rogersd81871c2011-10-03 13:57:23 -0700944 bool CheckRegisterIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700945
946 /* Ensure that the wide register index is valid for this code item. */
Ian Rogersd81871c2011-10-03 13:57:23 -0700947 bool CheckWideRegisterIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700948
Ian Rogersd81871c2011-10-03 13:57:23 -0700949 // Perform static checks on a field get or set instruction. All we do here is ensure that the
950 // field index is in the valid range.
951 bool CheckFieldIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700952
Ian Rogersd81871c2011-10-03 13:57:23 -0700953 // Perform static checks on a method invocation instruction. All we do here is ensure that the
954 // method index is in the valid range.
955 bool CheckMethodIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700956
Ian Rogersd81871c2011-10-03 13:57:23 -0700957 // Perform static checks on a "new-instance" instruction. Specifically, make sure the class
958 // reference isn't for an array class.
959 bool CheckNewInstance(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700960
Ian Rogersd81871c2011-10-03 13:57:23 -0700961 /* Ensure that the string index is in the valid range. */
962 bool CheckStringIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700963
Ian Rogersd81871c2011-10-03 13:57:23 -0700964 // Perform static checks on an instruction that takes a class constant. Ensure that the class
965 // index is in the valid range.
966 bool CheckTypeIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700967
Ian Rogersd81871c2011-10-03 13:57:23 -0700968 // Perform static checks on a "new-array" instruction. Specifically, make sure they aren't
969 // creating an array of arrays that causes the number of dimensions to exceed 255.
970 bool CheckNewArray(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700971
Ian Rogersd81871c2011-10-03 13:57:23 -0700972 // Verify an array data table. "cur_offset" is the offset of the fill-array-data instruction.
973 bool CheckArrayData(uint32_t cur_offset);
jeffhaobdb76512011-09-07 11:43:16 -0700974
Ian Rogersd81871c2011-10-03 13:57:23 -0700975 // Verify that the target of a branch instruction is valid. We don't expect code to jump directly
976 // into an exception handler, but it's valid to do so as long as the target isn't a
977 // "move-exception" instruction. We verify that in a later stage.
978 // The dex format forbids certain instructions from branching to themselves.
979 // Updates "insnFlags", setting the "branch target" flag.
980 bool CheckBranchTarget(uint32_t cur_offset);
jeffhaobdb76512011-09-07 11:43:16 -0700981
Ian Rogersd81871c2011-10-03 13:57:23 -0700982 // Verify a switch table. "cur_offset" is the offset of the switch instruction.
983 // Updates "insnFlags", setting the "branch target" flag.
984 bool CheckSwitchTargets(uint32_t cur_offset);
jeffhaobdb76512011-09-07 11:43:16 -0700985
Ian Rogersd81871c2011-10-03 13:57:23 -0700986 // Check the register indices used in a "vararg" instruction, such as invoke-virtual or
987 // filled-new-array.
988 // - vA holds word count (0-5), args[] have values.
989 // There are some tests we don't do here, e.g. we don't try to verify that invoking a method that
990 // takes a double is done with consecutive registers. This requires parsing the target method
991 // signature, which we will be doing later on during the code flow analysis.
992 bool CheckVarArgRegs(uint32_t vA, uint32_t arg[]);
jeffhaobdb76512011-09-07 11:43:16 -0700993
Ian Rogersd81871c2011-10-03 13:57:23 -0700994 // Check the register indices used in a "vararg/range" instruction, such as invoke-virtual/range
995 // or filled-new-array/range.
996 // - vA holds word count, vC holds index of first reg.
997 bool CheckVarArgRangeRegs(uint32_t vA, uint32_t vC);
jeffhaobdb76512011-09-07 11:43:16 -0700998
Ian Rogersd81871c2011-10-03 13:57:23 -0700999 // Extract the relative offset from a branch instruction.
1000 // Returns "false" on failure (e.g. this isn't a branch instruction).
1001 bool GetBranchOffset(uint32_t cur_offset, int32_t* pOffset, bool* pConditional,
1002 bool* selfOkay);
jeffhaobdb76512011-09-07 11:43:16 -07001003
Ian Rogersd81871c2011-10-03 13:57:23 -07001004 /* Perform detailed code-flow analysis on a single method. */
1005 bool VerifyCodeFlow();
jeffhaobdb76512011-09-07 11:43:16 -07001006
Ian Rogersd81871c2011-10-03 13:57:23 -07001007 // Set the register types for the first instruction in the method based on the method signature.
1008 // This has the side-effect of validating the signature.
1009 bool SetTypesFromSignature();
jeffhaobdb76512011-09-07 11:43:16 -07001010
1011 /*
1012 * Perform code flow on a method.
1013 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001014 * The basic strategy is as outlined in v3 4.11.1.2: set the "changed" bit on the first
1015 * instruction, process it (setting additional "changed" bits), and repeat until there are no
1016 * more.
jeffhaobdb76512011-09-07 11:43:16 -07001017 *
1018 * v3 4.11.1.1
1019 * - (N/A) operand stack is always the same size
1020 * - operand stack [registers] contain the correct types of values
1021 * - local variables [registers] contain the correct types of values
1022 * - methods are invoked with the appropriate arguments
1023 * - fields are assigned using values of appropriate types
1024 * - opcodes have the correct type values in operand registers
Ian Rogersd81871c2011-10-03 13:57:23 -07001025 * - there is never an uninitialized class instance in a local variable in code protected by an
1026 * exception handler (operand stack is okay, because the operand stack is discarded when an
1027 * exception is thrown) [can't know what's a local var w/o the debug info -- should fall out of
jeffhaobdb76512011-09-07 11:43:16 -07001028 * register typing]
1029 *
1030 * v3 4.11.1.2
1031 * - execution cannot fall off the end of the code
1032 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001033 * (We also do many of the items described in the "static checks" sections, because it's easier to
1034 * do them here.)
jeffhaobdb76512011-09-07 11:43:16 -07001035 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001036 * We need an array of RegType values, one per register, for every instruction. If the method uses
1037 * monitor-enter, we need extra data for every register, and a stack for every "interesting"
1038 * instruction. In theory this could become quite large -- up to several megabytes for a monster
1039 * function.
jeffhaobdb76512011-09-07 11:43:16 -07001040 *
1041 * NOTE:
Ian Rogersd81871c2011-10-03 13:57:23 -07001042 * The spec forbids backward branches when there's an uninitialized reference in a register. The
1043 * idea is to prevent something like this:
jeffhaobdb76512011-09-07 11:43:16 -07001044 * loop:
1045 * move r1, r0
1046 * new-instance r0, MyClass
1047 * ...
1048 * if-eq rN, loop // once
1049 * initialize r0
1050 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001051 * This leaves us with two different instances, both allocated by the same instruction, but only
1052 * one is initialized. The scheme outlined in v3 4.11.1.4 wouldn't catch this, so they work around
1053 * it by preventing backward branches. We achieve identical results without restricting code
1054 * reordering by specifying that you can't execute the new-instance instruction if a register
1055 * contains an uninitialized instance created by that same instruction.
jeffhaobdb76512011-09-07 11:43:16 -07001056 */
Ian Rogersd81871c2011-10-03 13:57:23 -07001057 bool CodeFlowVerifyMethod();
jeffhaobdb76512011-09-07 11:43:16 -07001058
1059 /*
1060 * Perform verification for a single instruction.
1061 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001062 * This requires fully decoding the instruction to determine the effect it has on registers.
jeffhaobdb76512011-09-07 11:43:16 -07001063 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001064 * Finds zero or more following instructions and sets the "changed" flag if execution at that
1065 * point needs to be (re-)evaluated. Register changes are merged into "reg_types_" at the target
1066 * addresses. Does not set or clear any other flags in "insn_flags_".
jeffhaobdb76512011-09-07 11:43:16 -07001067 */
Ian Rogersd81871c2011-10-03 13:57:23 -07001068 bool CodeFlowVerifyInstruction(uint32_t* start_guess);
1069
1070 // Perform verification of an aget instruction. The destination register's type will be set to
1071 // be that of component type of the array unless the array type is unknown, in which case a
1072 // bottom type inferred from the type of instruction is used. is_primitive is false for an
1073 // aget-object.
1074 void VerifyAGet(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
1075 bool is_primitive);
1076
1077 // Perform verification of an aput instruction.
1078 void VerifyAPut(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
1079 bool is_primitive);
1080
1081 // Lookup instance field and fail for resolution violations
1082 Field* GetInstanceField(const RegType& obj_type, int field_idx);
1083
Ian Rogersd81871c2011-10-03 13:57:23 -07001084 // Lookup static field and fail for resolution violations
1085 Field* GetStaticField(int field_idx);
1086
Ian Rogersb94a27b2011-10-26 00:33:41 -07001087 // Perform verification of an iget or sget instruction.
1088 void VerifyISGet(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
1089 bool is_primitive, bool is_static);
Ian Rogersd81871c2011-10-03 13:57:23 -07001090
Ian Rogersb94a27b2011-10-26 00:33:41 -07001091 // Perform verification of an iput or sput instruction.
1092 void VerifyISPut(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
1093 bool is_primitive, bool is_static);
Ian Rogersd81871c2011-10-03 13:57:23 -07001094
1095 // Verify that the arguments in a filled-new-array instruction are valid.
Ian Rogers28ad40d2011-10-27 15:19:26 -07001096 void VerifyFilledNewArrayRegs(const Instruction::DecodedInstruction& dec_insn,
1097 const RegType& res_type, bool is_range);
jeffhaobdb76512011-09-07 11:43:16 -07001098
Ian Rogers28ad40d2011-10-27 15:19:26 -07001099 // Resolves a class based on an index and performs access checks to ensure the referrer can
1100 // access the resolved class.
1101 const RegType& ResolveClassAndCheckAccess(uint32_t class_idx);
Ian Rogersd81871c2011-10-03 13:57:23 -07001102
1103 /*
1104 * For the "move-exception" instruction at "work_insn_idx_", which must be at an exception handler
1105 * address, determine the first common superclass of all exceptions that can land here.
1106 * Returns NULL if no matching exception handler can be found, or if the exception is not a
1107 * subclass of Throwable.
1108 */
Ian Rogers28ad40d2011-10-27 15:19:26 -07001109 const RegType& GetCaughtExceptionType();
jeffhao98eacac2011-09-14 16:11:53 -07001110
1111 /*
jeffhaob4df5142011-09-19 20:25:32 -07001112 * Resolves a method based on an index and performs access checks to ensure
1113 * the referrer can access the resolved method.
jeffhaob4df5142011-09-19 20:25:32 -07001114 * Does not throw exceptions.
jeffhaob4df5142011-09-19 20:25:32 -07001115 */
Ian Rogersd81871c2011-10-03 13:57:23 -07001116 Method* ResolveMethodAndCheckAccess(uint32_t method_idx, bool is_direct);
jeffhaobdb76512011-09-07 11:43:16 -07001117
1118 /*
jeffhaod1f0fde2011-09-08 17:25:33 -07001119 * Verify the arguments to a method. We're executing in "method", making
jeffhaobdb76512011-09-07 11:43:16 -07001120 * a call to the method reference in vB.
1121 *
jeffhaod1f0fde2011-09-08 17:25:33 -07001122 * If this is a "direct" invoke, we allow calls to <init>. For calls to
1123 * <init>, the first argument may be an uninitialized reference. Otherwise,
jeffhaobdb76512011-09-07 11:43:16 -07001124 * calls to anything starting with '<' will be rejected, as will any
1125 * uninitialized reference arguments.
1126 *
1127 * For non-static method calls, this will verify that the method call is
1128 * appropriate for the "this" argument.
1129 *
jeffhaod1f0fde2011-09-08 17:25:33 -07001130 * The method reference is in vBBBB. The "is_range" parameter determines
jeffhaobdb76512011-09-07 11:43:16 -07001131 * whether we use 0-4 "args" values or a range of registers defined by
1132 * vAA and vCCCC.
1133 *
1134 * Widening conversions on integers and references are allowed, but
1135 * narrowing conversions are not.
1136 *
1137 * Returns the resolved method on success, NULL on failure (with *failure
1138 * set appropriately).
1139 */
Ian Rogersd81871c2011-10-03 13:57:23 -07001140 Method* VerifyInvocationArgs(const Instruction::DecodedInstruction& dec_insn,
1141 MethodType method_type, bool is_range, bool is_super);
jeffhaobdb76512011-09-07 11:43:16 -07001142
Ian Rogersd81871c2011-10-03 13:57:23 -07001143 /*
1144 * Return the register type for the method. We can't just use the already-computed
1145 * DalvikJniReturnType, because if it's a reference type we need to do the class lookup.
1146 * Returned references are assumed to be initialized. Returns kRegTypeUnknown for "void".
1147 */
Ian Rogers6d4d9fc2011-11-30 16:24:48 -08001148 const RegType& GetMethodReturnType();
jeffhaoe0cfb6f2011-09-22 16:42:56 -07001149
Ian Rogersd81871c2011-10-03 13:57:23 -07001150 /*
1151 * Verify that the target instruction is not "move-exception". It's important that the only way
1152 * to execute a move-exception is as the first instruction of an exception handler.
1153 * Returns "true" if all is well, "false" if the target instruction is move-exception.
1154 */
1155 bool CheckMoveException(const uint16_t* insns, int insn_idx);
1156
1157 /*
1158 * Replace an instruction with "throw-verification-error". This allows us to
1159 * defer error reporting until the code path is first used.
1160 */
1161 void ReplaceFailingInstruction();
1162
1163 /*
1164 * Control can transfer to "next_insn". Merge the registers from merge_line into the table at
1165 * next_insn, and set the changed flag on the target address if any of the registers were changed.
1166 * Returns "false" if an error is encountered.
1167 */
1168 bool UpdateRegisters(uint32_t next_insn, const RegisterLine* merge_line);
1169
1170 /*
1171 * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
1172 * verification). For type-precise determination we have all the data we need, so we just need to
1173 * encode it in some clever fashion.
1174 * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
1175 */
1176 ByteArray* GenerateGcMap();
1177
1178 // Verify that the GC map associated with method_ is well formed
1179 void VerifyGcMap();
1180
1181 // Compute sizes for GC map data
1182 void ComputeGcMapSizes(size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
1183
Ian Rogersd81871c2011-10-03 13:57:23 -07001184 InsnFlags CurrentInsnFlags() {
1185 return insn_flags_[work_insn_idx_];
1186 }
1187
1188 RegTypeCache reg_types_;
1189
1190 PcToRegisterLineTable reg_table_;
1191
1192 // Storage for the register status we're currently working on.
1193 UniquePtr<RegisterLine> work_line_;
1194
Ian Rogersd81871c2011-10-03 13:57:23 -07001195 // The address of the instruction we're currently working on, note that this is in 2 byte
1196 // quantities
1197 uint32_t work_insn_idx_;
1198
1199 // Storage for the register status we're saving for later.
1200 UniquePtr<RegisterLine> saved_line_;
1201
1202 Method* method_; // The method we're working on.
1203 const DexFile* dex_file_; // The dex file containing the method.
1204 const DexFile::CodeItem* code_item_; // The code item containing the code for the method.
1205 UniquePtr<InsnFlags[]> insn_flags_; // Instruction widths and flags, one entry per code unit.
1206
1207 // The type of any error that occurs
1208 VerifyError failure_;
1209
1210 // Failure message log
1211 std::ostringstream fail_messages_;
1212 // Info message log
1213 std::ostringstream info_messages_;
1214
1215 // The number of occurrences of specific opcodes.
1216 size_t new_instance_count_;
1217 size_t monitor_enter_count_;
Carl Shapiro0e5d75d2011-07-06 18:28:37 -07001218};
1219
Ian Rogersd81871c2011-10-03 13:57:23 -07001220// Lightweight wrapper for PC to reference bit maps.
1221class PcToReferenceMap {
1222 public:
1223 PcToReferenceMap(Method* m) {
1224 data_ = down_cast<ByteArray*>(m->GetGcMap());
Ian Rogersb861dc02011-11-14 17:00:05 -08001225 CHECK(data_ != NULL) << PrettyMethod(m);
Ian Rogersd81871c2011-10-03 13:57:23 -07001226 // Check the size of the table agrees with the number of entries
1227 size_t data_size = data_->GetLength() - 4;
1228 DCHECK_EQ(EntryWidth() * NumEntries(), data_size);
1229 }
1230
1231 // The number of entries in the table
1232 size_t NumEntries() const {
1233 return GetData()[2] | (GetData()[3] << 8);
1234 }
1235
1236 // Get the PC at the given index
1237 uint16_t GetPC(size_t index) const {
1238 size_t entry_offset = index * EntryWidth();
1239 if (PcWidth() == 1) {
1240 return Table()[entry_offset];
1241 } else {
1242 return Table()[entry_offset] | (Table()[entry_offset + 1] << 8);
1243 }
1244 }
1245
1246 // Return address of bitmap encoding what are live references
1247 const uint8_t* GetBitMap(size_t index) const {
1248 size_t entry_offset = index * EntryWidth();
1249 return &Table()[entry_offset + PcWidth()];
1250 }
1251
1252 // Find the bitmap associated with the given dex pc
1253 const uint8_t* FindBitMap(uint16_t dex_pc, bool error_if_not_present = true) const;
1254
1255 // The number of bytes used to encode registers
1256 size_t RegWidth() const {
1257 return GetData()[1];
1258 }
1259
1260 private:
1261 // Table of num_entries * (dex pc, bitmap)
1262 const uint8_t* Table() const {
1263 return GetData() + 4;
1264 }
1265
1266 // The format of the table of the PCs for the table
1267 RegisterMapFormat Format() const {
1268 return static_cast<RegisterMapFormat>(GetData()[0]);
1269 }
1270
1271 // Number of bytes used to encode a dex pc
1272 size_t PcWidth() const {
1273 RegisterMapFormat format = Format();
1274 switch (format) {
1275 case kRegMapFormatCompact8:
1276 return 1;
1277 case kRegMapFormatCompact16:
1278 return 2;
1279 default:
1280 LOG(FATAL) << "Invalid format " << static_cast<int>(format);
1281 return -1;
1282 }
1283 }
1284
1285 // The width of an entry in the table
1286 size_t EntryWidth() const {
1287 return PcWidth() + RegWidth();
1288 }
1289
1290 const uint8_t* GetData() const {
1291 return reinterpret_cast<uint8_t*>(data_->GetData());
1292 }
1293 ByteArray* data_; // The header and table data
1294};
1295
1296} // namespace verifier
Carl Shapiro0e5d75d2011-07-06 18:28:37 -07001297} // namespace art
1298
1299#endif // ART_SRC_DEX_VERIFY_H_