blob: 3a614d5ea5cca65a243f6c1dcd04bc0be732290f [file] [log] [blame]
Carl Shapiro0e5d75d2011-07-06 18:28:37 -07001// Copyright 2011 Google Inc. All Rights Reserved.
2
3#ifndef ART_SRC_DEX_VERIFY_H_
4#define ART_SRC_DEX_VERIFY_H_
5
Ian Rogersd81871c2011-10-03 13:57:23 -07006#include "casts.h"
Elliott Hughes90a33692011-08-30 13:27:07 -07007#include "dex_file.h"
8#include "dex_instruction.h"
Brian Carlstrom578bbdc2011-07-21 14:07:47 -07009#include "macros.h"
10#include "object.h"
Ian Rogersd81871c2011-10-03 13:57:23 -070011#include "stl_util.h"
Elliott Hughes5fe594f2011-09-08 12:33:17 -070012#include "UniquePtr.h"
Carl Shapiro0e5d75d2011-07-06 18:28:37 -070013
Ian Rogersd81871c2011-10-03 13:57:23 -070014#include <map>
15#include <stack>
16#include <vector>
Carl Shapiro0e5d75d2011-07-06 18:28:37 -070017
Ian Rogersd81871c2011-10-03 13:57:23 -070018namespace art {
19namespace verifier {
20
21class DexVerifier;
22class PcToReferenceMap;
23class RegTypeCache;
jeffhaobdb76512011-09-07 11:43:16 -070024
25/*
Ian Rogersd81871c2011-10-03 13:57:23 -070026 * Set this to enable dead code scanning. This is not required, but it's very useful when testing
27 * changes to the verifier (to make sure we're not skipping over stuff). The only reason not to do
28 * it is that it slightly increases the time required to perform verification.
jeffhaobdb76512011-09-07 11:43:16 -070029 */
// Dead code scanning is switched on for debug builds and off when NDEBUG is defined.
#ifdef NDEBUG
# define DEAD_CODE_SCAN false
#else
# define DEAD_CODE_SCAN true
#endif
35
/*
 * RegType holds information about the type of data held in a register. For most types it's a simple
 * enum. For reference types it also holds a pointer to the Class, and for uninitialized references
 * it records the address at which the uninitialized reference was created.
 */
Ian Rogersd81871c2011-10-03 13:57:23 -070041class RegType {
Carl Shapiro0e5d75d2011-07-06 18:28:37 -070042 public:
jeffhaobdb76512011-09-07 11:43:16 -070043 /*
Ian Rogersd81871c2011-10-03 13:57:23 -070044 * Enumeration for register type values. The "hi" piece of a 64-bit value MUST immediately follow
45 * the "lo" piece in the enumeration, so we can check that hi==lo+1.
jeffhaobdb76512011-09-07 11:43:16 -070046 *
47 * Assignment of constants:
48 * [-MAXINT,-32768) : integer
49 * [-32768,-128) : short
50 * [-128,0) : byte
51 * 0 : zero
52 * 1 : one
53 * [2,128) : posbyte
54 * [128,32768) : posshort
55 * [32768,65536) : char
56 * [65536,MAXINT] : integer
57 *
58 * Allowed "implicit" widening conversions:
59 * zero -> boolean, posbyte, byte, posshort, short, char, integer, ref (null)
60 * one -> boolean, posbyte, byte, posshort, short, char, integer
61 * boolean -> posbyte, byte, posshort, short, char, integer
62 * posbyte -> posshort, short, integer, char
63 * byte -> short, integer
64 * posshort -> integer, char
65 * short -> integer
66 * char -> integer
67 *
68 * In addition, all of the above can convert to "float".
69 *
Ian Rogersd81871c2011-10-03 13:57:23 -070070 * We're more careful with integer values than the spec requires. The motivation is to restrict
71 * byte/char/short to the correct range of values. For example, if a method takes a byte argument,
72 * we don't want to allow the code to load the constant "1024" and pass it in.
jeffhaobdb76512011-09-07 11:43:16 -070073 */
Ian Rogersd81871c2011-10-03 13:57:23 -070074 enum Type {
75 kRegTypeUnknown = 0, /* initial state */
jeffhaobdb76512011-09-07 11:43:16 -070076 kRegTypeConflict, /* merge clash makes this reg's type unknowable */
77
78 /*
Ian Rogersd81871c2011-10-03 13:57:23 -070079 * Category-1nr types. The order of these is chiseled into a couple of tables, so don't add,
80 * remove, or reorder if you can avoid it.
jeffhaobdb76512011-09-07 11:43:16 -070081 */
Ian Rogersd81871c2011-10-03 13:57:23 -070082 kRegTypeZero, /* 0 - 32-bit 0, could be Boolean, Int, Float, or Ref */
83 kRegType1nrSTART = kRegTypeZero,
84 kRegTypeIntegralSTART = kRegTypeZero,
85 kRegTypeOne, /* 1 - 32-bit 1, could be Boolean, Int, Float */
86 kRegTypeBoolean, /* Z - must be 0 or 1 */
87 kRegTypeConstPosByte, /* y - const derived byte, known positive */
88 kRegTypeConstByte, /* Y - const derived byte */
89 kRegTypeConstPosShort, /* h - const derived short, known positive */
90 kRegTypeConstShort, /* H - const derived short */
91 kRegTypeConstChar, /* c - const derived char */
92 kRegTypeConstInteger, /* i - const derived integer */
93 kRegTypePosByte, /* b - byte, known positive (can become char) */
94 kRegTypeByte, /* B */
95 kRegTypePosShort, /* s - short, known positive (can become char) */
96 kRegTypeShort, /* S */
97 kRegTypeChar, /* C */
98 kRegTypeInteger, /* I */
99 kRegTypeIntegralEND = kRegTypeInteger,
100 kRegTypeFloat, /* F */
101 kRegType1nrEND = kRegTypeFloat,
102 kRegTypeConstLo, /* const derived wide, lower half - could be long or double */
103 kRegTypeConstHi, /* const derived wide, upper half - could be long or double */
jeffhaobdb76512011-09-07 11:43:16 -0700104 kRegTypeLongLo, /* lower-numbered register; endian-independent */
105 kRegTypeLongHi,
106 kRegTypeDoubleLo,
107 kRegTypeDoubleHi,
Ian Rogersd81871c2011-10-03 13:57:23 -0700108 kRegTypeReference, // Reference type
109 kRegTypeMAX = kRegTypeReference + 1,
jeffhaobdb76512011-09-07 11:43:16 -0700110 };
111
Ian Rogersd81871c2011-10-03 13:57:23 -0700112 bool IsUninitializedThisReference() const {
113 return allocation_pc_ == kUninitThisArgAddr;
jeffhaobdb76512011-09-07 11:43:16 -0700114 }
115
Ian Rogersd81871c2011-10-03 13:57:23 -0700116 Type GetType() const {
117 return type_;
jeffhaobdb76512011-09-07 11:43:16 -0700118 }
119
Ian Rogers2c8a8572011-10-24 17:11:36 -0700120 std::string Dump() const;
Ian Rogersd81871c2011-10-03 13:57:23 -0700121
122 Class* GetClass() const {
123 DCHECK(klass_ != NULL);
124 return klass_;
jeffhaobdb76512011-09-07 11:43:16 -0700125 }
126
Ian Rogersd81871c2011-10-03 13:57:23 -0700127 bool IsInitialized() const { return allocation_pc_ == kInitArgAddr; }
128 bool IsUninitializedReference() const { return allocation_pc_ != kInitArgAddr; }
129
130 bool IsUnknown() const { return type_ == kRegTypeUnknown; }
131 bool IsConflict() const { return type_ == kRegTypeConflict; }
132 bool IsZero() const { return type_ == kRegTypeZero; }
133 bool IsOne() const { return type_ == kRegTypeOne; }
134 bool IsConstLo() const { return type_ == kRegTypeConstLo; }
135 bool IsBoolean() const { return type_ == kRegTypeBoolean; }
136 bool IsByte() const { return type_ == kRegTypeByte; }
137 bool IsChar() const { return type_ == kRegTypeChar; }
138 bool IsShort() const { return type_ == kRegTypeShort; }
139 bool IsInteger() const { return type_ == kRegTypeInteger; }
140 bool IsLong() const { return type_ == kRegTypeLongLo; }
141 bool IsFloat() const { return type_ == kRegTypeFloat; }
142 bool IsDouble() const { return type_ == kRegTypeDoubleLo; }
143 bool IsReference() const { return type_ == kRegTypeReference; }
144
145 bool IsLowHalf() const { return type_ == kRegTypeLongLo ||
146 type_ == kRegTypeDoubleLo ||
147 type_ == kRegTypeConstLo; }
148 bool IsHighHalf() const { return type_ == kRegTypeLongHi ||
149 type_ == kRegTypeDoubleHi ||
150 type_ == kRegTypeConstHi; }
151
152 const RegType& HighHalf(RegTypeCache* cache) const;
153
154 bool CheckWidePair(const RegType& type_h) const {
155 return IsLowHalf() && (type_h.type_ == type_ + 1);
jeffhaobdb76512011-09-07 11:43:16 -0700156 }
157
Ian Rogersd81871c2011-10-03 13:57:23 -0700158 uint16_t GetId() const {
159 return cache_id_;
jeffhaobdb76512011-09-07 11:43:16 -0700160 }
161
Ian Rogersd81871c2011-10-03 13:57:23 -0700162 bool IsLongOrDoubleTypes() const { return IsLowHalf(); }
163
164 bool IsReferenceTypes() const {
165 return type_ == kRegTypeReference || type_ == kRegTypeZero;
jeffhaobdb76512011-09-07 11:43:16 -0700166 }
167
Ian Rogersd81871c2011-10-03 13:57:23 -0700168 bool IsCategory1Types() const {
169 return type_ >= kRegType1nrSTART && type_ <= kRegType1nrEND;
jeffhaobdb76512011-09-07 11:43:16 -0700170 }
171
Ian Rogersd81871c2011-10-03 13:57:23 -0700172 bool IsCategory2Types() const {
173 return IsLowHalf(); // Don't expect explicit testing of high halves
jeffhaobdb76512011-09-07 11:43:16 -0700174 }
175
Ian Rogersd81871c2011-10-03 13:57:23 -0700176 bool IsBooleanTypes() const { return IsBoolean() || IsZero() || IsOne(); }
177
178 bool IsByteTypes() const {
179 return IsByte() || IsBooleanTypes() || type_ == kRegTypeConstPosByte ||
180 type_ == kRegTypeConstByte || type_ == kRegTypePosByte;
jeffhaobdb76512011-09-07 11:43:16 -0700181 }
182
Ian Rogersd81871c2011-10-03 13:57:23 -0700183 bool IsShortTypes() const {
184 return IsShort() || IsByteTypes() || type_ == kRegTypeConstPosShort ||
185 type_ == kRegTypeConstShort || type_ == kRegTypePosShort;
jeffhaobdb76512011-09-07 11:43:16 -0700186 }
187
Ian Rogersd81871c2011-10-03 13:57:23 -0700188 bool IsCharTypes() const {
189 return IsChar() || IsBooleanTypes() || type_ == kRegTypeConstPosByte ||
190 type_ == kRegTypePosByte || type_ == kRegTypeConstPosShort || type_ == kRegTypePosShort ||
191 type_ == kRegTypeConstChar;
192 }
193
194 bool IsIntegralTypes() const {
195 return type_ >= kRegTypeIntegralSTART && type_ <= kRegTypeIntegralEND;
196 }
197
198 bool IsArrayIndexTypes() const {
199 return IsIntegralTypes();
200 }
201
202 // Float type may be derived from any constant type
203 bool IsFloatTypes() const {
204 return IsFloat() || IsZero() || IsOne() ||
205 type_ == kRegTypeConstPosByte || type_ == kRegTypeConstByte ||
206 type_ == kRegTypeConstPosShort || type_ == kRegTypeConstShort ||
207 type_ == kRegTypeConstChar || type_ == kRegTypeConstInteger;
208 }
209
210 bool IsLongTypes() const {
211 return IsLong() || type_ == kRegTypeConstLo;
212 }
213
214 bool IsDoubleTypes() const {
215 return IsDouble() || type_ == kRegTypeConstLo;
216 }
217
218 const RegType& VerifyAgainst(const RegType& check_type, RegTypeCache* reg_types) const;
219
220 const RegType& Merge(const RegType& incoming_type, RegTypeCache* reg_types) const;
221
222 bool Equals(const RegType& other) const {
223 return type_ == other.type_ && klass_ == other.klass_ && allocation_pc_ == other.allocation_pc_;
jeffhaobdb76512011-09-07 11:43:16 -0700224 }
225
226 /*
Ian Rogersd81871c2011-10-03 13:57:23 -0700227 * A basic Join operation on classes. For a pair of types S and T the Join, written S v T = J, is
228 * S <: J, T <: J and for-all U such that S <: U, T <: U then J <: U. That is J is the parent of
229 * S and T such that there isn't a parent of both S and T that isn't also the parent of J (ie J
230 * is the deepest (lowest upper bound) parent of S and T).
231 *
232 * This operation applies for regular classes and arrays, however, for interface types there needn't
233 * be a partial ordering on the types. We could solve the problem of a lack of a partial order by
234 * introducing sets of types, however, the only operation permissible on an interface is
235 * invoke-interface. In the tradition of Java verifiers [1] we defer the verification of interface
236 * types until an invoke-interface call on the interface typed reference at runtime and allow
237 * the perversion of Object being assignable to an interface type (note, however, that we don't
238 * allow assignment of Object or Interface to any concrete class and are therefore type safe).
239 *
240 * [1] Java bytecode verifcation: algorithms and formalizations, Xavier Leroy
jeffhaobdb76512011-09-07 11:43:16 -0700241 */
Ian Rogersd81871c2011-10-03 13:57:23 -0700242 static Class* ClassJoin(Class* s, Class* t);
jeffhaoe23d93c2011-09-15 14:48:43 -0700243
244 private:
Ian Rogersd81871c2011-10-03 13:57:23 -0700245 friend class RegTypeCache;
246
247 // Address given to an allocation_pc for an initialized object.
248 static const uint32_t kInitArgAddr = -2;
249
250 // Address given to an uninitialized allocation_pc if an object is uninitialized through being
251 // a constructor.
252 static const uint32_t kUninitThisArgAddr = -1;
253
254 RegType(Type type, Class* klass, uint32_t allocation_pc, uint16_t cache_id) :
255 type_(type), klass_(klass), allocation_pc_(allocation_pc), cache_id_(cache_id) {
256 DCHECK(type >= kRegTypeReference || allocation_pc_ == kInitArgAddr);
Ian Rogers2c8a8572011-10-24 17:11:36 -0700257 if (type >= kRegTypeReference) DCHECK(klass != NULL);
Ian Rogersd81871c2011-10-03 13:57:23 -0700258 }
259
260 const Type type_; // The current type of the register
261
262 // If known the type of the register
263 Class* klass_;
264
265 // Address an uninitialized reference was created
266 const uint32_t allocation_pc_;
267
268 // A RegType cache densely encodes types, this is the location in the cache for this type
269 const uint16_t cache_id_;
270
271 /*
272 * Merge result table for primitive values. The table is symmetric along the diagonal.
273 *
274 * Note that 32-bit int/float do not merge into 64-bit long/double. This is a register merge, not
275 * a widening conversion. Only the "implicit" widening within a category, e.g. byte to short, is
276 * allowed.
277 *
278 * Dalvik does not draw a distinction between int and float, but we enforce that once a value is
279 * used as int, it can't be used as float, and vice-versa. We do not allow free exchange between
280 * 32-bit int/float and 64-bit long/double.
281 *
282 * Note that Uninit + Uninit = Uninit. This holds true because we only use this when the RegType
283 * value is exactly equal to kRegTypeUninit, which can only happen for the zeroth entry in the
284 * table.
285 *
286 * "Unknown" never merges with anything known. The only time a register transitions from "unknown"
287 * to "known" is when we're executing code for the first time, and we handle that with a simple
288 * copy.
289 */
290 static const RegType::Type merge_table_[kRegTypeReference][kRegTypeReference];
291
292 DISALLOW_COPY_AND_ASSIGN(RegType);
293};
294std::ostream& operator<<(std::ostream& os, const RegType& rhs);
295
296class RegTypeCache {
297 public:
298 explicit RegTypeCache() : entries_(RegType::kRegTypeReference) {
299 Unknown(); // ensure Unknown is initialized
300 }
301 ~RegTypeCache() {
302 STLDeleteElements(&entries_);
303 }
304
305 const RegType& GetFromId(uint16_t id) {
306 DCHECK_LT(id, entries_.size());
307 RegType* result = entries_[id];
308 DCHECK(result != NULL);
309 return *result;
310 }
311
312 const RegType& From(RegType::Type type, const ClassLoader* loader, const std::string& descriptor);
313 const RegType& FromClass(Class* klass);
314 const RegType& FromCat1Const(int32_t value);
315 const RegType& FromDescriptor(const ClassLoader* loader, const std::string& descriptor);
316 const RegType& FromType(RegType::Type);
317
318 const RegType& Boolean() { return FromType(RegType::kRegTypeBoolean); }
319 const RegType& Byte() { return FromType(RegType::kRegTypeByte); }
320 const RegType& Char() { return FromType(RegType::kRegTypeChar); }
321 const RegType& Short() { return FromType(RegType::kRegTypeShort); }
322 const RegType& Integer() { return FromType(RegType::kRegTypeInteger); }
323 const RegType& Float() { return FromType(RegType::kRegTypeFloat); }
324 const RegType& Long() { return FromType(RegType::kRegTypeLongLo); }
325 const RegType& Double() { return FromType(RegType::kRegTypeDoubleLo); }
326
327 const RegType& JavaLangClass() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Class;"); }
328 const RegType& JavaLangObject() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Object;"); }
329 const RegType& JavaLangString() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/String;"); }
330
331 const RegType& Unknown() { return FromType(RegType::kRegTypeUnknown); }
332 const RegType& Conflict() { return FromType(RegType::kRegTypeConflict); }
333 const RegType& Zero() { return FromType(RegType::kRegTypeZero); }
334 const RegType& ConstLo() { return FromType(RegType::kRegTypeConstLo); }
335
336 const RegType& Uninitialized(Class* klass, uint32_t allocation_pc);
337 const RegType& UninitializedThisArgument(Class* klass);
338
339 private:
340 // The allocated entries
341 std::vector<RegType*> entries_;
342
343 DISALLOW_COPY_AND_ASSIGN(RegTypeCache);
344};
345
346class InsnFlags {
347 public:
348 InsnFlags() : length_(0), flags_(0) {}
349
350 void SetLengthInCodeUnits(size_t length) {
351 CHECK_LT(length, 65536u);
352 length_ = length;
353 }
354 size_t GetLengthInCodeUnits() {
355 return length_;
356 }
357 bool IsOpcode() const {
358 return length_ != 0;
359 }
360
361 void SetInTry() {
362 flags_ |= 1 << kInsnFlagInTry;
363 }
364 void ClearInTry() {
365 flags_ &= ~(1 << kInsnFlagInTry);
366 }
367 bool IsInTry() const {
368 return (flags_ & (1 << kInsnFlagInTry)) != 0;
369 }
370
371 void SetBranchTarget() {
372 flags_ |= 1 << kInsnFlagBranchTarget;
373 }
374 void ClearBranchTarget() {
375 flags_ &= ~(1 << kInsnFlagBranchTarget);
376 }
377 bool IsBranchTarget() const {
378 return (flags_ & (1 << kInsnFlagBranchTarget)) != 0;
379 }
380
381 void SetGcPoint() {
382 flags_ |= 1 << kInsnFlagGcPoint;
383 }
384 void ClearGcPoint() {
385 flags_ &= ~(1 << kInsnFlagGcPoint);
386 }
387 bool IsGcPoint() const {
388 return (flags_ & (1 << kInsnFlagGcPoint)) != 0;
389 }
390
391 void SetVisited() {
392 flags_ |= 1 << kInsnFlagVisited;
393 }
394 void ClearVisited() {
395 flags_ &= ~(1 << kInsnFlagVisited);
396 }
397 bool IsVisited() const {
398 return (flags_ & (1 << kInsnFlagVisited)) != 0;
399 }
400
401 void SetChanged() {
402 flags_ |= 1 << kInsnFlagChanged;
403 }
404 void ClearChanged() {
405 flags_ &= ~(1 << kInsnFlagChanged);
406 }
407 bool IsChanged() const {
408 return (flags_ & (1 << kInsnFlagChanged)) != 0;
409 }
410
411 bool IsVisitedOrChanged() const {
412 return IsVisited() || IsChanged();
413 }
414
Ian Rogers2c8a8572011-10-24 17:11:36 -0700415 std::string Dump() {
Ian Rogersd81871c2011-10-03 13:57:23 -0700416 char encoding[6];
417 if (!IsOpcode()) {
418 strncpy(encoding, "XXXXX", sizeof(encoding));
419 } else {
420 strncpy(encoding, "-----", sizeof(encoding));
421 if (IsInTry()) encoding[kInsnFlagInTry] = 'T';
422 if (IsBranchTarget()) encoding[kInsnFlagBranchTarget] = 'B';
423 if (IsGcPoint()) encoding[kInsnFlagGcPoint] = 'G';
424 if (IsVisited()) encoding[kInsnFlagVisited] = 'V';
425 if (IsChanged()) encoding[kInsnFlagChanged] = 'C';
426 }
Ian Rogers2c8a8572011-10-24 17:11:36 -0700427 return std::string(encoding);
Ian Rogersd81871c2011-10-03 13:57:23 -0700428 }
429 private:
430 enum InsnFlag {
431 kInsnFlagInTry,
432 kInsnFlagBranchTarget,
433 kInsnFlagGcPoint,
434 kInsnFlagVisited,
435 kInsnFlagChanged,
436 };
437
438 // Size of instruction in code units
439 uint16_t length_;
440 uint8_t flags_;
441};
442
443/*
444 * "Direct" and "virtual" methods are stored independently. The type of call used to invoke the
445 * method determines which list we search, and whether we travel up into superclasses.
446 *
447 * (<clinit>, <init>, and methods declared "private" or "static" are stored in the "direct" list.
448 * All others are stored in the "virtual" list.)
449 */
// Kind of method invocation being verified. Values are spelled out explicitly.
enum MethodType {
  METHOD_UNKNOWN   = 0,
  METHOD_DIRECT    = 1,  // <init>, private
  METHOD_STATIC    = 2,  // static
  METHOD_VIRTUAL   = 3,  // virtual, super
  METHOD_INTERFACE = 4   // interface
};
457
// Shift and mask constants for "uninit" register type encoding.
// NOTE(review): no user of these constants is visible in this header -- confirm they are still
// needed.
static const int kRegTypeUninitShift = 8;
static const int kRegTypeUninitMask = 0xFF;
460
461/*
462 * Register type categories, for type checking.
463 *
464 * The spec says category 1 includes boolean, byte, char, short, int, float, reference, and
465 * returnAddress. Category 2 includes long and double.
466 *
467 * We treat object references separately, so we have "category1nr". We don't support jsr/ret, so
468 * there is no "returnAddress" type.
469 */
// Register type categories; enumerators count up from zero in declaration order.
enum TypeCategory {
  kTypeCategoryUnknown = 0,
  kTypeCategory1nr,  // (1) boolean, byte, char, short, int, float
  kTypeCategory2,    // (2) long, double
  kTypeCategoryRef,  // (3) object reference
};
476
477/*
478 * An enumeration of problems that can turn up during verification.
479 * VERIFY_ERROR_GENERIC denotes a failure that causes the entire class to be rejected. Other errors
480 * denote verification errors that cause bytecode to be rewritten to fail at runtime.
481 */
// Verification failure kinds. The trailing comment on each line names the corresponding Java
// error; values are spelled out explicitly.
enum VerifyError {
  VERIFY_ERROR_NONE          = 0,  /* no error; must be zero */
  VERIFY_ERROR_GENERIC       = 1,  /* VerifyError */

  VERIFY_ERROR_NO_CLASS      = 2,  /* NoClassDefFoundError */
  VERIFY_ERROR_NO_FIELD      = 3,  /* NoSuchFieldError */
  VERIFY_ERROR_NO_METHOD     = 4,  /* NoSuchMethodError */
  VERIFY_ERROR_ACCESS_CLASS  = 5,  /* IllegalAccessError */
  VERIFY_ERROR_ACCESS_FIELD  = 6,  /* IllegalAccessError */
  VERIFY_ERROR_ACCESS_METHOD = 7,  /* IllegalAccessError */
  VERIFY_ERROR_CLASS_CHANGE  = 8,  /* IncompatibleClassChangeError */
  VERIFY_ERROR_INSTANTIATION = 9,  /* InstantiationError */
};
// Stream insertion for VerifyError; defined out of line.
std::ostream& operator<<(std::ostream& os, const VerifyError& rhs);
496
497/*
498 * Identifies the type of reference in the instruction that generated the verify error
499 * (e.g. VERIFY_ERROR_ACCESS_CLASS could come from a method, field, or class reference).
500 *
501 * This must fit in two bits.
502 */
// Kind of reference that produced a verify error. All values fit in two bits.
enum VerifyErrorRefType {
  VERIFY_ERROR_REF_CLASS  = 0x0,
  VERIFY_ERROR_REF_FIELD  = 0x1,
  VERIFY_ERROR_REF_METHOD = 0x2,
};
// Shift associated with VerifyErrorRefType values.
// NOTE(review): presumably the bit position of the ref type within a packed error code -- confirm
// against the code that packs it.
static const int kVerifyErrorRefTypeShift = 6;
509
510/*
511 * Format enumeration for RegisterMap data area.
512 */
// Layout tags for the RegisterMap data area; values are spelled out explicitly.
enum RegisterMapFormat {
  kRegMapFormatUnknown   = 0,
  kRegMapFormatNone      = 1,  /* indicates no map data follows */
  kRegMapFormatCompact8  = 2,  /* compact layout, 8-bit addresses */
  kRegMapFormatCompact16 = 3,  /* compact layout, 16-bit addresses */
};
519
520// During verification, we associate one of these with every "interesting" instruction. We track
521// the status of all registers, and (if the method has any monitor-enter instructions) maintain a
522// stack of entered monitors (identified by code unit offset).
523// If live-precise register maps are enabled, the "liveRegs" vector will be populated. Unlike the
524// other lists of registers here, we do not track the liveness of the method result register
525// (which is not visible to the GC).
526class RegisterLine {
527 public:
528 RegisterLine(size_t num_regs, DexVerifier* verifier) :
529 line_(new uint16_t[num_regs]), verifier_(verifier), num_regs_(num_regs) {
530 memset(line_.get(), 0, num_regs_ * sizeof(uint16_t));
531 result_[0] = RegType::kRegTypeUnknown;
532 result_[1] = RegType::kRegTypeUnknown;
533 }
534
535 // Implement category-1 "move" instructions. Copy a 32-bit value from "vsrc" to "vdst".
536 void CopyRegister1(uint32_t vdst, uint32_t vsrc, TypeCategory cat);
537
538 // Implement category-2 "move" instructions. Copy a 64-bit value from "vsrc" to "vdst". This
539 // copies both halves of the register.
540 void CopyRegister2(uint32_t vdst, uint32_t vsrc);
541
542 // Implement "move-result". Copy the category-1 value from the result register to another
543 // register, and reset the result register.
544 void CopyResultRegister1(uint32_t vdst, bool is_reference);
545
546 // Implement "move-result-wide". Copy the category-2 value from the result register to another
547 // register, and reset the result register.
548 void CopyResultRegister2(uint32_t vdst);
549
550 // Set the invisible result register to unknown
551 void SetResultTypeToUnknown();
552
553 // Set the type of register N, verifying that the register is valid. If "newType" is the "Lo"
554 // part of a 64-bit value, register N+1 will be set to "newType+1".
555 // The register index was validated during the static pass, so we don't need to check it here.
556 void SetRegisterType(uint32_t vdst, const RegType& new_type);
557
558 /* Set the type of the "result" register. */
559 void SetResultRegisterType(const RegType& new_type);
560
561 // Get the type of register vsrc.
562 const RegType& GetRegisterType(uint32_t vsrc) const;
563
564 bool VerifyRegisterType(uint32_t vsrc, const RegType& check_type);
565
566 void CopyFromLine(const RegisterLine* src) {
567 DCHECK_EQ(num_regs_, src->num_regs_);
568 memcpy(line_.get(), src->line_.get(), num_regs_ * sizeof(uint16_t));
569 monitors_ = src->monitors_;
570 reg_to_lock_depths_ = src->reg_to_lock_depths_;
571 }
572
Ian Rogers2c8a8572011-10-24 17:11:36 -0700573 std::string Dump() const {
574 std::string result;
Ian Rogersd81871c2011-10-03 13:57:23 -0700575 for (size_t i = 0; i < num_regs_; i++) {
Ian Rogers2c8a8572011-10-24 17:11:36 -0700576 result += GetRegisterType(i).Dump();
Ian Rogersd81871c2011-10-03 13:57:23 -0700577 }
Ian Rogers2c8a8572011-10-24 17:11:36 -0700578 return result;
Ian Rogersd81871c2011-10-03 13:57:23 -0700579 }
580
581 void FillWithGarbage() {
582 memset(line_.get(), 0xf1, num_regs_ * sizeof(uint16_t));
583 while (!monitors_.empty()) {
584 monitors_.pop();
585 }
586 reg_to_lock_depths_.clear();
587 }
588
589 /*
590 * We're creating a new instance of class C at address A. Any registers holding instances
591 * previously created at address A must be initialized by now. If not, we mark them as "conflict"
592 * to prevent them from being used (otherwise, MarkRefsAsInitialized would mark the old ones and
593 * the new ones at the same time).
594 */
595 void MarkUninitRefsAsInvalid(const RegType& uninit_type);
596
597 /*
598 * Update all registers holding "uninit_type" to instead hold the corresponding initialized
599 * reference type. This is called when an appropriate constructor is invoked -- all copies of
600 * the reference must be marked as initialized.
601 */
602 void MarkRefsAsInitialized(const RegType& uninit_type);
603
604 /*
605 * Check constraints on constructor return. Specifically, make sure that the "this" argument got
606 * initialized.
607 * The "this" argument to <init> uses code offset kUninitThisArgAddr, which puts it at the start
608 * of the list in slot 0. If we see a register with an uninitialized slot 0 reference, we know it
609 * somehow didn't get initialized.
610 */
611 bool CheckConstructorReturn() const;
612
613 // Compare two register lines. Returns 0 if they match.
614 // Using this for a sort is unwise, since the value can change based on machine endianness.
615 int CompareLine(const RegisterLine* line2) const {
616 DCHECK(monitors_ == line2->monitors_);
617 // TODO: DCHECK(reg_to_lock_depths_ == line2->reg_to_lock_depths_);
618 return memcmp(line_.get(), line2->line_.get(), num_regs_ * sizeof(uint16_t));
619 }
620
621 size_t NumRegs() const {
622 return num_regs_;
623 }
624
625 /*
626 * Get the "this" pointer from a non-static method invocation. This returns the RegType so the
627 * caller can decide whether it needs the reference to be initialized or not. (Can also return
628 * kRegTypeZero if the reference can only be zero at this point.)
629 *
630 * The argument count is in vA, and the first argument is in vC, for both "simple" and "range"
631 * versions. We just need to make sure vA is >= 1 and then return vC.
632 */
633 const RegType& GetInvocationThis(const Instruction::DecodedInstruction& dec_insn);
634
635 /*
636 * Get the value from a register, and cast it to a Class. Sets "*failure" if something fails.
637 * This fails if the register holds an uninitialized class.
638 * If the register holds kRegTypeZero, this returns a NULL pointer.
639 */
640 Class* GetClassFromRegister(uint32_t vsrc) const;
641
642 /*
643 * Verify types for a simple two-register instruction (e.g. "neg-int").
644 * "dst_type" is stored into vA, and "src_type" is verified against vB.
645 */
646 void CheckUnaryOp(const Instruction::DecodedInstruction& dec_insn,
647 const RegType& dst_type, const RegType& src_type);
648
649 /*
650 * Verify types for a simple three-register instruction (e.g. "add-int").
651 * "dst_type" is stored into vA, and "src_type1"/"src_type2" are verified
652 * against vB/vC.
653 */
654 void CheckBinaryOp(const Instruction::DecodedInstruction& dec_insn,
655 const RegType& dst_type, const RegType& src_type1, const RegType& src_type2,
656 bool check_boolean_op);
657
658 /*
659 * Verify types for a binary "2addr" operation. "src_type1"/"src_type2"
660 * are verified against vA/vB, then "dst_type" is stored into vA.
661 */
662 void CheckBinaryOp2addr(const Instruction::DecodedInstruction& dec_insn,
663 const RegType& dst_type,
664 const RegType& src_type1, const RegType& src_type2,
665 bool check_boolean_op);
666
667 /*
668 * Verify types for A two-register instruction with a literal constant (e.g. "add-int/lit8").
669 * "dst_type" is stored into vA, and "src_type" is verified against vB.
670 *
671 * If "check_boolean_op" is set, we use the constant value in vC.
672 */
673 void CheckLiteralOp(const Instruction::DecodedInstruction& dec_insn,
674 const RegType& dst_type, const RegType& src_type, bool check_boolean_op);
675
676 // Verify/push monitor onto the monitor stack, locking the value in reg_idx at location insn_idx.
677 void PushMonitor(uint32_t reg_idx, int32_t insn_idx);
678
679 // Verify/pop monitor from monitor stack ensuring that we believe the monitor is locked
680 void PopMonitor(uint32_t reg_idx);
681
682 // Stack of currently held monitors and where they were locked
683 size_t MonitorStackDepth() const {
684 return monitors_.size();
685 }
686
687 // We expect no monitors to be held at certain points, such a method returns. Verify the stack
688 // is empty, failing and returning false if not.
689 bool VerifyMonitorStackEmpty();
690
691 bool MergeRegisters(const RegisterLine* incoming_line);
692
693 size_t GetMaxReferenceReg(size_t max_ref_reg) {
694 size_t i = static_cast<int>(max_ref_reg) < 0 ? 0 : max_ref_reg;
695 for(; i < num_regs_; i++) {
696 if (line_[i] >= RegType::kRegTypeReference) {
697 max_ref_reg = i;
698 }
699 }
700 return max_ref_reg;
701 }
702
703 // Write a bit at each register location that holds a reference
704 void WriteReferenceBitMap(int8_t* data, size_t max_bytes);
705 private:
706
707 void CopyRegToLockDepth(size_t dst, size_t src) {
708 if (reg_to_lock_depths_.count(src) > 0) {
709 uint32_t depths = reg_to_lock_depths_[src];
710 reg_to_lock_depths_[dst] = depths;
711 }
712 }
713
714 bool IsSetLockDepth(size_t reg, size_t depth) {
715 if (reg_to_lock_depths_.count(reg) > 0) {
716 uint32_t depths = reg_to_lock_depths_[reg];
717 return (depths & (1 << depth)) != 0;
718 } else {
719 return false;
720 }
721 }
722
723 void SetRegToLockDepth(size_t reg, size_t depth) {
724 CHECK_LT(depth, 32u);
725 DCHECK(!IsSetLockDepth(reg, depth));
726 uint32_t depths;
727 if (reg_to_lock_depths_.count(reg) > 0) {
728 depths = reg_to_lock_depths_[reg];
729 depths = depths | (1 << depth);
730 } else {
731 depths = 1 << depth;
732 }
733 reg_to_lock_depths_[reg] = depths;
734 }
735
736 void ClearRegToLockDepth(size_t reg, size_t depth) {
737 CHECK_LT(depth, 32u);
738 DCHECK(IsSetLockDepth(reg, depth));
739 uint32_t depths = reg_to_lock_depths_[reg];
740 depths = depths ^ (1 << depth);
741 if (depths != 0) {
742 reg_to_lock_depths_[reg] = depths;
743 } else {
744 reg_to_lock_depths_.erase(reg);
745 }
746 }
747
748 void ClearAllRegToLockDepths(size_t reg) {
749 reg_to_lock_depths_.erase(reg);
750 }
751
752 // Storage for the result register's type, valid after an invocation
753 uint16_t result_[2];
754
755 // An array of RegType Ids associated with each dex register
756 UniquePtr<uint16_t[]> line_;
757
758 // Back link to the verifier
759 DexVerifier* verifier_;
760
761 // Length of reg_types_
762 const size_t num_regs_;
763 // A stack of monitor enter locations
764 std::stack<uint32_t> monitors_;
765 // A map from register to a bit vector of indices into the monitors_ stack. As we pop the monitor
766 // stack we verify that monitor-enter/exit are correctly nested. That is, if there was a
767 // monitor-enter on v5 and then on v6, we expect the monitor-exit to be on v6 then on v5
768 std::map<uint32_t, uint32_t> reg_to_lock_depths_;
769};
770std::ostream& operator<<(std::ostream& os, const RegisterLine& rhs);
771
772class PcToRegisterLineTable {
773 public:
774 // We don't need to store the register data for many instructions, because we either only need
775 // it at branch points (for verification) or GC points and branches (for verification +
776 // type-precise register analysis).
777 enum RegisterTrackingMode {
778 kTrackRegsBranches,
779 kTrackRegsGcPoints,
780 kTrackRegsAll,
781 };
782 PcToRegisterLineTable() {}
783 ~PcToRegisterLineTable() {
784 STLDeleteValues(&pc_to_register_line_);
785 }
786
787 // Initialize the RegisterTable. Every instruction address can have a different set of information
788 // about what's in which register, but for verification purposes we only need to store it at
789 // branch target addresses (because we merge into that).
790 void Init(RegisterTrackingMode mode, InsnFlags* flags, uint32_t insns_size,
791 uint16_t registers_size, DexVerifier* verifier);
792
793 RegisterLine* GetLine(size_t idx) {
794 return pc_to_register_line_[idx];
795 }
796
797 private:
798 // Map from a dex pc to the register status associated with it
799 std::map<int32_t, RegisterLine*> pc_to_register_line_;
800
801 // Number of registers we track for each instruction. This is equal to the method's declared
802 // "registersSize" plus kExtraRegs (2).
803 size_t insn_reg_count_plus_;
804};
805
806
807
808// The verifier
809class DexVerifier {
810 public:
811 /* Verify a class. Returns "true" on success. */
812 static bool VerifyClass(const Class* klass);
jeffhaobdb76512011-09-07 11:43:16 -0700813 /*
814 * Perform verification on a single method.
815 *
816 * We do this in three passes:
817 * (1) Walk through all code units, determining instruction locations,
818 * widths, and other characteristics.
819 * (2) Walk through all code units, performing static checks on
820 * operands.
821 * (3) Iterate through the method, checking type safety and looking
822 * for code flow problems.
823 *
jeffhaod1f0fde2011-09-08 17:25:33 -0700824 * Some checks may be bypassed depending on the verification mode. We can't
jeffhaobdb76512011-09-07 11:43:16 -0700825 * turn this stuff off completely if we want to do "exact" GC.
826 *
827 * Confirmed here:
828 * - code array must not be empty
829 * Confirmed by ComputeWidthsAndCountOps():
830 * - opcode of first instruction begins at index 0
831 * - only documented instructions may appear
832 * - each instruction follows the last
833 * - last byte of last instruction is at (code_length-1)
834 */
Carl Shapiro0e5d75d2011-07-06 18:28:37 -0700835 static bool VerifyMethod(Method* method);
Carl Shapiro0e5d75d2011-07-06 18:28:37 -0700836
Ian Rogersd81871c2011-10-03 13:57:23 -0700837 uint8_t EncodePcToReferenceMapData() const;
838
839 uint32_t DexFileVersion() const {
840 return dex_file_->GetVersion();
841 }
842
843 RegTypeCache* GetRegTypeCache() {
844 return &reg_types_;
845 }
846
847 // Verification failed
848 std::ostream& Fail(VerifyError error) {
849 CHECK_EQ(failure_, VERIFY_ERROR_NONE);
850 failure_ = error;
851 return fail_messages_ << "VFY: " << PrettyMethod(method_)
852 << '[' << (void*)work_insn_idx_ << "] : ";
853 }
854
855 // Log for verification information
856 std::ostream& LogVerifyInfo() {
857 return info_messages_ << "VFY: " << PrettyMethod(method_)
858 << '[' << (void*)work_insn_idx_ << "] : ";
859 }
860
861 // Dump the state of the verifier, namely each instruction, what flags are set on it, register
862 // information
863 void Dump(std::ostream& os);
864
865 private:
866
867 explicit DexVerifier(Method* method);
868
869 bool Verify();
870
871 /*
872 * Compute the width of the instruction at each address in the instruction stream, and store it in
873 * insn_flags_. Addresses that are in the middle of an instruction, or that are part of switch
874 * table data, are not touched (so the caller should probably initialize "insn_flags" to zero).
875 *
   * The "new_instance_count_" and "monitor_enter_count_" fields of the verifier are also set.
877 *
878 * Performs some static checks, notably:
879 * - opcode of first instruction begins at index 0
880 * - only documented instructions may appear
881 * - each instruction follows the last
882 * - last byte of last instruction is at (code_length-1)
883 *
884 * Logs an error and returns "false" on failure.
885 */
886 bool ComputeWidthsAndCountOps();
887
888 /*
889 * Set the "in try" flags for all instructions protected by "try" statements. Also sets the
890 * "branch target" flags for exception handlers.
891 *
892 * Call this after widths have been set in "insn_flags".
893 *
894 * Returns "false" if something in the exception table looks fishy, but we're expecting the
895 * exception table to be somewhat sane.
896 */
897 bool ScanTryCatchBlocks();
898
jeffhaobdb76512011-09-07 11:43:16 -0700899 /*
900 * Perform static verification on all instructions in a method.
901 *
902 * Walks through instructions in a method calling VerifyInstruction on each.
903 */
Ian Rogersd81871c2011-10-03 13:57:23 -0700904 bool VerifyInstructions();
jeffhaobdb76512011-09-07 11:43:16 -0700905
906 /*
907 * Perform static verification on an instruction.
908 *
909 * As a side effect, this sets the "branch target" flags in InsnFlags.
910 *
911 * "(CF)" items are handled during code-flow analysis.
912 *
913 * v3 4.10.1
914 * - target of each jump and branch instruction must be valid
915 * - targets of switch statements must be valid
916 * - operands referencing constant pool entries must be valid
917 * - (CF) operands of getfield, putfield, getstatic, putstatic must be valid
918 * - (CF) operands of method invocation instructions must be valid
919 * - (CF) only invoke-direct can call a method starting with '<'
920 * - (CF) <clinit> must never be called explicitly
921 * - operands of instanceof, checkcast, new (and variants) must be valid
922 * - new-array[-type] limited to 255 dimensions
923 * - can't use "new" on an array class
924 * - (?) limit dimensions in multi-array creation
925 * - local variable load/store register values must be in valid range
926 *
927 * v3 4.11.1.2
928 * - branches must be within the bounds of the code array
929 * - targets of all control-flow instructions are the start of an instruction
930 * - register accesses fall within range of allocated registers
931 * - (N/A) access to constant pool must be of appropriate type
932 * - code does not end in the middle of an instruction
933 * - execution cannot fall off the end of the code
934 * - (earlier) for each exception handler, the "try" area must begin and
935 * end at the start of an instruction (end can be at the end of the code)
936 * - (earlier) for each exception handler, the handler must start at a valid
937 * instruction
938 */
Ian Rogersd81871c2011-10-03 13:57:23 -0700939 bool VerifyInstruction(const Instruction* inst, uint32_t code_offset);
jeffhaobdb76512011-09-07 11:43:16 -0700940
941 /* Ensure that the register index is valid for this code item. */
Ian Rogersd81871c2011-10-03 13:57:23 -0700942 bool CheckRegisterIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700943
944 /* Ensure that the wide register index is valid for this code item. */
Ian Rogersd81871c2011-10-03 13:57:23 -0700945 bool CheckWideRegisterIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700946
Ian Rogersd81871c2011-10-03 13:57:23 -0700947 // Perform static checks on a field get or set instruction. All we do here is ensure that the
948 // field index is in the valid range.
949 bool CheckFieldIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700950
Ian Rogersd81871c2011-10-03 13:57:23 -0700951 // Perform static checks on a method invocation instruction. All we do here is ensure that the
952 // method index is in the valid range.
953 bool CheckMethodIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700954
Ian Rogersd81871c2011-10-03 13:57:23 -0700955 // Perform static checks on a "new-instance" instruction. Specifically, make sure the class
956 // reference isn't for an array class.
957 bool CheckNewInstance(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700958
Ian Rogersd81871c2011-10-03 13:57:23 -0700959 /* Ensure that the string index is in the valid range. */
960 bool CheckStringIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700961
Ian Rogersd81871c2011-10-03 13:57:23 -0700962 // Perform static checks on an instruction that takes a class constant. Ensure that the class
963 // index is in the valid range.
964 bool CheckTypeIndex(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700965
Ian Rogersd81871c2011-10-03 13:57:23 -0700966 // Perform static checks on a "new-array" instruction. Specifically, make sure they aren't
967 // creating an array of arrays that causes the number of dimensions to exceed 255.
968 bool CheckNewArray(uint32_t idx);
jeffhaobdb76512011-09-07 11:43:16 -0700969
Ian Rogersd81871c2011-10-03 13:57:23 -0700970 // Verify an array data table. "cur_offset" is the offset of the fill-array-data instruction.
971 bool CheckArrayData(uint32_t cur_offset);
jeffhaobdb76512011-09-07 11:43:16 -0700972
Ian Rogersd81871c2011-10-03 13:57:23 -0700973 // Verify that the target of a branch instruction is valid. We don't expect code to jump directly
974 // into an exception handler, but it's valid to do so as long as the target isn't a
975 // "move-exception" instruction. We verify that in a later stage.
976 // The dex format forbids certain instructions from branching to themselves.
977 // Updates "insnFlags", setting the "branch target" flag.
978 bool CheckBranchTarget(uint32_t cur_offset);
jeffhaobdb76512011-09-07 11:43:16 -0700979
Ian Rogersd81871c2011-10-03 13:57:23 -0700980 // Verify a switch table. "cur_offset" is the offset of the switch instruction.
981 // Updates "insnFlags", setting the "branch target" flag.
982 bool CheckSwitchTargets(uint32_t cur_offset);
jeffhaobdb76512011-09-07 11:43:16 -0700983
Ian Rogersd81871c2011-10-03 13:57:23 -0700984 // Check the register indices used in a "vararg" instruction, such as invoke-virtual or
985 // filled-new-array.
986 // - vA holds word count (0-5), args[] have values.
987 // There are some tests we don't do here, e.g. we don't try to verify that invoking a method that
988 // takes a double is done with consecutive registers. This requires parsing the target method
989 // signature, which we will be doing later on during the code flow analysis.
990 bool CheckVarArgRegs(uint32_t vA, uint32_t arg[]);
jeffhaobdb76512011-09-07 11:43:16 -0700991
Ian Rogersd81871c2011-10-03 13:57:23 -0700992 // Check the register indices used in a "vararg/range" instruction, such as invoke-virtual/range
993 // or filled-new-array/range.
994 // - vA holds word count, vC holds index of first reg.
995 bool CheckVarArgRangeRegs(uint32_t vA, uint32_t vC);
jeffhaobdb76512011-09-07 11:43:16 -0700996
Ian Rogersd81871c2011-10-03 13:57:23 -0700997 // Extract the relative offset from a branch instruction.
998 // Returns "false" on failure (e.g. this isn't a branch instruction).
999 bool GetBranchOffset(uint32_t cur_offset, int32_t* pOffset, bool* pConditional,
1000 bool* selfOkay);
jeffhaobdb76512011-09-07 11:43:16 -07001001
Ian Rogersd81871c2011-10-03 13:57:23 -07001002 /* Perform detailed code-flow analysis on a single method. */
1003 bool VerifyCodeFlow();
jeffhaobdb76512011-09-07 11:43:16 -07001004
Ian Rogersd81871c2011-10-03 13:57:23 -07001005 // Set the register types for the first instruction in the method based on the method signature.
1006 // This has the side-effect of validating the signature.
1007 bool SetTypesFromSignature();
jeffhaobdb76512011-09-07 11:43:16 -07001008
1009 /*
1010 * Perform code flow on a method.
1011 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001012 * The basic strategy is as outlined in v3 4.11.1.2: set the "changed" bit on the first
1013 * instruction, process it (setting additional "changed" bits), and repeat until there are no
1014 * more.
jeffhaobdb76512011-09-07 11:43:16 -07001015 *
1016 * v3 4.11.1.1
1017 * - (N/A) operand stack is always the same size
1018 * - operand stack [registers] contain the correct types of values
1019 * - local variables [registers] contain the correct types of values
1020 * - methods are invoked with the appropriate arguments
1021 * - fields are assigned using values of appropriate types
1022 * - opcodes have the correct type values in operand registers
Ian Rogersd81871c2011-10-03 13:57:23 -07001023 * - there is never an uninitialized class instance in a local variable in code protected by an
1024 * exception handler (operand stack is okay, because the operand stack is discarded when an
1025 * exception is thrown) [can't know what's a local var w/o the debug info -- should fall out of
jeffhaobdb76512011-09-07 11:43:16 -07001026 * register typing]
1027 *
1028 * v3 4.11.1.2
1029 * - execution cannot fall off the end of the code
1030 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001031 * (We also do many of the items described in the "static checks" sections, because it's easier to
1032 * do them here.)
jeffhaobdb76512011-09-07 11:43:16 -07001033 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001034 * We need an array of RegType values, one per register, for every instruction. If the method uses
1035 * monitor-enter, we need extra data for every register, and a stack for every "interesting"
1036 * instruction. In theory this could become quite large -- up to several megabytes for a monster
1037 * function.
jeffhaobdb76512011-09-07 11:43:16 -07001038 *
1039 * NOTE:
Ian Rogersd81871c2011-10-03 13:57:23 -07001040 * The spec forbids backward branches when there's an uninitialized reference in a register. The
1041 * idea is to prevent something like this:
jeffhaobdb76512011-09-07 11:43:16 -07001042 * loop:
1043 * move r1, r0
1044 * new-instance r0, MyClass
1045 * ...
1046 * if-eq rN, loop // once
1047 * initialize r0
1048 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001049 * This leaves us with two different instances, both allocated by the same instruction, but only
1050 * one is initialized. The scheme outlined in v3 4.11.1.4 wouldn't catch this, so they work around
1051 * it by preventing backward branches. We achieve identical results without restricting code
1052 * reordering by specifying that you can't execute the new-instance instruction if a register
1053 * contains an uninitialized instance created by that same instruction.
jeffhaobdb76512011-09-07 11:43:16 -07001054 */
Ian Rogersd81871c2011-10-03 13:57:23 -07001055 bool CodeFlowVerifyMethod();
jeffhaobdb76512011-09-07 11:43:16 -07001056
1057 /*
1058 * Perform verification for a single instruction.
1059 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001060 * This requires fully decoding the instruction to determine the effect it has on registers.
jeffhaobdb76512011-09-07 11:43:16 -07001061 *
Ian Rogersd81871c2011-10-03 13:57:23 -07001062 * Finds zero or more following instructions and sets the "changed" flag if execution at that
1063 * point needs to be (re-)evaluated. Register changes are merged into "reg_types_" at the target
1064 * addresses. Does not set or clear any other flags in "insn_flags_".
jeffhaobdb76512011-09-07 11:43:16 -07001065 */
Ian Rogersd81871c2011-10-03 13:57:23 -07001066 bool CodeFlowVerifyInstruction(uint32_t* start_guess);
1067
1068 // Perform verification of an aget instruction. The destination register's type will be set to
1069 // be that of component type of the array unless the array type is unknown, in which case a
1070 // bottom type inferred from the type of instruction is used. is_primitive is false for an
1071 // aget-object.
1072 void VerifyAGet(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
1073 bool is_primitive);
1074
1075 // Perform verification of an aput instruction.
1076 void VerifyAPut(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
1077 bool is_primitive);
1078
1079 // Lookup instance field and fail for resolution violations
1080 Field* GetInstanceField(const RegType& obj_type, int field_idx);
1081
1082 // Perform verification of an iget instruction.
1083 void VerifyIGet(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
1084 bool is_primitive);
1085
1086 // Perform verification of an iput instruction.
1087 void VerifyIPut(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
1088 bool is_primitive);
1089
1090 // Lookup static field and fail for resolution violations
1091 Field* GetStaticField(int field_idx);
1092
1093 // Perform verification of an sget instruction.
1094 void VerifySGet(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
1095 bool is_primitive);
1096
1097 // Perform verification of an sput instruction.
1098 void VerifySPut(const Instruction::DecodedInstruction& insn, const RegType& insn_type,
1099 bool is_primitive);
1100
1101 // Verify that the arguments in a filled-new-array instruction are valid.
  // "res_class" is the class referred to by dec_insn.vB_.
1103 void VerifyFilledNewArrayRegs(const Instruction::DecodedInstruction& dec_insn, Class* res_class,
1104 bool is_range);
jeffhaobdb76512011-09-07 11:43:16 -07001105
1106 /*
Ian Rogersd81871c2011-10-03 13:57:23 -07001107 * Resolves a class based on an index and performs access checks to ensure the referrer can
1108 * access the resolved class.
jeffhao98eacac2011-09-14 16:11:53 -07001109 * Exceptions caused by failures are cleared before returning.
jeffhao98eacac2011-09-14 16:11:53 -07001110 * Sets "*failure" on failure.
1111 */
Ian Rogersd81871c2011-10-03 13:57:23 -07001112 Class* ResolveClassAndCheckAccess(uint32_t class_idx);
1113
1114 /*
1115 * For the "move-exception" instruction at "work_insn_idx_", which must be at an exception handler
1116 * address, determine the first common superclass of all exceptions that can land here.
1117 * Returns NULL if no matching exception handler can be found, or if the exception is not a
1118 * subclass of Throwable.
1119 */
1120 Class* GetCaughtExceptionType();
jeffhao98eacac2011-09-14 16:11:53 -07001121
1122 /*
jeffhaob4df5142011-09-19 20:25:32 -07001123 * Resolves a method based on an index and performs access checks to ensure
1124 * the referrer can access the resolved method.
jeffhaob4df5142011-09-19 20:25:32 -07001125 * Does not throw exceptions.
jeffhaob4df5142011-09-19 20:25:32 -07001126 */
Ian Rogersd81871c2011-10-03 13:57:23 -07001127 Method* ResolveMethodAndCheckAccess(uint32_t method_idx, bool is_direct);
jeffhaobdb76512011-09-07 11:43:16 -07001128
1129 /*
jeffhaod1f0fde2011-09-08 17:25:33 -07001130 * Verify the arguments to a method. We're executing in "method", making
jeffhaobdb76512011-09-07 11:43:16 -07001131 * a call to the method reference in vB.
1132 *
jeffhaod1f0fde2011-09-08 17:25:33 -07001133 * If this is a "direct" invoke, we allow calls to <init>. For calls to
1134 * <init>, the first argument may be an uninitialized reference. Otherwise,
jeffhaobdb76512011-09-07 11:43:16 -07001135 * calls to anything starting with '<' will be rejected, as will any
1136 * uninitialized reference arguments.
1137 *
1138 * For non-static method calls, this will verify that the method call is
1139 * appropriate for the "this" argument.
1140 *
jeffhaod1f0fde2011-09-08 17:25:33 -07001141 * The method reference is in vBBBB. The "is_range" parameter determines
jeffhaobdb76512011-09-07 11:43:16 -07001142 * whether we use 0-4 "args" values or a range of registers defined by
1143 * vAA and vCCCC.
1144 *
1145 * Widening conversions on integers and references are allowed, but
1146 * narrowing conversions are not.
1147 *
1148 * Returns the resolved method on success, NULL on failure (with *failure
1149 * set appropriately).
1150 */
Ian Rogersd81871c2011-10-03 13:57:23 -07001151 Method* VerifyInvocationArgs(const Instruction::DecodedInstruction& dec_insn,
1152 MethodType method_type, bool is_range, bool is_super);
jeffhaobdb76512011-09-07 11:43:16 -07001153
Ian Rogersd81871c2011-10-03 13:57:23 -07001154 /*
1155 * Return the register type for the method. We can't just use the already-computed
1156 * DalvikJniReturnType, because if it's a reference type we need to do the class lookup.
1157 * Returned references are assumed to be initialized. Returns kRegTypeUnknown for "void".
1158 */
1159 const RegType& GetMethodReturnType() {
1160 return reg_types_.FromClass(method_->GetReturnType());
1161 }
jeffhaoe0cfb6f2011-09-22 16:42:56 -07001162
Ian Rogersd81871c2011-10-03 13:57:23 -07001163 /*
1164 * Verify that the target instruction is not "move-exception". It's important that the only way
1165 * to execute a move-exception is as the first instruction of an exception handler.
1166 * Returns "true" if all is well, "false" if the target instruction is move-exception.
1167 */
1168 bool CheckMoveException(const uint16_t* insns, int insn_idx);
1169
1170 /*
1171 * Replace an instruction with "throw-verification-error". This allows us to
1172 * defer error reporting until the code path is first used.
1173 */
1174 void ReplaceFailingInstruction();
1175
1176 /*
1177 * Control can transfer to "next_insn". Merge the registers from merge_line into the table at
1178 * next_insn, and set the changed flag on the target address if any of the registers were changed.
1179 * Returns "false" if an error is encountered.
1180 */
1181 bool UpdateRegisters(uint32_t next_insn, const RegisterLine* merge_line);
1182
1183 /*
1184 * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
1185 * verification). For type-precise determination we have all the data we need, so we just need to
1186 * encode it in some clever fashion.
1187 * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
1188 */
1189 ByteArray* GenerateGcMap();
1190
1191 // Verify that the GC map associated with method_ is well formed
1192 void VerifyGcMap();
1193
1194 // Compute sizes for GC map data
1195 void ComputeGcMapSizes(size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
1196
1197 Class* JavaLangThrowable();
1198
1199 InsnFlags CurrentInsnFlags() {
1200 return insn_flags_[work_insn_idx_];
1201 }
1202
1203 RegTypeCache reg_types_;
1204
1205 PcToRegisterLineTable reg_table_;
1206
1207 // Storage for the register status we're currently working on.
1208 UniquePtr<RegisterLine> work_line_;
1209
1210 // Lazily initialized reference to java.lang.Class<java.lang.Throwable>
1211 Class* java_lang_throwable_;
1212
1213 // The address of the instruction we're currently working on, note that this is in 2 byte
1214 // quantities
1215 uint32_t work_insn_idx_;
1216
1217 // Storage for the register status we're saving for later.
1218 UniquePtr<RegisterLine> saved_line_;
1219
1220 Method* method_; // The method we're working on.
1221 const DexFile* dex_file_; // The dex file containing the method.
1222 const DexFile::CodeItem* code_item_; // The code item containing the code for the method.
1223 UniquePtr<InsnFlags[]> insn_flags_; // Instruction widths and flags, one entry per code unit.
1224
1225 // The type of any error that occurs
1226 VerifyError failure_;
1227
1228 // Failure message log
1229 std::ostringstream fail_messages_;
1230 // Info message log
1231 std::ostringstream info_messages_;
1232
1233 // The number of occurrences of specific opcodes.
1234 size_t new_instance_count_;
1235 size_t monitor_enter_count_;
Carl Shapiro0e5d75d2011-07-06 18:28:37 -07001236};
1237
Ian Rogersd81871c2011-10-03 13:57:23 -07001238// Lightweight wrapper for PC to reference bit maps.
1239class PcToReferenceMap {
1240 public:
1241 PcToReferenceMap(Method* m) {
1242 data_ = down_cast<ByteArray*>(m->GetGcMap());
1243 CHECK(data_ != NULL);
1244 // Check the size of the table agrees with the number of entries
1245 size_t data_size = data_->GetLength() - 4;
1246 DCHECK_EQ(EntryWidth() * NumEntries(), data_size);
1247 }
1248
1249 // The number of entries in the table
1250 size_t NumEntries() const {
1251 return GetData()[2] | (GetData()[3] << 8);
1252 }
1253
1254 // Get the PC at the given index
1255 uint16_t GetPC(size_t index) const {
1256 size_t entry_offset = index * EntryWidth();
1257 if (PcWidth() == 1) {
1258 return Table()[entry_offset];
1259 } else {
1260 return Table()[entry_offset] | (Table()[entry_offset + 1] << 8);
1261 }
1262 }
1263
1264 // Return address of bitmap encoding what are live references
1265 const uint8_t* GetBitMap(size_t index) const {
1266 size_t entry_offset = index * EntryWidth();
1267 return &Table()[entry_offset + PcWidth()];
1268 }
1269
1270 // Find the bitmap associated with the given dex pc
1271 const uint8_t* FindBitMap(uint16_t dex_pc, bool error_if_not_present = true) const;
1272
1273 // The number of bytes used to encode registers
1274 size_t RegWidth() const {
1275 return GetData()[1];
1276 }
1277
1278 private:
1279 // Table of num_entries * (dex pc, bitmap)
1280 const uint8_t* Table() const {
1281 return GetData() + 4;
1282 }
1283
1284 // The format of the table of the PCs for the table
1285 RegisterMapFormat Format() const {
1286 return static_cast<RegisterMapFormat>(GetData()[0]);
1287 }
1288
1289 // Number of bytes used to encode a dex pc
1290 size_t PcWidth() const {
1291 RegisterMapFormat format = Format();
1292 switch (format) {
1293 case kRegMapFormatCompact8:
1294 return 1;
1295 case kRegMapFormatCompact16:
1296 return 2;
1297 default:
1298 LOG(FATAL) << "Invalid format " << static_cast<int>(format);
1299 return -1;
1300 }
1301 }
1302
1303 // The width of an entry in the table
1304 size_t EntryWidth() const {
1305 return PcWidth() + RegWidth();
1306 }
1307
1308 const uint8_t* GetData() const {
1309 return reinterpret_cast<uint8_t*>(data_->GetData());
1310 }
1311 ByteArray* data_; // The header and table data
1312};
1313
1314} // namespace verifier
Carl Shapiro0e5d75d2011-07-06 18:28:37 -07001315} // namespace art
1316
1317#endif // ART_SRC_DEX_VERIFY_H_