Add dex file intermediate representation

Add an intermediate representation, constructed from the DexFile
utilities, that can be used to optimize the layout and re-emit dex
files.

BUG: 29921113
TEST: TODO: Add dex files to compare output with dexdump

(cherry-picked from commit af14d9f551e6169c201dc7981d9cfbcb53c7d2c7)

Change-Id: I3dacb78fad5228e43d77f20b110bd40e0e116cdb
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
new file mode 100644
index 0000000..40e60f7
--- /dev/null
+++ b/dexlayout/dex_ir.cc
@@ -0,0 +1,407 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Implementation file of the dex file intermediate representation.
+ *
+ * Utilities for reading dex files into an internal representation,
+ * manipulating them, and writing them out.
+ */
+
+#include "dex_ir.h"
+
+#include <map>
+#include <vector>
+
+#include "dex_file.h"
+#include "dex_file-inl.h"
+#include "utils.h"
+
+namespace art {
+namespace dex_ir {
+
+namespace {
+static uint64_t ReadVarWidth(const uint8_t** data, uint8_t length, bool sign_extend) {
+  uint64_t value = 0;
+  for (uint32_t i = 0; i <= length; i++) {
+    value |= static_cast<uint64_t>(*(*data)++) << (i * 8);
+  }
+  if (sign_extend) {
+    int shift = (7 - length) * 8;
+    return (static_cast<int64_t>(value) << shift) >> shift;
+  }
+  return value;
+}
+
+static bool GetPositionsCb(void* context, const DexFile::PositionInfo& entry) {
+  DebugInfoItem* debug_info = reinterpret_cast<DebugInfoItem*>(context);
+  std::vector<std::unique_ptr<PositionInfo>>& positions = debug_info->GetPositionInfo();
+  positions.push_back(std::unique_ptr<PositionInfo>(new PositionInfo(entry.address_, entry.line_)));
+  return false;
+}
+
+static void GetLocalsCb(void* context, const DexFile::LocalInfo& entry) {
+  DebugInfoItem* debug_info = reinterpret_cast<DebugInfoItem*>(context);
+  std::vector<std::unique_ptr<LocalInfo>>& locals = debug_info->GetLocalInfo();
+  const char* signature = entry.signature_ != nullptr ? entry.signature_ : "";
+  locals.push_back(std::unique_ptr<LocalInfo>(
+      new LocalInfo(entry.name_, entry.descriptor_, signature, entry.start_address_,
+                    entry.end_address_, entry.reg_)));
+}
+}  // namespace
+
+Header::Header(const DexFile& dex_file) : dex_file_(dex_file) {
+  const DexFile::Header& disk_header = dex_file.GetHeader();
+  memcpy(magic_, disk_header.magic_, sizeof(magic_));
+  checksum_ = disk_header.checksum_;
+  // TODO(sehr): clearly the signature will need to be recomputed before dumping.
+  memcpy(signature_, disk_header.signature_, sizeof(signature_));
+  endian_tag_ = disk_header.endian_tag_;
+  file_size_ = disk_header.file_size_;
+  header_size_ = disk_header.header_size_;
+  link_size_ = disk_header.link_size_;
+  link_offset_ = disk_header.link_off_;
+  data_size_ = disk_header.data_size_;
+  data_offset_ = disk_header.data_off_;
+  // Walk the rest of the header fields.
+  string_ids_.SetOffset(disk_header.string_ids_off_);
+  for (uint32_t i = 0; i < dex_file_.NumStringIds(); ++i) {
+    string_ids_.AddWithPosition(i, new StringId(dex_file_.GetStringId(i), *this));
+  }
+  type_ids_.SetOffset(disk_header.type_ids_off_);
+  for (uint32_t i = 0; i < dex_file_.NumTypeIds(); ++i) {
+    type_ids_.AddWithPosition(i, new TypeId(dex_file_.GetTypeId(i), *this));
+  }
+  proto_ids_.SetOffset(disk_header.proto_ids_off_);
+  for (uint32_t i = 0; i < dex_file_.NumProtoIds(); ++i) {
+    proto_ids_.AddWithPosition(i, new ProtoId(dex_file_.GetProtoId(i), *this));
+  }
+  field_ids_.SetOffset(disk_header.field_ids_off_);
+  for (uint32_t i = 0; i < dex_file_.NumFieldIds(); ++i) {
+    field_ids_.AddWithPosition(i, new FieldId(dex_file_.GetFieldId(i), *this));
+  }
+  method_ids_.SetOffset(disk_header.method_ids_off_);
+  for (uint32_t i = 0; i < dex_file_.NumMethodIds(); ++i) {
+    method_ids_.AddWithPosition(i, new MethodId(dex_file_.GetMethodId(i), *this));
+  }
+  class_defs_.SetOffset(disk_header.class_defs_off_);
+  for (uint32_t i = 0; i < dex_file_.NumClassDefs(); ++i) {
+    class_defs_.AddWithPosition(i, new ClassDef(dex_file_.GetClassDef(i), *this));
+  }
+}
+
+ArrayItem::ArrayItem(Header& header, const uint8_t** data, uint8_t type, uint8_t length) {
+  Read(header, data, type, length);
+}
+
+ArrayItem::ArrayItem(Header& header, const uint8_t** data) {
+  const uint8_t encoded_value = *(*data)++;
+  Read(header, data, encoded_value & 0x1f, encoded_value >> 5);
+}
+
+void ArrayItem::Read(Header& header, const uint8_t** data, uint8_t type, uint8_t length) {
+  type_ = type;
+  switch (type_) {
+    case DexFile::kDexAnnotationByte:
+      item_.byte_val_ = static_cast<int8_t>(ReadVarWidth(data, length, false));
+      break;
+    case DexFile::kDexAnnotationShort:
+      item_.short_val_ = static_cast<int16_t>(ReadVarWidth(data, length, true));
+      break;
+    case DexFile::kDexAnnotationChar:
+      item_.char_val_ = static_cast<uint16_t>(ReadVarWidth(data, length, false));
+      break;
+    case DexFile::kDexAnnotationInt:
+      item_.int_val_ = static_cast<int32_t>(ReadVarWidth(data, length, true));
+      break;
+    case DexFile::kDexAnnotationLong:
+      item_.long_val_ = static_cast<int64_t>(ReadVarWidth(data, length, true));
+      break;
+    case DexFile::kDexAnnotationFloat: {
+      // Fill on right.
+      union {
+        float f;
+        uint32_t data;
+      } conv;
+      conv.data = static_cast<uint32_t>(ReadVarWidth(data, length, false)) << (3 - length) * 8;
+      item_.float_val_ = conv.f;
+      break;
+    }
+    case DexFile::kDexAnnotationDouble: {
+      // Fill on right.
+      union {
+        double d;
+        uint64_t data;
+      } conv;
+      conv.data = ReadVarWidth(data, length, false) << (7 - length) * 8;
+      item_.double_val_ = conv.d;
+      break;
+    }
+    case DexFile::kDexAnnotationString: {
+      const uint32_t string_index = static_cast<uint32_t>(ReadVarWidth(data, length, false));
+      item_.string_val_ = header.StringIds()[string_index].get();
+      break;
+    }
+    case DexFile::kDexAnnotationType: {
+      const uint32_t string_index = static_cast<uint32_t>(ReadVarWidth(data, length, false));
+      item_.string_val_ = header.TypeIds()[string_index]->GetStringId();
+      break;
+    }
+    case DexFile::kDexAnnotationField:
+    case DexFile::kDexAnnotationEnum: {
+      const uint32_t field_index = static_cast<uint32_t>(ReadVarWidth(data, length, false));
+      item_.field_val_ = header.FieldIds()[field_index].get();
+      break;
+    }
+    case DexFile::kDexAnnotationMethod: {
+      const uint32_t method_index = static_cast<uint32_t>(ReadVarWidth(data, length, false));
+      item_.method_val_ = header.MethodIds()[method_index].get();
+      break;
+    }
+    case DexFile::kDexAnnotationArray: {
+      item_.annotation_array_val_ = new std::vector<std::unique_ptr<ArrayItem>>();
+      // Decode all elements.
+      const uint32_t size = DecodeUnsignedLeb128(data);
+      for (uint32_t i = 0; i < size; i++) {
+        item_.annotation_array_val_->push_back(
+            std::unique_ptr<ArrayItem>(new ArrayItem(header, data)));
+      }
+      break;
+    }
+    case DexFile::kDexAnnotationAnnotation: {
+      const uint32_t type_idx = DecodeUnsignedLeb128(data);
+      item_.annotation_annotation_val_.string_ = header.TypeIds()[type_idx]->GetStringId();
+      item_.annotation_annotation_val_.array_ = new std::vector<std::unique_ptr<NameValuePair>>();
+      // Decode all name=value pairs.
+      const uint32_t size = DecodeUnsignedLeb128(data);
+      for (uint32_t i = 0; i < size; i++) {
+        const uint32_t name_index = DecodeUnsignedLeb128(data);
+        item_.annotation_annotation_val_.array_->push_back(std::unique_ptr<NameValuePair>(
+            new NameValuePair(header.StringIds()[name_index].get(), new ArrayItem(header, data))));
+      }
+      break;
+    }
+    case DexFile::kDexAnnotationNull:
+      break;
+    case DexFile::kDexAnnotationBoolean:
+      item_.bool_val_ = (length != 0);
+      break;
+    default:
+      break;
+  }
+}
+
+ClassDef::ClassDef(const DexFile::ClassDef& disk_class_def, Header& header) {
+  class_type_ = header.TypeIds()[disk_class_def.class_idx_].get();
+  access_flags_ = disk_class_def.access_flags_;
+  superclass_ = header.TypeIds()[disk_class_def.superclass_idx_].get();
+
+  const DexFile::TypeList* type_list = header.GetDexFile().GetInterfacesList(disk_class_def);
+  interfaces_offset_ = disk_class_def.interfaces_off_;
+  if (type_list != nullptr) {
+    for (uint32_t index = 0; index < type_list->Size(); ++index) {
+      interfaces_.push_back(header.TypeIds()[type_list->GetTypeItem(index).type_idx_].get());
+    }
+  }
+  if (disk_class_def.source_file_idx_ == DexFile::kDexNoIndex) {
+    source_file_ = nullptr;
+  } else {
+    source_file_ = header.StringIds()[disk_class_def.source_file_idx_].get();
+  }
+  // Annotations.
+  const DexFile::AnnotationsDirectoryItem* disk_annotations_directory_item =
+      header.GetDexFile().GetAnnotationsDirectory(disk_class_def);
+  if (disk_annotations_directory_item == nullptr) {
+    annotations_.reset(nullptr);
+  } else {
+    annotations_.reset(new AnnotationsDirectoryItem(disk_annotations_directory_item, header));
+    annotations_->SetOffset(disk_class_def.annotations_off_);
+  }
+  // Static field initializers.
+  static_values_ = nullptr;
+  const uint8_t* static_data = header.GetDexFile().GetEncodedStaticFieldValuesArray(disk_class_def);
+  if (static_data != nullptr) {
+    uint32_t static_value_count = static_data == nullptr ? 0 : DecodeUnsignedLeb128(&static_data);
+    if (static_value_count > 0) {
+      static_values_ = new std::vector<std::unique_ptr<ArrayItem>>();
+      for (uint32_t i = 0; i < static_value_count; ++i) {
+        static_values_->push_back(std::unique_ptr<ArrayItem>(new ArrayItem(header, &static_data)));
+      }
+    }
+  }
+  // Read the fields and methods defined by the class, resolving the circular reference from those
+  // to classes by setting class at the same time.
+  const uint8_t* encoded_data = header.GetDexFile().GetClassData(disk_class_def);
+  class_data_.SetOffset(disk_class_def.class_data_off_);
+  if (encoded_data != nullptr) {
+    ClassDataItemIterator cdii(header.GetDexFile(), encoded_data);
+    // Static fields.
+    for (uint32_t i = 0; cdii.HasNextStaticField(); i++, cdii.Next()) {
+      FieldId* field_item = header.FieldIds()[cdii.GetMemberIndex()].get();
+      uint32_t access_flags = cdii.GetRawMemberAccessFlags();
+      class_data_.StaticFields().push_back(
+          std::unique_ptr<FieldItem>(new FieldItem(access_flags, field_item)));
+    }
+    // Instance fields.
+    for (uint32_t i = 0; cdii.HasNextInstanceField(); i++, cdii.Next()) {
+      FieldId* field_item = header.FieldIds()[cdii.GetMemberIndex()].get();
+      uint32_t access_flags = cdii.GetRawMemberAccessFlags();
+      class_data_.InstanceFields().push_back(
+          std::unique_ptr<FieldItem>(new FieldItem(access_flags, field_item)));
+    }
+    // Direct methods.
+    for (uint32_t i = 0; cdii.HasNextDirectMethod(); i++, cdii.Next()) {
+      MethodId* method_item = header.MethodIds()[cdii.GetMemberIndex()].get();
+      uint32_t access_flags = cdii.GetRawMemberAccessFlags();
+      const DexFile::CodeItem* disk_code_item = cdii.GetMethodCodeItem();
+      CodeItem* code_item = nullptr;
+      DebugInfoItem* debug_info = nullptr;
+      if (disk_code_item != nullptr) {
+        code_item = new CodeItem(*disk_code_item, header);
+        code_item->SetOffset(cdii.GetMethodCodeItemOffset());
+        debug_info = code_item->DebugInfo();
+      }
+      if (debug_info != nullptr) {
+        // TODO: Fix debug local info.
+        // bool is_static = (access_flags & kAccStatic) != 0;
+        // header.GetDexFile().DecodeDebugLocalInfo(
+        //       disk_code_item, is_static, cdii.GetMemberIndex(), GetLocalsCb, debug_info);
+        header.GetDexFile().DecodeDebugPositionInfo(disk_code_item, GetPositionsCb, debug_info);
+      }
+      class_data_.DirectMethods().push_back(
+          std::unique_ptr<MethodItem>(new MethodItem(access_flags, method_item, code_item)));
+    }
+    // Virtual methods.
+    for (uint32_t i = 0; cdii.HasNextVirtualMethod(); i++, cdii.Next()) {
+      MethodId* method_item = header.MethodIds()[cdii.GetMemberIndex()].get();
+      uint32_t access_flags = cdii.GetRawMemberAccessFlags();
+      const DexFile::CodeItem* disk_code_item = cdii.GetMethodCodeItem();
+      CodeItem* code_item = nullptr;
+      DebugInfoItem* debug_info = nullptr;
+      if (disk_code_item != nullptr) {
+        code_item = new CodeItem(*disk_code_item, header);
+        code_item->SetOffset(cdii.GetMethodCodeItemOffset());
+        debug_info = code_item->DebugInfo();
+      }
+      if (debug_info != nullptr) {
+        // TODO: Fix debug local info.
+        // bool is_static = (access_flags & kAccStatic) != 0;
+        // header.GetDexFile().DecodeDebugLocalInfo(
+        //       disk_code_item, is_static, cdii.GetMemberIndex(), GetLocalsCb, debug_info);
+        header.GetDexFile().DecodeDebugPositionInfo(disk_code_item, GetPositionsCb, debug_info);
+      }
+      class_data_.VirtualMethods().push_back(
+          std::unique_ptr<MethodItem>(new MethodItem(access_flags, method_item, code_item)));
+    }
+  }
+}
+
+CodeItem::CodeItem(const DexFile::CodeItem& disk_code_item, Header& header) {
+  registers_size_ = disk_code_item.registers_size_;
+  ins_size_ = disk_code_item.ins_size_;
+  outs_size_ = disk_code_item.outs_size_;
+  tries_size_ = disk_code_item.tries_size_;
+
+  const uint8_t* debug_info_stream = header.GetDexFile().GetDebugInfoStream(&disk_code_item);
+  if (debug_info_stream != nullptr) {
+    debug_info_.reset(new DebugInfoItem());
+  } else {
+    debug_info_.reset(nullptr);
+  }
+
+  insns_size_ = disk_code_item.insns_size_in_code_units_;
+  insns_.reset(new uint16_t[insns_size_]);
+  memcpy(insns_.get(), disk_code_item.insns_, insns_size_ * sizeof(uint16_t));
+
+  if (tries_size_ > 0) {
+    tries_ = new std::vector<std::unique_ptr<const TryItem>>();
+    for (uint32_t i = 0; i < tries_size_; ++i) {
+      const DexFile::TryItem* disk_try_item = header.GetDexFile().GetTryItems(disk_code_item, i);
+      tries_->push_back(std::unique_ptr<const TryItem>(
+          new TryItem(*disk_try_item, disk_code_item, header)));
+    }
+  } else {
+    tries_ = nullptr;
+  }
+}
+
+AnnotationSetItem::AnnotationSetItem(const DexFile::AnnotationSetItem& disk_annotations_item,
+                                     Header& header) {
+  if (disk_annotations_item.size_ == 0) {
+    return;
+  }
+  for (uint32_t i = 0; i < disk_annotations_item.size_; ++i) {
+    const DexFile::AnnotationItem* annotation =
+        header.GetDexFile().GetAnnotationItem(&disk_annotations_item, i);
+    if (annotation == nullptr) {
+      continue;
+    }
+    uint8_t visibility = annotation->visibility_;
+    const uint8_t* annotation_data = annotation->annotation_;
+    ArrayItem* array_item =
+        new ArrayItem(header, &annotation_data, DexFile::kDexAnnotationAnnotation, 0);
+    items_.push_back(std::unique_ptr<AnnotationItem>(new AnnotationItem(visibility, array_item)));
+  }
+}
+
+AnnotationsDirectoryItem::AnnotationsDirectoryItem(
+    const DexFile::AnnotationsDirectoryItem* disk_annotations_item, Header& header) {
+  const DexFile::AnnotationSetItem* class_set_item =
+      header.GetDexFile().GetClassAnnotationSet(disk_annotations_item);
+  if (class_set_item == nullptr) {
+    class_annotation_.reset(nullptr);
+  } else {
+    class_annotation_.reset(new AnnotationSetItem(*class_set_item, header));
+  }
+  const DexFile::FieldAnnotationsItem* fields =
+      header.GetDexFile().GetFieldAnnotations(disk_annotations_item);
+  if (fields != nullptr) {
+    for (uint32_t i = 0; i < disk_annotations_item->fields_size_; ++i) {
+      FieldId* field_id = header.FieldIds()[fields[i].field_idx_].get();
+      const DexFile::AnnotationSetItem* field_set_item =
+          header.GetDexFile().GetFieldAnnotationSetItem(fields[i]);
+      dex_ir::AnnotationSetItem* annotation_set_item =
+          new AnnotationSetItem(*field_set_item, header);
+      field_annotations_.push_back(std::unique_ptr<FieldAnnotation>(
+          new FieldAnnotation(field_id, annotation_set_item)));
+    }
+  }
+  const DexFile::MethodAnnotationsItem* methods =
+      header.GetDexFile().GetMethodAnnotations(disk_annotations_item);
+  if (methods != nullptr) {
+    for (uint32_t i = 0; i < disk_annotations_item->methods_size_; ++i) {
+      MethodId* method_id = header.MethodIds()[methods[i].method_idx_].get();
+      const DexFile::AnnotationSetItem* method_set_item =
+          header.GetDexFile().GetMethodAnnotationSetItem(methods[i]);
+      dex_ir::AnnotationSetItem* annotation_set_item =
+          new AnnotationSetItem(*method_set_item, header);
+      method_annotations_.push_back(std::unique_ptr<MethodAnnotation>(
+          new MethodAnnotation(method_id, annotation_set_item)));
+    }
+  }
+  const DexFile::ParameterAnnotationsItem* parameters =
+      header.GetDexFile().GetParameterAnnotations(disk_annotations_item);
+  if (parameters != nullptr) {
+    for (uint32_t i = 0; i < disk_annotations_item->parameters_size_; ++i) {
+      MethodId* method_id = header.MethodIds()[parameters[i].method_idx_].get();
+      const DexFile::AnnotationSetRefList* list =
+          header.GetDexFile().GetParameterAnnotationSetRefList(&parameters[i]);
+      parameter_annotations_.push_back(std::unique_ptr<ParameterAnnotation>(
+          new ParameterAnnotation(method_id, list, header)));
+    }
+  }
+}
+
+}  // namespace dex_ir
+}  // namespace art