diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index a3d9a0b..e84f65a 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -57,7 +57,7 @@
       const std::vector<uint8_t>& vmap_table = compiled_method->GetVmapTable();
       uint32_t vmap_table_offset = vmap_table.empty() ? 0u
           : sizeof(OatQuickMethodHeader) + vmap_table.size();
-      const std::vector<uint8_t>& mapping_table = compiled_method->GetMappingTable();
+      const std::vector<uint8_t>& mapping_table = *compiled_method->GetMappingTable();
       uint32_t mapping_table_offset = mapping_table.empty() ? 0u
           : sizeof(OatQuickMethodHeader) + vmap_table.size() + mapping_table.size();
       const std::vector<uint8_t>& gc_map = *compiled_method->GetGcMap();
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 698bf3b..e292834 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -170,14 +170,13 @@
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask,
-                               const std::vector<uint8_t>& mapping_table,
                                const std::vector<uint8_t>& stack_map)
     : CompiledCode(driver, instruction_set, quick_code),
       frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask),
       fp_spill_mask_(fp_spill_mask),
       src_mapping_table_(driver->DeduplicateSrcMappingTable(SrcMap())),
-      mapping_table_(driver->DeduplicateMappingTable(mapping_table)),
+      mapping_table_(nullptr),
       vmap_table_(driver->DeduplicateVMapTable(stack_map)),
       gc_map_(nullptr),
       cfi_info_(nullptr),
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 7f76eef..d2f5d01 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -291,7 +291,6 @@
                  const size_t frame_size_in_bytes,
                  const uint32_t core_spill_mask,
                  const uint32_t fp_spill_mask,
-                 const std::vector<uint8_t>& mapping_table,
                  const std::vector<uint8_t>& vmap_table);
 
   // Constructs a CompiledMethod for the QuickJniCompiler.
@@ -330,9 +329,8 @@
     return *src_mapping_table_;
   }
 
-  const std::vector<uint8_t>& GetMappingTable() const {
-    DCHECK(mapping_table_ != nullptr);
-    return *mapping_table_;
+  const std::vector<uint8_t>* GetMappingTable() const {
+    return mapping_table_;  // May be nullptr; callers must check before dereferencing.
   }
 
   const std::vector<uint8_t>& GetVmapTable() const {
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 8a7abb4..6bb526c 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -530,7 +530,7 @@
 
 struct OatWriter::MappingTableDataAccess {
   static const std::vector<uint8_t>* GetData(const CompiledMethod* compiled_method) ALWAYS_INLINE {
-    return &compiled_method->GetMappingTable();
+    return compiled_method->GetMappingTable();  // Already a pointer; may be nullptr.
   }
 
   static uint32_t GetOffset(OatClass* oat_class, size_t method_offsets_index) ALWAYS_INLINE {
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index b3ac7ff..ea0dc66 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -347,8 +347,8 @@
         return lhs->GetQuickCode() < rhs->GetQuickCode();
       }
       // If the code is the same, all other fields are likely to be the same as well.
-      if (UNLIKELY(&lhs->GetMappingTable() != &rhs->GetMappingTable())) {
-        return &lhs->GetMappingTable() < &rhs->GetMappingTable();
+      if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) {
+        return lhs->GetMappingTable() < rhs->GetMappingTable();
       }
       if (UNLIKELY(&lhs->GetVmapTable() != &rhs->GetVmapTable())) {
         return &lhs->GetVmapTable() < &rhs->GetVmapTable();
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 11fc9bf..89a0cf9 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -238,6 +238,17 @@
   return true;
 }
 
+// The stack map we generate must be 4-byte aligned on ARM. Since existing
+// maps are generated alongside these stack maps, we must also align them.
+// Zero-pads `vector` in place up to the next multiple of 4 bytes and returns
+// a reference to that same vector; an already aligned size is left untouched.
+static std::vector<uint8_t>& AlignVectorSize(std::vector<uint8_t>& vector) {
+  // resize() appends the missing zero bytes in one step, replacing a
+  // hand-rolled push_back loop.
+  vector.resize(RoundUp(vector.size(), 4), 0);
+  return vector;
+}
+
 CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             uint32_t access_flags,
                                             InvokeType invoke_type,
@@ -318,12 +329,6 @@
     visualizer.DumpGraph(kRegisterAllocatorPassName);
     codegen->CompileOptimized(&allocator);
 
-    std::vector<uint8_t> mapping_table;
-    SrcMap src_mapping_table;
-    codegen->BuildMappingTable(&mapping_table,
-            GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ?
-                 &src_mapping_table : nullptr);
-
     std::vector<uint8_t> stack_map;
     codegen->BuildStackMaps(&stack_map);
 
@@ -333,7 +338,6 @@
                               codegen->GetFrameSize(),
                               codegen->GetCoreSpillMask(),
                               0, /* FPR spill mask, unused */
-                              mapping_table,
                               stack_map);
   } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) {
     LOG(FATAL) << "Could not allocate registers in optimizing compiler";
@@ -360,9 +364,9 @@
                               codegen->GetCoreSpillMask(),
                               0, /* FPR spill mask, unused */
                               &src_mapping_table,
-                              mapping_table,
-                              vmap_table,
-                              gc_map,
+                              AlignVectorSize(mapping_table),
+                              AlignVectorSize(vmap_table),
+                              AlignVectorSize(gc_map),
                               nullptr);
   }
 }
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 9cfa71c..3974e53 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -111,7 +111,10 @@
   }
 
   size_t ComputeStackMapSize() const {
-    return stack_maps_.Size() * (StackMap::kFixedSize + StackMaskEncodingSize(stack_mask_max_));
+    // ComputeAlignedStackMapSize() takes a stack mask size, so stack_mask_max_ still has
+    // to go through StackMaskEncodingSize(), exactly as the pre-alignment code did.
+    return stack_maps_.Size()
+        * StackMap::ComputeAlignedStackMapSize(StackMaskEncodingSize(stack_mask_max_));
   }
 
   size_t ComputeDexRegisterMapSize() const {
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 1729686..1a13f93 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -204,7 +204,14 @@
     // Portable doesn't use the machine pc, we just use dex pc instead.
     return static_cast<uint32_t>(pc);
   }
+
   const void* entry_point = GetQuickOatEntryPoint(sizeof(void*));
+  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
+  if (IsOptimized(sizeof(void*))) {
+    uint32_t ret = GetStackMap(sought_offset).GetDexPc();
+    return ret;
+  }
+
   MappingTable table(entry_point != nullptr ?
       GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
   if (table.TotalSize() == 0) {
@@ -213,7 +220,6 @@
     DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
     return DexFile::kDexNoIndex;   // Special no mapping case
   }
-  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
   // Assume the caller wants a pc-to-dex mapping so check here first.
   typedef MappingTable::PcToDexIterator It;
   for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 2107944..64663ed 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -151,7 +151,8 @@
     // Temporary solution for detecting if a method has been optimized: the compiler
     // does not create a GC map. Instead, the vmap table contains the stack map
     // (as in stack_map.h).
-    return GetEntryPointFromQuickCompiledCodePtrSize(pointer_size) != nullptr
+    return !IsNative()
+        && GetEntryPointFromQuickCompiledCodePtrSize(pointer_size) != nullptr
         && GetQuickOatCodePointer(pointer_size) != nullptr
         && GetNativeGcMap(pointer_size) == nullptr;
   }
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index a58ecab..7cc3e57 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -19,6 +19,7 @@
 
 #include "base/bit_vector.h"
 #include "memory_region.h"
+#include "utils.h"
 
 namespace art {
 
@@ -199,6 +200,11 @@
        && region_.size() == other.region_.size();
   }
 
+  // Size of one stack map: fixed part + stack mask, padded to 4 bytes (needed on ARM).
+  static size_t ComputeAlignedStackMapSize(size_t stack_mask_size) {
+    return RoundUp(StackMap::kFixedSize + stack_mask_size, 4);
+  }
+
  private:
   static constexpr int kDexPcOffset = 0;
   static constexpr int kNativePcOffsetOffset = kDexPcOffset + sizeof(uint32_t);
@@ -262,7 +268,7 @@
   }
 
   size_t StackMapSize() const {
-    return StackMap::kFixedSize + GetStackMaskSize();
+    return StackMap::ComputeAlignedStackMapSize(GetStackMaskSize());  // Padded to 4 bytes.
   }
 
   DexRegisterMap GetDexRegisterMapOf(StackMap stack_map, uint32_t number_of_dex_registers) {
