Chris Lattner | a3dcfb1 | 2009-12-22 22:50:29 +0000 | [diff] [blame] | 1 | //===- Disassembler.cpp - Disassembler for hex strings --------------------===// |
Sean Callanan | ba847da | 2009-12-17 01:49:59 +0000 | [diff] [blame] | 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This class implements the disassembler of strings of bytes written in |
| 11 | // hexadecimal, from standard input or from a file. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
Chris Lattner | a3dcfb1 | 2009-12-22 22:50:29 +0000 | [diff] [blame] | 15 | #include "Disassembler.h" |
Chandler Carruth | f010c46 | 2012-12-04 10:44:52 +0000 | [diff] [blame] | 16 | #include "llvm/ADT/Triple.h" |
Lang Hames | 508bd63 | 2014-04-15 04:40:56 +0000 | [diff] [blame] | 17 | #include "llvm/MC/MCAsmInfo.h" |
| 18 | #include "llvm/MC/MCContext.h" |
Benjamin Kramer | b6242a8 | 2016-01-26 16:44:37 +0000 | [diff] [blame] | 19 | #include "llvm/MC/MCDisassembler/MCDisassembler.h" |
Sean Callanan | ba847da | 2009-12-17 01:49:59 +0000 | [diff] [blame] | 20 | #include "llvm/MC/MCInst.h" |
Lang Hames | 508bd63 | 2014-04-15 04:40:56 +0000 | [diff] [blame] | 21 | #include "llvm/MC/MCRegisterInfo.h" |
Richard Barton | d0c478d | 2012-04-16 11:32:10 +0000 | [diff] [blame] | 22 | #include "llvm/MC/MCStreamer.h" |
James Molloy | b950585 | 2011-09-07 17:24:38 +0000 | [diff] [blame] | 23 | #include "llvm/MC/MCSubtargetInfo.h" |
Sean Callanan | ba847da | 2009-12-17 01:49:59 +0000 | [diff] [blame] | 24 | #include "llvm/Support/MemoryBuffer.h" |
Chris Lattner | c3de94f | 2009-12-22 06:45:48 +0000 | [diff] [blame] | 25 | #include "llvm/Support/SourceMgr.h" |
Evan Cheng | 3e74d6f | 2011-08-24 18:08:43 +0000 | [diff] [blame] | 26 | #include "llvm/Support/TargetRegistry.h" |
| 27 | #include "llvm/Support/raw_ostream.h" |
Richard Barton | d0c478d | 2012-04-16 11:32:10 +0000 | [diff] [blame] | 28 | |
Sean Callanan | ba847da | 2009-12-17 01:49:59 +0000 | [diff] [blame] | 29 | using namespace llvm; |
| 30 | |
/// A run of input bytes held as two parallel vectors: \c first stores the byte
/// values and \c second stores, for each byte, a pointer to the text it was
/// read from (used as the location when reporting diagnostics).
using ByteArrayTy =
    std::pair<std::vector<unsigned char>, std::vector<const char *>>;
Sean Callanan | ba847da | 2009-12-17 01:49:59 +0000 | [diff] [blame] | 33 | |
Daniel Dunbar | c6ab190 | 2010-03-20 22:36:35 +0000 | [diff] [blame] | 34 | static bool PrintInsts(const MCDisassembler &DisAsm, |
Richard Barton | d0c478d | 2012-04-16 11:32:10 +0000 | [diff] [blame] | 35 | const ByteArrayTy &Bytes, |
| 36 | SourceMgr &SM, raw_ostream &Out, |
David Woodhouse | 4396f5d | 2014-01-28 23:12:42 +0000 | [diff] [blame] | 37 | MCStreamer &Streamer, bool InAtomicBlock, |
| 38 | const MCSubtargetInfo &STI) { |
Rafael Espindola | 6a222ec | 2014-11-12 02:04:27 +0000 | [diff] [blame] | 39 | ArrayRef<uint8_t> Data(Bytes.first.data(), Bytes.first.size()); |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 40 | |
Sean Callanan | 2e235a8 | 2010-02-03 03:46:41 +0000 | [diff] [blame] | 41 | // Disassemble it to strings. |
Chris Lattner | 665e947 | 2009-12-22 06:56:51 +0000 | [diff] [blame] | 42 | uint64_t Size; |
Sean Callanan | 2e235a8 | 2010-02-03 03:46:41 +0000 | [diff] [blame] | 43 | uint64_t Index; |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 44 | |
Rafael Espindola | 0cb5820 | 2014-11-07 17:59:05 +0000 | [diff] [blame] | 45 | for (Index = 0; Index < Bytes.first.size(); Index += Size) { |
Sean Callanan | 2e235a8 | 2010-02-03 03:46:41 +0000 | [diff] [blame] | 46 | MCInst Inst; |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 47 | |
Owen Anderson | 83e3f67 | 2011-08-17 17:44:15 +0000 | [diff] [blame] | 48 | MCDisassembler::DecodeStatus S; |
Rafael Espindola | 6a222ec | 2014-11-12 02:04:27 +0000 | [diff] [blame] | 49 | S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, |
Owen Anderson | 98c5dda | 2011-09-15 23:38:46 +0000 | [diff] [blame] | 50 | /*REMOVE*/ nulls(), nulls()); |
Owen Anderson | 83e3f67 | 2011-08-17 17:44:15 +0000 | [diff] [blame] | 51 | switch (S) { |
| 52 | case MCDisassembler::Fail: |
Rafael Espindola | 0cb5820 | 2014-11-07 17:59:05 +0000 | [diff] [blame] | 53 | SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]), |
Chris Lattner | 3f2d5f6 | 2011-10-16 05:43:57 +0000 | [diff] [blame] | 54 | SourceMgr::DK_Warning, |
| 55 | "invalid instruction encoding"); |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 56 | // Don't try to resynchronise the stream in a block |
| 57 | if (InAtomicBlock) |
| 58 | return true; |
| 59 | |
Sean Callanan | 2e235a8 | 2010-02-03 03:46:41 +0000 | [diff] [blame] | 60 | if (Size == 0) |
| 61 | Size = 1; // skip illegible bytes |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 62 | |
Owen Anderson | 83e3f67 | 2011-08-17 17:44:15 +0000 | [diff] [blame] | 63 | break; |
| 64 | |
| 65 | case MCDisassembler::SoftFail: |
Rafael Espindola | 0cb5820 | 2014-11-07 17:59:05 +0000 | [diff] [blame] | 66 | SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]), |
Chris Lattner | 3f2d5f6 | 2011-10-16 05:43:57 +0000 | [diff] [blame] | 67 | SourceMgr::DK_Warning, |
| 68 | "potentially undefined instruction encoding"); |
Justin Bogner | 7d7a23e | 2016-08-17 20:30:52 +0000 | [diff] [blame] | 69 | LLVM_FALLTHROUGH; |
Owen Anderson | 83e3f67 | 2011-08-17 17:44:15 +0000 | [diff] [blame] | 70 | |
| 71 | case MCDisassembler::Success: |
David Woodhouse | 4396f5d | 2014-01-28 23:12:42 +0000 | [diff] [blame] | 72 | Streamer.EmitInstruction(Inst, STI); |
Owen Anderson | 83e3f67 | 2011-08-17 17:44:15 +0000 | [diff] [blame] | 73 | break; |
Sean Callanan | 2e235a8 | 2010-02-03 03:46:41 +0000 | [diff] [blame] | 74 | } |
Chris Lattner | 665e947 | 2009-12-22 06:56:51 +0000 | [diff] [blame] | 75 | } |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 76 | |
Chris Lattner | 665e947 | 2009-12-22 06:56:51 +0000 | [diff] [blame] | 77 | return false; |
Sean Callanan | ba847da | 2009-12-17 01:49:59 +0000 | [diff] [blame] | 78 | } |
| 79 | |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 80 | static bool SkipToToken(StringRef &Str) { |
Colin LeMahieu | 6d093fd | 2014-11-11 21:03:09 +0000 | [diff] [blame] | 81 | for (;;) { |
| 82 | if (Str.empty()) |
| 83 | return false; |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 84 | |
Colin LeMahieu | 6d093fd | 2014-11-11 21:03:09 +0000 | [diff] [blame] | 85 | // Strip horizontal whitespace and commas. |
| 86 | if (size_t Pos = Str.find_first_not_of(" \t\r\n,")) { |
| 87 | Str = Str.substr(Pos); |
Sean Callanan | 668b154 | 2010-04-12 19:43:00 +0000 | [diff] [blame] | 88 | continue; |
| 89 | } |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 90 | |
Colin LeMahieu | 6d093fd | 2014-11-11 21:03:09 +0000 | [diff] [blame] | 91 | // If this is the start of a comment, remove the rest of the line. |
| 92 | if (Str[0] == '#') { |
| 93 | Str = Str.substr(Str.find_first_of('\n')); |
| 94 | continue; |
| 95 | } |
| 96 | return true; |
| 97 | } |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 98 | } |
| 99 | |
| 100 | |
| 101 | static bool ByteArrayFromString(ByteArrayTy &ByteArray, |
| 102 | StringRef &Str, |
| 103 | SourceMgr &SM) { |
| 104 | while (SkipToToken(Str)) { |
| 105 | // Handled by higher level |
| 106 | if (Str[0] == '[' || Str[0] == ']') |
| 107 | return false; |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 108 | |
Sean Callanan | 668b154 | 2010-04-12 19:43:00 +0000 | [diff] [blame] | 109 | // Get the current token. |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 110 | size_t Next = Str.find_first_of(" \t\n\r,#[]"); |
Sean Callanan | 668b154 | 2010-04-12 19:43:00 +0000 | [diff] [blame] | 111 | StringRef Value = Str.substr(0, Next); |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 112 | |
Sean Callanan | 668b154 | 2010-04-12 19:43:00 +0000 | [diff] [blame] | 113 | // Convert to a byte and add to the byte vector. |
| 114 | unsigned ByteVal; |
| 115 | if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) { |
| 116 | // If we have an error, print it and skip to the end of line. |
Chris Lattner | 3f2d5f6 | 2011-10-16 05:43:57 +0000 | [diff] [blame] | 117 | SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error, |
| 118 | "invalid input token"); |
Sean Callanan | 668b154 | 2010-04-12 19:43:00 +0000 | [diff] [blame] | 119 | Str = Str.substr(Str.find('\n')); |
Rafael Espindola | 0cb5820 | 2014-11-07 17:59:05 +0000 | [diff] [blame] | 120 | ByteArray.first.clear(); |
| 121 | ByteArray.second.clear(); |
Sean Callanan | 668b154 | 2010-04-12 19:43:00 +0000 | [diff] [blame] | 122 | continue; |
| 123 | } |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 124 | |
Rafael Espindola | 0cb5820 | 2014-11-07 17:59:05 +0000 | [diff] [blame] | 125 | ByteArray.first.push_back(ByteVal); |
| 126 | ByteArray.second.push_back(Value.data()); |
Sean Callanan | 668b154 | 2010-04-12 19:43:00 +0000 | [diff] [blame] | 127 | Str = Str.substr(Next); |
| 128 | } |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 129 | |
Sean Callanan | 668b154 | 2010-04-12 19:43:00 +0000 | [diff] [blame] | 130 | return false; |
| 131 | } |
| 132 | |
Evan Cheng | b262799 | 2011-07-06 19:45:42 +0000 | [diff] [blame] | 133 | int Disassembler::disassemble(const Target &T, |
Bill Wendling | a5c177e | 2011-03-21 04:13:46 +0000 | [diff] [blame] | 134 | const std::string &Triple, |
Richard Barton | d0c478d | 2012-04-16 11:32:10 +0000 | [diff] [blame] | 135 | MCSubtargetInfo &STI, |
| 136 | MCStreamer &Streamer, |
Dan Gohman | d5826a3 | 2010-08-20 01:07:01 +0000 | [diff] [blame] | 137 | MemoryBuffer &Buffer, |
Richard Barton | d0c478d | 2012-04-16 11:32:10 +0000 | [diff] [blame] | 138 | SourceMgr &SM, |
Dan Gohman | d5826a3 | 2010-08-20 01:07:01 +0000 | [diff] [blame] | 139 | raw_ostream &Out) { |
Lang Hames | 508bd63 | 2014-04-15 04:40:56 +0000 | [diff] [blame] | 140 | |
| 141 | std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple)); |
| 142 | if (!MRI) { |
| 143 | errs() << "error: no register info for target " << Triple << "\n"; |
| 144 | return -1; |
| 145 | } |
| 146 | |
| 147 | std::unique_ptr<const MCAsmInfo> MAI(T.createMCAsmInfo(*MRI, Triple)); |
| 148 | if (!MAI) { |
| 149 | errs() << "error: no assembly info for target " << Triple << "\n"; |
| 150 | return -1; |
| 151 | } |
| 152 | |
| 153 | // Set up the MCContext for creating symbols and MCExpr's. |
Craig Topper | 573faec | 2014-04-25 04:24:47 +0000 | [diff] [blame] | 154 | MCContext Ctx(MAI.get(), MRI.get(), nullptr); |
Lang Hames | 508bd63 | 2014-04-15 04:40:56 +0000 | [diff] [blame] | 155 | |
| 156 | std::unique_ptr<const MCDisassembler> DisAsm( |
| 157 | T.createMCDisassembler(STI, Ctx)); |
Chris Lattner | 222af46 | 2009-12-22 06:24:00 +0000 | [diff] [blame] | 158 | if (!DisAsm) { |
| 159 | errs() << "error: no disassembler for target " << Triple << "\n"; |
Sean Callanan | ba847da | 2009-12-17 01:49:59 +0000 | [diff] [blame] | 160 | return -1; |
| 161 | } |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 162 | |
Richard Barton | d0c478d | 2012-04-16 11:32:10 +0000 | [diff] [blame] | 163 | // Set up initial section manually here |
Rafael Espindola | 90ce9f7 | 2014-10-15 16:12:52 +0000 | [diff] [blame] | 164 | Streamer.InitSections(false); |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 165 | |
Chris Lattner | 665e947 | 2009-12-22 06:56:51 +0000 | [diff] [blame] | 166 | bool ErrorOccurred = false; |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 167 | |
Sean Callanan | ba847da | 2009-12-17 01:49:59 +0000 | [diff] [blame] | 168 | // Convert the input to a vector for disassembly. |
Chris Lattner | 665e947 | 2009-12-22 06:56:51 +0000 | [diff] [blame] | 169 | ByteArrayTy ByteArray; |
Chris Lattner | cfc99a9 | 2010-04-09 04:24:20 +0000 | [diff] [blame] | 170 | StringRef Str = Buffer.getBuffer(); |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 171 | bool InAtomicBlock = false; |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 172 | |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 173 | while (SkipToToken(Str)) { |
Rafael Espindola | 0cb5820 | 2014-11-07 17:59:05 +0000 | [diff] [blame] | 174 | ByteArray.first.clear(); |
| 175 | ByteArray.second.clear(); |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 176 | |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 177 | if (Str[0] == '[') { |
| 178 | if (InAtomicBlock) { |
| 179 | SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, |
| 180 | "nested atomic blocks make no sense"); |
| 181 | ErrorOccurred = true; |
| 182 | } |
| 183 | InAtomicBlock = true; |
| 184 | Str = Str.drop_front(); |
| 185 | continue; |
| 186 | } else if (Str[0] == ']') { |
| 187 | if (!InAtomicBlock) { |
| 188 | SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, |
| 189 | "attempt to close atomic block without opening"); |
| 190 | ErrorOccurred = true; |
| 191 | } |
| 192 | InAtomicBlock = false; |
| 193 | Str = Str.drop_front(); |
| 194 | continue; |
| 195 | } |
| 196 | |
| 197 | // It's a real token, get the bytes and emit them |
| 198 | ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM); |
| 199 | |
Rafael Espindola | 0cb5820 | 2014-11-07 17:59:05 +0000 | [diff] [blame] | 200 | if (!ByteArray.first.empty()) |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 201 | ErrorOccurred |= PrintInsts(*DisAsm, ByteArray, SM, Out, Streamer, |
David Woodhouse | 4396f5d | 2014-01-28 23:12:42 +0000 | [diff] [blame] | 202 | InAtomicBlock, STI); |
Tim Northover | 38c6ff6 | 2013-07-19 10:05:04 +0000 | [diff] [blame] | 203 | } |
| 204 | |
| 205 | if (InAtomicBlock) { |
| 206 | SM.PrintMessage(SMLoc::getFromPointer(Str.data()), SourceMgr::DK_Error, |
| 207 | "unclosed atomic block"); |
| 208 | ErrorOccurred = true; |
| 209 | } |
Jim Grosbach | f5bf3cf | 2011-05-09 20:05:25 +0000 | [diff] [blame] | 210 | |
Chris Lattner | 665e947 | 2009-12-22 06:56:51 +0000 | [diff] [blame] | 211 | return ErrorOccurred; |
Sean Callanan | ba847da | 2009-12-17 01:49:59 +0000 | [diff] [blame] | 212 | } |