//===--- llvm-mc-fuzzer.cpp - Fuzzer for the MC layer ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a fuzzing harness that feeds fuzzer-generated bytes to
// the disassembler through the LLVM-C disassembler API.
//
//===----------------------------------------------------------------------===//
| 11 | |
| 12 | #include "llvm-c/Disassembler.h" |
| 13 | #include "llvm-c/Target.h" |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 14 | #include "llvm/MC/SubtargetFeature.h" |
| 15 | #include "llvm/Support/CommandLine.h" |
| 16 | #include "llvm/Support/raw_ostream.h" |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 17 | |
| 18 | using namespace llvm; |
| 19 | |
// Capacity, in bytes, of the scratch buffer that receives the text of each
// disassembled instruction.
constexpr unsigned AssemblyTextBufSize = 80;
| 21 | |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 22 | static cl::opt<std::string> |
| 23 | TripleName("triple", cl::desc("Target triple to assemble for, " |
| 24 | "see -version for available targets")); |
| 25 | |
| 26 | static cl::opt<std::string> |
| 27 | MCPU("mcpu", |
| 28 | cl::desc("Target a specific cpu type (-mcpu=help for details)"), |
| 29 | cl::value_desc("cpu-name"), cl::init("")); |
| 30 | |
Daniel Sanders | e59aef6 | 2015-09-22 09:22:53 +0000 | [diff] [blame] | 31 | // This is useful for variable-length instruction sets. |
| 32 | static cl::opt<unsigned> InsnLimit( |
| 33 | "insn-limit", |
| 34 | cl::desc("Limit the number of instructions to process (0 for no limit)"), |
| 35 | cl::value_desc("count"), cl::init(0)); |
| 36 | |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 37 | static cl::list<std::string> |
| 38 | MAttrs("mattr", cl::CommaSeparated, |
| 39 | cl::desc("Target specific attributes (-mattr=help for details)"), |
| 40 | cl::value_desc("a1,+a2,-a3,...")); |
| 41 | // The feature string derived from -mattr's values. |
| 42 | std::string FeaturesStr; |
| 43 | |
| 44 | static cl::list<std::string> |
Daniel Sanders | acd1ec7 | 2016-05-13 10:23:04 +0000 | [diff] [blame] | 45 | FuzzerArgs("fuzzer-args", cl::Positional, |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 46 | cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore, |
| 47 | cl::PositionalEatsArgs); |
Daniel Sanders | acd1ec7 | 2016-05-13 10:23:04 +0000 | [diff] [blame] | 48 | static std::vector<char *> ModifiedArgv; |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 49 | |
Kostya Serebryany | 9906eef | 2015-10-02 23:34:06 +0000 | [diff] [blame] | 50 | int DisassembleOneInput(const uint8_t *Data, size_t Size) { |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 51 | char AssemblyText[AssemblyTextBufSize]; |
| 52 | |
| 53 | std::vector<uint8_t> DataCopy(Data, Data + Size); |
| 54 | |
| 55 | LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures( |
| 56 | TripleName.c_str(), MCPU.c_str(), FeaturesStr.c_str(), nullptr, 0, |
| 57 | nullptr, nullptr); |
| 58 | assert(Ctx); |
| 59 | uint8_t *p = DataCopy.data(); |
| 60 | unsigned Consumed; |
Daniel Sanders | e59aef6 | 2015-09-22 09:22:53 +0000 | [diff] [blame] | 61 | unsigned InstructionsProcessed = 0; |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 62 | do { |
| 63 | Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText, |
| 64 | AssemblyTextBufSize); |
| 65 | Size -= Consumed; |
| 66 | p += Consumed; |
Daniel Sanders | e59aef6 | 2015-09-22 09:22:53 +0000 | [diff] [blame] | 67 | |
| 68 | InstructionsProcessed ++; |
| 69 | if (InsnLimit != 0 && InstructionsProcessed < InsnLimit) |
| 70 | break; |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 71 | } while (Consumed != 0); |
| 72 | LLVMDisasmDispose(Ctx); |
Kostya Serebryany | 9906eef | 2015-10-02 23:34:06 +0000 | [diff] [blame] | 73 | return 0; |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 74 | } |
| 75 | |
Justin Bogner | ae0931e | 2017-08-29 17:08:44 +0000 | [diff] [blame] | 76 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { |
Brian Cain | f54e7aa | 2017-02-27 06:22:17 +0000 | [diff] [blame] | 77 | return DisassembleOneInput(Data, Size); |
Daniel Sanders | acd1ec7 | 2016-05-13 10:23:04 +0000 | [diff] [blame] | 78 | } |
| 79 | |
Justin Bogner | ae0931e | 2017-08-29 17:08:44 +0000 | [diff] [blame] | 80 | extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, |
| 81 | char ***argv) { |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 82 | // The command line is unusual compared to other fuzzers due to the need to |
| 83 | // specify the target. Options like -triple, -mcpu, and -mattr work like |
| 84 | // their counterparts in llvm-mc, while -fuzzer-args collects options for the |
| 85 | // fuzzer itself. |
| 86 | // |
| 87 | // Examples: |
| 88 | // |
| 89 | // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to |
| 90 | // 4-bytes each and use the contents of ./corpus as the test corpus: |
| 91 | // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \ |
| 92 | // -fuzzer-args -max_len=4 -runs=100000 ./corpus |
| 93 | // |
| 94 | // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA |
| 95 | // feature enabled using up to 64-byte inputs: |
| 96 | // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \ |
| 97 | // -disassemble -fuzzer-args ./corpus |
| 98 | // |
| 99 | // If your aim is to find instructions that are not tested, then it is |
| 100 | // advisable to constrain the maximum input size to a single instruction |
| 101 | // using -max_len as in the first example. This results in a test corpus of |
| 102 | // individual instructions that test unique paths. Without this constraint, |
| 103 | // there will be considerable redundancy in the corpus. |
| 104 | |
Daniel Sanders | acd1ec7 | 2016-05-13 10:23:04 +0000 | [diff] [blame] | 105 | char **OriginalArgv = *argv; |
| 106 | |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 107 | LLVMInitializeAllTargetInfos(); |
| 108 | LLVMInitializeAllTargetMCs(); |
| 109 | LLVMInitializeAllDisassemblers(); |
| 110 | |
Daniel Sanders | acd1ec7 | 2016-05-13 10:23:04 +0000 | [diff] [blame] | 111 | cl::ParseCommandLineOptions(*argc, OriginalArgv); |
| 112 | |
| 113 | // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that |
| 114 | // the driver can parse its arguments. |
| 115 | // |
| 116 | // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs. |
| 117 | // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a |
| 118 | // non-const buffer to avoid the need to clean up when the fuzzer terminates. |
| 119 | ModifiedArgv.push_back(OriginalArgv[0]); |
| 120 | for (const auto &FuzzerArg : FuzzerArgs) { |
| 121 | for (int i = 1; i < *argc; ++i) { |
| 122 | if (FuzzerArg == OriginalArgv[i]) |
| 123 | ModifiedArgv.push_back(OriginalArgv[i]); |
| 124 | } |
| 125 | } |
| 126 | *argc = ModifiedArgv.size(); |
| 127 | *argv = ModifiedArgv.data(); |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 128 | |
| 129 | // Package up features to be passed to target/subtarget |
| 130 | // We have to pass it via a global since the callback doesn't |
| 131 | // permit any user data. |
| 132 | if (MAttrs.size()) { |
| 133 | SubtargetFeatures Features; |
| 134 | for (unsigned i = 0; i != MAttrs.size(); ++i) |
| 135 | Features.AddFeature(MAttrs[i]); |
| 136 | FeaturesStr = Features.getString(); |
| 137 | } |
| 138 | |
Brian Cain | f54e7aa | 2017-02-27 06:22:17 +0000 | [diff] [blame] | 139 | if (TripleName.empty()) |
| 140 | TripleName = sys::getDefaultTargetTriple(); |
| 141 | |
Daniel Sanders | acd1ec7 | 2016-05-13 10:23:04 +0000 | [diff] [blame] | 142 | return 0; |
Daniel Sanders | 5369e0f | 2015-09-16 11:49:49 +0000 | [diff] [blame] | 143 | } |