//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implement the Lexer for TableGen.
//
//===----------------------------------------------------------------------===//
| 13 | |
Chris Lattner | 6aaca04 | 2007-11-18 05:25:45 +0000 | [diff] [blame] | 14 | #include "TGLexer.h" |
Bill Wendling | cd466f5 | 2010-12-08 20:02:49 +0000 | [diff] [blame] | 15 | #include "llvm/ADT/StringSwitch.h" |
| 16 | #include "llvm/ADT/Twine.h" |
Chandler Carruth | d04a8d4 | 2012-12-03 16:50:05 +0000 | [diff] [blame] | 17 | #include "llvm/Config/config.h" // for strtoull()/strtoll() define |
Eugene Zelenko | 9feaa97 | 2016-08-23 17:14:32 +0000 | [diff] [blame] | 18 | #include "llvm/Support/Compiler.h" |
Chandler Carruth | d04a8d4 | 2012-12-03 16:50:05 +0000 | [diff] [blame] | 19 | #include "llvm/Support/MemoryBuffer.h" |
| 20 | #include "llvm/Support/SourceMgr.h" |
| 21 | #include "llvm/TableGen/Error.h" |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 22 | #include <algorithm> |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 23 | #include <cctype> |
Chandler Carruth | d04a8d4 | 2012-12-03 16:50:05 +0000 | [diff] [blame] | 24 | #include <cerrno> |
Eugene Zelenko | 9feaa97 | 2016-08-23 17:14:32 +0000 | [diff] [blame] | 25 | #include <cstdint> |
Duncan Sands | 4520dd2 | 2008-10-08 07:23:46 +0000 | [diff] [blame] | 26 | #include <cstdio> |
Anton Korobeynikov | ae9f3a3 | 2008-02-20 11:08:44 +0000 | [diff] [blame] | 27 | #include <cstdlib> |
| 28 | #include <cstring> |
Dylan Noblesmith | 8cc300c | 2011-12-22 23:08:39 +0000 | [diff] [blame] | 29 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 30 | using namespace llvm; |
| 31 | |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 32 | namespace { |
| 33 | // A list of supported preprocessing directives with their |
| 34 | // internal token kinds and names. |
| 35 | struct { |
| 36 | tgtok::TokKind Kind; |
| 37 | const char *Word; |
| 38 | } PreprocessorDirs[] = { |
| 39 | { tgtok::Ifdef, "ifdef" }, |
| 40 | { tgtok::Else, "else" }, |
| 41 | { tgtok::Endif, "endif" }, |
| 42 | { tgtok::Define, "define" } |
| 43 | }; |
| 44 | } // end anonymous namespace |
| 45 | |
| 46 | TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) { |
Alp Toker | 1508c82 | 2014-07-06 10:33:31 +0000 | [diff] [blame] | 47 | CurBuffer = SrcMgr.getMainFileID(); |
Rafael Espindola | 245fbdf | 2014-07-06 14:24:03 +0000 | [diff] [blame] | 48 | CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); |
| 49 | CurPtr = CurBuf.begin(); |
Craig Topper | 8a0d1c8 | 2014-04-09 04:50:04 +0000 | [diff] [blame] | 50 | TokStart = nullptr; |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 51 | |
| 52 | // Pretend that we enter the "top-level" include file. |
| 53 | PrepIncludeStack.push_back( |
| 54 | make_unique<std::vector<PreprocessorControlDesc>>()); |
| 55 | |
| 56 | // Put all macros defined in the command line into the DefinedMacros set. |
| 57 | std::for_each(Macros.begin(), Macros.end(), |
| 58 | [this](const std::string &MacroName) { |
| 59 | DefinedMacros.insert(MacroName); |
| 60 | }); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 61 | } |
| 62 | |
Chris Lattner | 1e3a8a4 | 2009-06-21 03:39:35 +0000 | [diff] [blame] | 63 | SMLoc TGLexer::getLoc() const { |
| 64 | return SMLoc::getFromPointer(TokStart); |
Chris Lattner | 1c8ae59 | 2009-03-13 16:01:53 +0000 | [diff] [blame] | 65 | } |
| 66 | |
Chris Lattner | c8a9bbc | 2007-11-19 07:38:58 +0000 | [diff] [blame] | 67 | /// ReturnError - Set the error to the specified string at the specified |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 68 | /// location. This is defined to always return tgtok::Error. |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 69 | tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) { |
Chris Lattner | c8a9bbc | 2007-11-19 07:38:58 +0000 | [diff] [blame] | 70 | PrintError(Loc, Msg); |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 71 | return tgtok::Error; |
Chris Lattner | c8a9bbc | 2007-11-19 07:38:58 +0000 | [diff] [blame] | 72 | } |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 73 | |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 74 | tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) { |
| 75 | return ReturnError(SMLoc::getFromPointer(Loc), Msg); |
| 76 | } |
| 77 | |
| 78 | bool TGLexer::processEOF() { |
| 79 | SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); |
| 80 | if (ParentIncludeLoc != SMLoc()) { |
| 81 | // If prepExitInclude() detects a problem with the preprocessing |
| 82 | // control stack, it will return false. Pretend that we reached |
| 83 | // the final EOF and stop lexing more tokens by returning false |
| 84 | // to LexToken(). |
| 85 | if (!prepExitInclude(false)) |
| 86 | return false; |
| 87 | |
| 88 | CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); |
| 89 | CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); |
| 90 | CurPtr = ParentIncludeLoc.getPointer(); |
| 91 | // Make sure TokStart points into the parent file's buffer. |
| 92 | // LexToken() assigns to it before calling getNextChar(), |
| 93 | // so it is pointing into the included file now. |
| 94 | TokStart = CurPtr; |
| 95 | return true; |
| 96 | } |
| 97 | |
| 98 | // Pretend that we exit the "top-level" include file. |
| 99 | // Note that in case of an error (e.g. control stack imbalance) |
| 100 | // the routine will issue a fatal error. |
| 101 | prepExitInclude(true); |
| 102 | return false; |
| 103 | } |
| 104 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 105 | int TGLexer::getNextChar() { |
| 106 | char CurChar = *CurPtr++; |
| 107 | switch (CurChar) { |
| 108 | default: |
Chris Lattner | c181918 | 2007-11-18 05:48:46 +0000 | [diff] [blame] | 109 | return (unsigned char)CurChar; |
Chris Lattner | aa739d2 | 2009-03-13 07:05:43 +0000 | [diff] [blame] | 110 | case 0: { |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 111 | // A nul character in the stream is either the end of the current buffer or |
| 112 | // a random nul in the file. Disambiguate that here. |
Rafael Espindola | 245fbdf | 2014-07-06 14:24:03 +0000 | [diff] [blame] | 113 | if (CurPtr-1 != CurBuf.end()) |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 114 | return 0; // Just whitespace. |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 115 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 116 | // Otherwise, return end of file. |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 117 | --CurPtr; // Another call to lex will return EOF again. |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 118 | return EOF; |
Chris Lattner | aa739d2 | 2009-03-13 07:05:43 +0000 | [diff] [blame] | 119 | } |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 120 | case '\n': |
| 121 | case '\r': |
| 122 | // Handle the newline character by ignoring it and incrementing the line |
| 123 | // count. However, be careful about 'dos style' files with \n\r in them. |
| 124 | // Only treat a \n\r or \r\n as a single line. |
| 125 | if ((*CurPtr == '\n' || (*CurPtr == '\r')) && |
| 126 | *CurPtr != CurChar) |
Chris Lattner | c181918 | 2007-11-18 05:48:46 +0000 | [diff] [blame] | 127 | ++CurPtr; // Eat the two char newline sequence. |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 128 | return '\n'; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 129 | } |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 130 | } |
| 131 | |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 132 | int TGLexer::peekNextChar(int Index) const { |
David Greene | a761f92 | 2011-10-19 13:03:35 +0000 | [diff] [blame] | 133 | return *(CurPtr + Index); |
| 134 | } |
| 135 | |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 136 | tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { |
Chris Lattner | 56a9fcf | 2007-11-19 07:43:52 +0000 | [diff] [blame] | 137 | TokStart = CurPtr; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 138 | // This always consumes at least one character. |
| 139 | int CurChar = getNextChar(); |
| 140 | |
| 141 | switch (CurChar) { |
| 142 | default: |
David Greene | d3d1cad | 2011-10-19 13:04:43 +0000 | [diff] [blame] | 143 | // Handle letters: [a-zA-Z_] |
| 144 | if (isalpha(CurChar) || CurChar == '_') |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 145 | return LexIdentifier(); |
David Greene | d3d1cad | 2011-10-19 13:04:43 +0000 | [diff] [blame] | 146 | |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 147 | // Unknown character, emit an error. |
| 148 | return ReturnError(TokStart, "Unexpected character"); |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 149 | case EOF: |
| 150 | // Lex next token, if we just left an include file. |
| 151 | // Note that leaving an include file means that the next |
| 152 | // symbol is located at the end of 'include "..."' |
| 153 | // construct, so LexToken() is called with default |
| 154 | // false parameter. |
| 155 | if (processEOF()) |
| 156 | return LexToken(); |
| 157 | |
| 158 | // Return EOF denoting the end of lexing. |
| 159 | return tgtok::Eof; |
| 160 | |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 161 | case ':': return tgtok::colon; |
| 162 | case ';': return tgtok::semi; |
| 163 | case '.': return tgtok::period; |
| 164 | case ',': return tgtok::comma; |
| 165 | case '<': return tgtok::less; |
| 166 | case '>': return tgtok::greater; |
| 167 | case ']': return tgtok::r_square; |
| 168 | case '{': return tgtok::l_brace; |
| 169 | case '}': return tgtok::r_brace; |
| 170 | case '(': return tgtok::l_paren; |
| 171 | case ')': return tgtok::r_paren; |
| 172 | case '=': return tgtok::equal; |
| 173 | case '?': return tgtok::question; |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 174 | case '#': |
| 175 | if (FileOrLineStart) { |
| 176 | tgtok::TokKind Kind = prepIsDirective(); |
| 177 | if (Kind != tgtok::Error) |
| 178 | return lexPreprocessor(Kind); |
| 179 | } |
| 180 | |
| 181 | return tgtok::paste; |
| 182 | |
| 183 | case '\r': |
| 184 | PrintFatalError("getNextChar() must never return '\r'"); |
| 185 | return tgtok::Error; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 186 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 187 | case 0: |
| 188 | case ' ': |
| 189 | case '\t': |
Vyacheslav Zakharin | cec2d66 | 2018-11-17 02:26:34 +0000 | [diff] [blame] | 190 | // Ignore whitespace. |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 191 | return LexToken(FileOrLineStart); |
| 192 | case '\n': |
| 193 | // Ignore whitespace, and identify the new line. |
| 194 | return LexToken(true); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 195 | case '/': |
| 196 | // If this is the start of a // comment, skip until the end of the line or |
| 197 | // the end of the buffer. |
| 198 | if (*CurPtr == '/') |
| 199 | SkipBCPLComment(); |
| 200 | else if (*CurPtr == '*') { |
| 201 | if (SkipCComment()) |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 202 | return tgtok::Error; |
| 203 | } else // Otherwise, this is an error. |
| 204 | return ReturnError(TokStart, "Unexpected character"); |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 205 | return LexToken(FileOrLineStart); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 206 | case '-': case '+': |
| 207 | case '0': case '1': case '2': case '3': case '4': case '5': case '6': |
David Greene | 7efe936 | 2011-10-19 13:03:39 +0000 | [diff] [blame] | 208 | case '7': case '8': case '9': { |
| 209 | int NextChar = 0; |
| 210 | if (isdigit(CurChar)) { |
| 211 | // Allow identifiers to start with a number if it is followed by |
| 212 | // an identifier. This can happen with paste operations like |
| 213 | // foo#8i. |
| 214 | int i = 0; |
| 215 | do { |
| 216 | NextChar = peekNextChar(i++); |
| 217 | } while (isdigit(NextChar)); |
| 218 | |
| 219 | if (NextChar == 'x' || NextChar == 'b') { |
| 220 | // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most |
| 221 | // likely a number. |
| 222 | int NextNextChar = peekNextChar(i); |
| 223 | switch (NextNextChar) { |
| 224 | default: |
| 225 | break; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 226 | case '0': case '1': |
David Greene | 7efe936 | 2011-10-19 13:03:39 +0000 | [diff] [blame] | 227 | if (NextChar == 'b') |
| 228 | return LexNumber(); |
Justin Bogner | 6673ea8 | 2016-08-17 05:10:15 +0000 | [diff] [blame] | 229 | LLVM_FALLTHROUGH; |
David Greene | 7efe936 | 2011-10-19 13:03:39 +0000 | [diff] [blame] | 230 | case '2': case '3': case '4': case '5': |
| 231 | case '6': case '7': case '8': case '9': |
| 232 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': |
| 233 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': |
| 234 | if (NextChar == 'x') |
| 235 | return LexNumber(); |
| 236 | break; |
| 237 | } |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | if (isalpha(NextChar) || NextChar == '_') |
| 242 | return LexIdentifier(); |
| 243 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 244 | return LexNumber(); |
David Greene | 7efe936 | 2011-10-19 13:03:39 +0000 | [diff] [blame] | 245 | } |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 246 | case '"': return LexString(); |
| 247 | case '$': return LexVarName(); |
| 248 | case '[': return LexBracket(); |
| 249 | case '!': return LexExclaim(); |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | /// LexString - Lex "[^"]*" |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 254 | tgtok::TokKind TGLexer::LexString() { |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 255 | const char *StrStart = CurPtr; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 256 | |
Chris Lattner | ea9f4df | 2009-03-13 21:03:27 +0000 | [diff] [blame] | 257 | CurStrVal = ""; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 258 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 259 | while (*CurPtr != '"') { |
| 260 | // If we hit the end of the buffer, report an error. |
Rafael Espindola | 245fbdf | 2014-07-06 14:24:03 +0000 | [diff] [blame] | 261 | if (*CurPtr == 0 && CurPtr == CurBuf.end()) |
Chris Lattner | c8a9bbc | 2007-11-19 07:38:58 +0000 | [diff] [blame] | 262 | return ReturnError(StrStart, "End of file in string literal"); |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 263 | |
Chris Lattner | c8a9bbc | 2007-11-19 07:38:58 +0000 | [diff] [blame] | 264 | if (*CurPtr == '\n' || *CurPtr == '\r') |
| 265 | return ReturnError(StrStart, "End of line in string literal"); |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 266 | |
Chris Lattner | ea9f4df | 2009-03-13 21:03:27 +0000 | [diff] [blame] | 267 | if (*CurPtr != '\\') { |
| 268 | CurStrVal += *CurPtr++; |
| 269 | continue; |
| 270 | } |
| 271 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 272 | ++CurPtr; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 273 | |
Chris Lattner | ea9f4df | 2009-03-13 21:03:27 +0000 | [diff] [blame] | 274 | switch (*CurPtr) { |
| 275 | case '\\': case '\'': case '"': |
| 276 | // These turn into their literal character. |
| 277 | CurStrVal += *CurPtr++; |
| 278 | break; |
Chris Lattner | e023bb6 | 2009-03-13 21:23:43 +0000 | [diff] [blame] | 279 | case 't': |
Chris Lattner | 7f3b28a | 2009-03-13 21:33:17 +0000 | [diff] [blame] | 280 | CurStrVal += '\t'; |
Chris Lattner | e023bb6 | 2009-03-13 21:23:43 +0000 | [diff] [blame] | 281 | ++CurPtr; |
| 282 | break; |
| 283 | case 'n': |
Chris Lattner | 7f3b28a | 2009-03-13 21:33:17 +0000 | [diff] [blame] | 284 | CurStrVal += '\n'; |
Chris Lattner | e023bb6 | 2009-03-13 21:23:43 +0000 | [diff] [blame] | 285 | ++CurPtr; |
| 286 | break; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 287 | |
Chris Lattner | ea9f4df | 2009-03-13 21:03:27 +0000 | [diff] [blame] | 288 | case '\n': |
| 289 | case '\r': |
| 290 | return ReturnError(CurPtr, "escaped newlines not supported in tblgen"); |
| 291 | |
| 292 | // If we hit the end of the buffer, report an error. |
| 293 | case '\0': |
Rafael Espindola | 245fbdf | 2014-07-06 14:24:03 +0000 | [diff] [blame] | 294 | if (CurPtr == CurBuf.end()) |
Chris Lattner | ea9f4df | 2009-03-13 21:03:27 +0000 | [diff] [blame] | 295 | return ReturnError(StrStart, "End of file in string literal"); |
Justin Bogner | 7d7a23e | 2016-08-17 20:30:52 +0000 | [diff] [blame] | 296 | LLVM_FALLTHROUGH; |
Chris Lattner | ea9f4df | 2009-03-13 21:03:27 +0000 | [diff] [blame] | 297 | default: |
| 298 | return ReturnError(CurPtr, "invalid escape in string literal"); |
| 299 | } |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 300 | } |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 301 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 302 | ++CurPtr; |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 303 | return tgtok::StrVal; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 304 | } |
| 305 | |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 306 | tgtok::TokKind TGLexer::LexVarName() { |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 307 | if (!isalpha(CurPtr[0]) && CurPtr[0] != '_') |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 308 | return ReturnError(TokStart, "Invalid variable name"); |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 309 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 310 | // Otherwise, we're ok, consume the rest of the characters. |
| 311 | const char *VarNameStart = CurPtr++; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 312 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 313 | while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') |
| 314 | ++CurPtr; |
| 315 | |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 316 | CurStrVal.assign(VarNameStart, CurPtr); |
| 317 | return tgtok::VarName; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 318 | } |
| 319 | |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 320 | tgtok::TokKind TGLexer::LexIdentifier() { |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 321 | // The first letter is [a-zA-Z_]. |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 322 | const char *IdentStart = TokStart; |
Benjamin Kramer | 37d42af | 2011-10-06 18:23:56 +0000 | [diff] [blame] | 323 | |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 324 | // Match the rest of the identifier regex: [0-9a-zA-Z_]* |
David Greene | d3d1cad | 2011-10-19 13:04:43 +0000 | [diff] [blame] | 325 | while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') |
Chris Lattner | c2b0875 | 2010-10-05 22:59:29 +0000 | [diff] [blame] | 326 | ++CurPtr; |
Benjamin Kramer | 37d42af | 2011-10-06 18:23:56 +0000 | [diff] [blame] | 327 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 328 | // Check to see if this identifier is a keyword. |
Benjamin Kramer | 37d42af | 2011-10-06 18:23:56 +0000 | [diff] [blame] | 329 | StringRef Str(IdentStart, CurPtr-IdentStart); |
| 330 | |
Benjamin Kramer | 37d42af | 2011-10-06 18:23:56 +0000 | [diff] [blame] | 331 | if (Str == "include") { |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 332 | if (LexInclude()) return tgtok::Error; |
| 333 | return Lex(); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 334 | } |
Benjamin Kramer | 37d42af | 2011-10-06 18:23:56 +0000 | [diff] [blame] | 335 | |
Benjamin Kramer | ee57318 | 2011-10-06 18:53:43 +0000 | [diff] [blame] | 336 | tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str) |
| 337 | .Case("int", tgtok::Int) |
| 338 | .Case("bit", tgtok::Bit) |
| 339 | .Case("bits", tgtok::Bits) |
| 340 | .Case("string", tgtok::String) |
| 341 | .Case("list", tgtok::List) |
| 342 | .Case("code", tgtok::Code) |
| 343 | .Case("dag", tgtok::Dag) |
| 344 | .Case("class", tgtok::Class) |
| 345 | .Case("def", tgtok::Def) |
David Greene | cebb4ee | 2012-02-22 16:09:41 +0000 | [diff] [blame] | 346 | .Case("foreach", tgtok::Foreach) |
Benjamin Kramer | ee57318 | 2011-10-06 18:53:43 +0000 | [diff] [blame] | 347 | .Case("defm", tgtok::Defm) |
Nicolai Haehnle | d66fa2a | 2018-03-09 12:24:42 +0000 | [diff] [blame] | 348 | .Case("defset", tgtok::Defset) |
Benjamin Kramer | ee57318 | 2011-10-06 18:53:43 +0000 | [diff] [blame] | 349 | .Case("multiclass", tgtok::MultiClass) |
| 350 | .Case("field", tgtok::Field) |
| 351 | .Case("let", tgtok::Let) |
| 352 | .Case("in", tgtok::In) |
| 353 | .Default(tgtok::Id); |
| 354 | |
| 355 | if (Kind == tgtok::Id) |
| 356 | CurStrVal.assign(Str.begin(), Str.end()); |
| 357 | return Kind; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 358 | } |
| 359 | |
| 360 | /// LexInclude - We just read the "include" token. Get the string token that |
| 361 | /// comes next and enter the include. |
| 362 | bool TGLexer::LexInclude() { |
| 363 | // The token after the include must be a string. |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 364 | tgtok::TokKind Tok = LexToken(); |
| 365 | if (Tok == tgtok::Error) return true; |
| 366 | if (Tok != tgtok::StrVal) { |
| 367 | PrintError(getLoc(), "Expected filename after include"); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 368 | return true; |
| 369 | } |
| 370 | |
| 371 | // Get the string. |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 372 | std::string Filename = CurStrVal; |
Joerg Sonnenberger | dd13790 | 2011-06-01 13:10:15 +0000 | [diff] [blame] | 373 | std::string IncludedFile; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 374 | |
Joerg Sonnenberger | dd13790 | 2011-06-01 13:10:15 +0000 | [diff] [blame] | 375 | CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr), |
| 376 | IncludedFile); |
Alp Toker | 1508c82 | 2014-07-06 10:33:31 +0000 | [diff] [blame] | 377 | if (!CurBuffer) { |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 378 | PrintError(getLoc(), "Could not find include file '" + Filename + "'"); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 379 | return true; |
| 380 | } |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 381 | |
Sean Silva | a170f52 | 2013-02-07 04:30:39 +0000 | [diff] [blame] | 382 | DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile); |
| 383 | if (Found != Dependencies.end()) { |
| 384 | PrintError(getLoc(), |
| 385 | "File '" + IncludedFile + "' has already been included."); |
| 386 | SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note, |
| 387 | "previously included here"); |
| 388 | return true; |
| 389 | } |
| 390 | Dependencies.insert(std::make_pair(IncludedFile, getLoc())); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 391 | // Save the line number and lex buffer of the includer. |
Rafael Espindola | 245fbdf | 2014-07-06 14:24:03 +0000 | [diff] [blame] | 392 | CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); |
| 393 | CurPtr = CurBuf.begin(); |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 394 | |
| 395 | PrepIncludeStack.push_back( |
| 396 | make_unique<std::vector<PreprocessorControlDesc>>()); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 397 | return false; |
| 398 | } |
| 399 | |
| 400 | void TGLexer::SkipBCPLComment() { |
| 401 | ++CurPtr; // skip the second slash. |
Eugene Zelenko | 9feaa97 | 2016-08-23 17:14:32 +0000 | [diff] [blame] | 402 | while (true) { |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 403 | switch (*CurPtr) { |
| 404 | case '\n': |
| 405 | case '\r': |
| 406 | return; // Newline is end of comment. |
| 407 | case 0: |
| 408 | // If this is the end of the buffer, end the comment. |
Rafael Espindola | 245fbdf | 2014-07-06 14:24:03 +0000 | [diff] [blame] | 409 | if (CurPtr == CurBuf.end()) |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 410 | return; |
| 411 | break; |
| 412 | } |
| 413 | // Otherwise, skip the character. |
| 414 | ++CurPtr; |
| 415 | } |
| 416 | } |
| 417 | |
| 418 | /// SkipCComment - This skips C-style /**/ comments. The only difference from C |
| 419 | /// is that we allow nesting. |
| 420 | bool TGLexer::SkipCComment() { |
| 421 | ++CurPtr; // skip the star. |
| 422 | unsigned CommentDepth = 1; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 423 | |
Eugene Zelenko | 9feaa97 | 2016-08-23 17:14:32 +0000 | [diff] [blame] | 424 | while (true) { |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 425 | int CurChar = getNextChar(); |
| 426 | switch (CurChar) { |
| 427 | case EOF: |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 428 | PrintError(TokStart, "Unterminated comment!"); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 429 | return true; |
| 430 | case '*': |
| 431 | // End of the comment? |
| 432 | if (CurPtr[0] != '/') break; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 433 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 434 | ++CurPtr; // End the */. |
| 435 | if (--CommentDepth == 0) |
| 436 | return false; |
| 437 | break; |
| 438 | case '/': |
| 439 | // Start of a nested comment? |
| 440 | if (CurPtr[0] != '*') break; |
| 441 | ++CurPtr; |
| 442 | ++CommentDepth; |
| 443 | break; |
| 444 | } |
| 445 | } |
| 446 | } |
| 447 | |
| 448 | /// LexNumber - Lex: |
| 449 | /// [-+]?[0-9]+ |
| 450 | /// 0x[0-9a-fA-F]+ |
| 451 | /// 0b[01]+ |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 452 | tgtok::TokKind TGLexer::LexNumber() { |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 453 | if (CurPtr[-1] == '0') { |
| 454 | if (CurPtr[0] == 'x') { |
| 455 | ++CurPtr; |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 456 | const char *NumStart = CurPtr; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 457 | while (isxdigit(CurPtr[0])) |
| 458 | ++CurPtr; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 459 | |
Chris Lattner | c8a9bbc | 2007-11-19 07:38:58 +0000 | [diff] [blame] | 460 | // Requires at least one hex digit. |
| 461 | if (CurPtr == NumStart) |
Chris Lattner | 4226bb0 | 2009-06-21 19:22:49 +0000 | [diff] [blame] | 462 | return ReturnError(TokStart, "Invalid hexadecimal number"); |
Chris Lattner | c8a9bbc | 2007-11-19 07:38:58 +0000 | [diff] [blame] | 463 | |
Dan Gohman | 63f9720 | 2008-10-17 01:33:43 +0000 | [diff] [blame] | 464 | errno = 0; |
Craig Topper | 8a0d1c8 | 2014-04-09 04:50:04 +0000 | [diff] [blame] | 465 | CurIntVal = strtoll(NumStart, nullptr, 16); |
Dan Gohman | 63f9720 | 2008-10-17 01:33:43 +0000 | [diff] [blame] | 466 | if (errno == EINVAL) |
Chris Lattner | 4226bb0 | 2009-06-21 19:22:49 +0000 | [diff] [blame] | 467 | return ReturnError(TokStart, "Invalid hexadecimal number"); |
Dan Gohman | 63f9720 | 2008-10-17 01:33:43 +0000 | [diff] [blame] | 468 | if (errno == ERANGE) { |
| 469 | errno = 0; |
Craig Topper | 8a0d1c8 | 2014-04-09 04:50:04 +0000 | [diff] [blame] | 470 | CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16); |
Dan Gohman | 63f9720 | 2008-10-17 01:33:43 +0000 | [diff] [blame] | 471 | if (errno == EINVAL) |
Chris Lattner | 4226bb0 | 2009-06-21 19:22:49 +0000 | [diff] [blame] | 472 | return ReturnError(TokStart, "Invalid hexadecimal number"); |
Dan Gohman | 63f9720 | 2008-10-17 01:33:43 +0000 | [diff] [blame] | 473 | if (errno == ERANGE) |
Chris Lattner | 4226bb0 | 2009-06-21 19:22:49 +0000 | [diff] [blame] | 474 | return ReturnError(TokStart, "Hexadecimal number out of range"); |
Dan Gohman | 63f9720 | 2008-10-17 01:33:43 +0000 | [diff] [blame] | 475 | } |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 476 | return tgtok::IntVal; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 477 | } else if (CurPtr[0] == 'b') { |
| 478 | ++CurPtr; |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 479 | const char *NumStart = CurPtr; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 480 | while (CurPtr[0] == '0' || CurPtr[0] == '1') |
| 481 | ++CurPtr; |
Chris Lattner | c8a9bbc | 2007-11-19 07:38:58 +0000 | [diff] [blame] | 482 | |
| 483 | // Requires at least one binary digit. |
| 484 | if (CurPtr == NumStart) |
| 485 | return ReturnError(CurPtr-2, "Invalid binary number"); |
Craig Topper | 8a0d1c8 | 2014-04-09 04:50:04 +0000 | [diff] [blame] | 486 | CurIntVal = strtoll(NumStart, nullptr, 2); |
Pete Cooper | 42c1227 | 2014-08-07 05:47:00 +0000 | [diff] [blame] | 487 | return tgtok::BinaryIntVal; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 488 | } |
| 489 | } |
| 490 | |
| 491 | // Check for a sign without a digit. |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 492 | if (!isdigit(CurPtr[0])) { |
| 493 | if (CurPtr[-1] == '-') |
| 494 | return tgtok::minus; |
| 495 | else if (CurPtr[-1] == '+') |
| 496 | return tgtok::plus; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 497 | } |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 498 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 499 | while (isdigit(CurPtr[0])) |
| 500 | ++CurPtr; |
Craig Topper | 8a0d1c8 | 2014-04-09 04:50:04 +0000 | [diff] [blame] | 501 | CurIntVal = strtoll(TokStart, nullptr, 10); |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 502 | return tgtok::IntVal; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 503 | } |
| 504 | |
| 505 | /// LexBracket - We just read '['. If this is a code block, return it, |
| 506 | /// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]' |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 507 | tgtok::TokKind TGLexer::LexBracket() { |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 508 | if (CurPtr[0] != '{') |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 509 | return tgtok::l_square; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 510 | ++CurPtr; |
| 511 | const char *CodeStart = CurPtr; |
Eugene Zelenko | 9feaa97 | 2016-08-23 17:14:32 +0000 | [diff] [blame] | 512 | while (true) { |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 513 | int Char = getNextChar(); |
| 514 | if (Char == EOF) break; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 515 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 516 | if (Char != '}') continue; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 517 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 518 | Char = getNextChar(); |
| 519 | if (Char == EOF) break; |
| 520 | if (Char == ']') { |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 521 | CurStrVal.assign(CodeStart, CurPtr-2); |
| 522 | return tgtok::CodeFragment; |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 523 | } |
| 524 | } |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 525 | |
Chris Lattner | c8a9bbc | 2007-11-19 07:38:58 +0000 | [diff] [blame] | 526 | return ReturnError(CodeStart-2, "Unterminated Code Block"); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 527 | } |
| 528 | |
| 529 | /// LexExclaim - Lex '!' and '![a-zA-Z]+'. |
Chris Lattner | f460165 | 2007-11-22 20:49:04 +0000 | [diff] [blame] | 530 | tgtok::TokKind TGLexer::LexExclaim() { |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 531 | if (!isalpha(*CurPtr)) |
Bill Wendling | dd2b6cb | 2010-12-08 13:03:15 +0000 | [diff] [blame] | 532 | return ReturnError(CurPtr - 1, "Invalid \"!operator\""); |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 533 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 534 | const char *Start = CurPtr++; |
| 535 | while (isalpha(*CurPtr)) |
| 536 | ++CurPtr; |
Nicolai Haehnle | 9ae21b3 | 2018-03-09 18:32:04 +0000 | [diff] [blame] | 537 | |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 538 | // Check to see which operator this is. |
Bill Wendling | cd466f5 | 2010-12-08 20:02:49 +0000 | [diff] [blame] | 539 | tgtok::TokKind Kind = |
| 540 | StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start)) |
| 541 | .Case("eq", tgtok::XEq) |
Nicolai Haehnle | af0de50 | 2018-03-14 11:00:57 +0000 | [diff] [blame] | 542 | .Case("ne", tgtok::XNe) |
| 543 | .Case("le", tgtok::XLe) |
| 544 | .Case("lt", tgtok::XLt) |
| 545 | .Case("ge", tgtok::XGe) |
| 546 | .Case("gt", tgtok::XGt) |
Bill Wendling | cd466f5 | 2010-12-08 20:02:49 +0000 | [diff] [blame] | 547 | .Case("if", tgtok::XIf) |
Nicolai Haehnle | a2472db | 2018-03-09 12:24:06 +0000 | [diff] [blame] | 548 | .Case("isa", tgtok::XIsA) |
David Greene | 1434f66 | 2011-01-07 17:05:37 +0000 | [diff] [blame] | 549 | .Case("head", tgtok::XHead) |
| 550 | .Case("tail", tgtok::XTail) |
Nicolai Haehnle | c343502 | 2018-02-23 10:46:07 +0000 | [diff] [blame] | 551 | .Case("size", tgtok::XSize) |
Bill Wendling | cd466f5 | 2010-12-08 20:02:49 +0000 | [diff] [blame] | 552 | .Case("con", tgtok::XConcat) |
Nicolai Haehnle | 2318764 | 2018-03-14 11:00:26 +0000 | [diff] [blame] | 553 | .Case("dag", tgtok::XDag) |
Hal Finkel | d23a41c | 2013-01-25 14:49:08 +0000 | [diff] [blame] | 554 | .Case("add", tgtok::XADD) |
Joerg Sonnenberger | c754b57 | 2014-08-05 09:43:25 +0000 | [diff] [blame] | 555 | .Case("and", tgtok::XAND) |
Matt Arsenault | ee23318 | 2016-11-15 06:49:28 +0000 | [diff] [blame] | 556 | .Case("or", tgtok::XOR) |
Bill Wendling | cd466f5 | 2010-12-08 20:02:49 +0000 | [diff] [blame] | 557 | .Case("shl", tgtok::XSHL) |
| 558 | .Case("sra", tgtok::XSRA) |
| 559 | .Case("srl", tgtok::XSRL) |
| 560 | .Case("cast", tgtok::XCast) |
David Greene | 1434f66 | 2011-01-07 17:05:37 +0000 | [diff] [blame] | 561 | .Case("empty", tgtok::XEmpty) |
Bill Wendling | cd466f5 | 2010-12-08 20:02:49 +0000 | [diff] [blame] | 562 | .Case("subst", tgtok::XSubst) |
Nicolai Haehnle | 8498a49 | 2018-03-06 13:49:16 +0000 | [diff] [blame] | 563 | .Case("foldl", tgtok::XFoldl) |
Bill Wendling | cd466f5 | 2010-12-08 20:02:49 +0000 | [diff] [blame] | 564 | .Case("foreach", tgtok::XForEach) |
Daniel Sanders | d80222a | 2014-05-07 10:13:19 +0000 | [diff] [blame] | 565 | .Case("listconcat", tgtok::XListConcat) |
Bill Wendling | cd466f5 | 2010-12-08 20:02:49 +0000 | [diff] [blame] | 566 | .Case("strconcat", tgtok::XStrConcat) |
| 567 | .Default(tgtok::Error); |
David Greene | d418c1b | 2009-05-14 20:54:48 +0000 | [diff] [blame] | 568 | |
Bill Wendling | cd466f5 | 2010-12-08 20:02:49 +0000 | [diff] [blame] | 569 | return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); |
Chris Lattner | a805874 | 2007-11-18 02:57:27 +0000 | [diff] [blame] | 570 | } |
Vyacheslav Zakharin | 6c99d2b | 2018-11-27 18:57:43 +0000 | [diff] [blame] | 571 | |
| 572 | bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) { |
| 573 | // Report an error, if preprocessor control stack for the current |
| 574 | // file is not empty. |
| 575 | if (!PrepIncludeStack.back()->empty()) { |
| 576 | prepReportPreprocessorStackError(); |
| 577 | |
| 578 | return false; |
| 579 | } |
| 580 | |
| 581 | // Pop the preprocessing controls from the include stack. |
| 582 | if (PrepIncludeStack.empty()) { |
| 583 | PrintFatalError("Preprocessor include stack is empty"); |
| 584 | } |
| 585 | |
| 586 | PrepIncludeStack.pop_back(); |
| 587 | |
| 588 | if (IncludeStackMustBeEmpty) { |
| 589 | if (!PrepIncludeStack.empty()) |
| 590 | PrintFatalError("Preprocessor include stack is not empty"); |
| 591 | } else { |
| 592 | if (PrepIncludeStack.empty()) |
| 593 | PrintFatalError("Preprocessor include stack is empty"); |
| 594 | } |
| 595 | |
| 596 | return true; |
| 597 | } |
| 598 | |
| 599 | tgtok::TokKind TGLexer::prepIsDirective() const { |
| 600 | for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) { |
| 601 | int NextChar = *CurPtr; |
| 602 | bool Match = true; |
| 603 | unsigned I = 0; |
| 604 | for (; I < strlen(PreprocessorDirs[ID].Word); ++I) { |
| 605 | if (NextChar != PreprocessorDirs[ID].Word[I]) { |
| 606 | Match = false; |
| 607 | break; |
| 608 | } |
| 609 | |
| 610 | NextChar = peekNextChar(I + 1); |
| 611 | } |
| 612 | |
| 613 | // Check for whitespace after the directive. If there is no whitespace, |
| 614 | // then we do not recognize it as a preprocessing directive. |
| 615 | if (Match) { |
| 616 | tgtok::TokKind Kind = PreprocessorDirs[ID].Kind; |
| 617 | |
| 618 | // New line and EOF may follow only #else/#endif. It will be reported |
| 619 | // as an error for #ifdef/#define after the call to prepLexMacroName(). |
| 620 | if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF || |
| 621 | NextChar == '\n' || |
| 622 | // It looks like TableGen does not support '\r' as the actual |
| 623 | // carriage return, e.g. getNextChar() treats a single '\r' |
| 624 | // as '\n'. So we do the same here. |
| 625 | NextChar == '\r') |
| 626 | return Kind; |
| 627 | |
| 628 | // Allow comments after some directives, e.g.: |
| 629 | // #else// OR #else/**/ |
| 630 | // #endif// OR #endif/**/ |
| 631 | // |
| 632 | // Note that we do allow comments after #ifdef/#define here, e.g. |
| 633 | // #ifdef/**/ AND #ifdef// |
| 634 | // #define/**/ AND #define// |
| 635 | // |
| 636 | // These cases will be reported as incorrect after calling |
| 637 | // prepLexMacroName(). We could have supported C-style comments |
| 638 | // after #ifdef/#define, but this would complicate the code |
| 639 | // for little benefit. |
| 640 | if (NextChar == '/') { |
| 641 | NextChar = peekNextChar(I + 1); |
| 642 | |
| 643 | if (NextChar == '*' || NextChar == '/') |
| 644 | return Kind; |
| 645 | |
| 646 | // Pretend that we do not recognize the directive. |
| 647 | } |
| 648 | } |
| 649 | } |
| 650 | |
| 651 | return tgtok::Error; |
| 652 | } |
| 653 | |
| 654 | bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) { |
| 655 | TokStart = CurPtr; |
| 656 | |
| 657 | for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) |
| 658 | if (PreprocessorDirs[ID].Kind == Kind) { |
| 659 | // Advance CurPtr to the end of the preprocessing word. |
| 660 | CurPtr += strlen(PreprocessorDirs[ID].Word); |
| 661 | return true; |
| 662 | } |
| 663 | |
| 664 | PrintFatalError("Unsupported preprocessing token in " |
| 665 | "prepEatPreprocessorDirective()"); |
| 666 | return false; |
| 667 | } |
| 668 | |
| 669 | tgtok::TokKind TGLexer::lexPreprocessor( |
| 670 | tgtok::TokKind Kind, bool ReturnNextLiveToken) { |
| 671 | |
| 672 | // We must be looking at a preprocessing directive. Eat it! |
| 673 | if (!prepEatPreprocessorDirective(Kind)) |
| 674 | PrintFatalError("lexPreprocessor() called for unknown " |
| 675 | "preprocessor directive"); |
| 676 | |
| 677 | if (Kind == tgtok::Ifdef) { |
| 678 | StringRef MacroName = prepLexMacroName(); |
| 679 | if (MacroName.empty()) |
| 680 | return ReturnError(TokStart, "Expected macro name after #ifdef"); |
| 681 | |
| 682 | bool MacroIsDefined = DefinedMacros.count(MacroName) != 0; |
| 683 | |
| 684 | // Regardless of whether we are processing tokens or not, |
| 685 | // we put the #ifdef control on stack. |
| 686 | PrepIncludeStack.back()->push_back( |
| 687 | {Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)}); |
| 688 | |
| 689 | if (!prepSkipDirectiveEnd()) |
| 690 | return ReturnError(CurPtr, |
| 691 | "Only comments are supported after #ifdef NAME"); |
| 692 | |
| 693 | // If we were not processing tokens before this #ifdef, |
| 694 | // then just return back to the lines skipping code. |
| 695 | if (!ReturnNextLiveToken) |
| 696 | return Kind; |
| 697 | |
| 698 | // If we were processing tokens before this #ifdef, |
| 699 | // and the macro is defined, then just return the next token. |
| 700 | if (MacroIsDefined) |
| 701 | return LexToken(); |
| 702 | |
| 703 | // We were processing tokens before this #ifdef, and the macro |
| 704 | // is not defined, so we have to start skipping the lines. |
| 705 | // If the skipping is successful, it will return the token following |
| 706 | // either #else or #endif corresponding to this #ifdef. |
| 707 | if (prepSkipRegion(ReturnNextLiveToken)) |
| 708 | return LexToken(); |
| 709 | |
| 710 | return tgtok::Error; |
| 711 | } else if (Kind == tgtok::Else) { |
| 712 | // Check if this #else is correct before calling prepSkipDirectiveEnd(), |
| 713 | // which will move CurPtr away from the beginning of #else. |
| 714 | if (PrepIncludeStack.back()->empty()) |
| 715 | return ReturnError(TokStart, "#else without #ifdef"); |
| 716 | |
| 717 | PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back(); |
| 718 | |
| 719 | if (IfdefEntry.Kind != tgtok::Ifdef) { |
| 720 | PrintError(TokStart, "double #else"); |
| 721 | return ReturnError(IfdefEntry.SrcPos, "Previous #else is here"); |
| 722 | } |
| 723 | |
| 724 | // Replace the corresponding #ifdef's control with its negation |
| 725 | // on the control stack. |
| 726 | PrepIncludeStack.back()->pop_back(); |
| 727 | PrepIncludeStack.back()->push_back( |
| 728 | {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)}); |
| 729 | |
| 730 | if (!prepSkipDirectiveEnd()) |
| 731 | return ReturnError(CurPtr, "Only comments are supported after #else"); |
| 732 | |
| 733 | // If we were processing tokens before this #else, |
| 734 | // we have to start skipping lines until the matching #endif. |
| 735 | if (ReturnNextLiveToken) { |
| 736 | if (prepSkipRegion(ReturnNextLiveToken)) |
| 737 | return LexToken(); |
| 738 | |
| 739 | return tgtok::Error; |
| 740 | } |
| 741 | |
| 742 | // Return to the lines skipping code. |
| 743 | return Kind; |
| 744 | } else if (Kind == tgtok::Endif) { |
| 745 | // Check if this #endif is correct before calling prepSkipDirectiveEnd(), |
| 746 | // which will move CurPtr away from the beginning of #endif. |
| 747 | if (PrepIncludeStack.back()->empty()) |
| 748 | return ReturnError(TokStart, "#endif without #ifdef"); |
| 749 | |
| 750 | auto &IfdefOrElseEntry = PrepIncludeStack.back()->back(); |
| 751 | |
| 752 | if (IfdefOrElseEntry.Kind != tgtok::Ifdef && |
| 753 | IfdefOrElseEntry.Kind != tgtok::Else) { |
| 754 | PrintFatalError("Invalid preprocessor control on the stack"); |
| 755 | return tgtok::Error; |
| 756 | } |
| 757 | |
| 758 | if (!prepSkipDirectiveEnd()) |
| 759 | return ReturnError(CurPtr, "Only comments are supported after #endif"); |
| 760 | |
| 761 | PrepIncludeStack.back()->pop_back(); |
| 762 | |
| 763 | // If we were processing tokens before this #endif, then |
| 764 | // we should continue it. |
| 765 | if (ReturnNextLiveToken) { |
| 766 | return LexToken(); |
| 767 | } |
| 768 | |
| 769 | // Return to the lines skipping code. |
| 770 | return Kind; |
| 771 | } else if (Kind == tgtok::Define) { |
| 772 | StringRef MacroName = prepLexMacroName(); |
| 773 | if (MacroName.empty()) |
| 774 | return ReturnError(TokStart, "Expected macro name after #define"); |
| 775 | |
| 776 | if (!DefinedMacros.insert(MacroName).second) |
| 777 | PrintWarning(getLoc(), |
| 778 | "Duplicate definition of macro: " + Twine(MacroName)); |
| 779 | |
| 780 | if (!prepSkipDirectiveEnd()) |
| 781 | return ReturnError(CurPtr, |
| 782 | "Only comments are supported after #define NAME"); |
| 783 | |
| 784 | if (!ReturnNextLiveToken) { |
| 785 | PrintFatalError("#define must be ignored during the lines skipping"); |
| 786 | return tgtok::Error; |
| 787 | } |
| 788 | |
| 789 | return LexToken(); |
| 790 | } |
| 791 | |
| 792 | PrintFatalError("Preprocessing directive is not supported"); |
| 793 | return tgtok::Error; |
| 794 | } |
| 795 | |
| 796 | bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) { |
| 797 | if (!MustNeverBeFalse) |
| 798 | PrintFatalError("Invalid recursion."); |
| 799 | |
| 800 | do { |
| 801 | // Skip all symbols to the line end. |
| 802 | prepSkipToLineEnd(); |
| 803 | |
| 804 | // Find the first non-whitespace symbol in the next line(s). |
| 805 | if (!prepSkipLineBegin()) |
| 806 | return false; |
| 807 | |
| 808 | // If the first non-blank/comment symbol on the line is '#', |
| 809 | // it may be a start of preprocessing directive. |
| 810 | // |
| 811 | // If it is not '#' just go to the next line. |
| 812 | if (*CurPtr == '#') |
| 813 | ++CurPtr; |
| 814 | else |
| 815 | continue; |
| 816 | |
| 817 | tgtok::TokKind Kind = prepIsDirective(); |
| 818 | |
| 819 | // If we did not find a preprocessing directive or it is #define, |
| 820 | // then just skip to the next line. We do not have to do anything |
| 821 | // for #define in the line-skipping mode. |
| 822 | if (Kind == tgtok::Error || Kind == tgtok::Define) |
| 823 | continue; |
| 824 | |
| 825 | tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false); |
| 826 | |
| 827 | // If lexPreprocessor() encountered an error during lexing this |
| 828 | // preprocessor idiom, then return false to the calling lexPreprocessor(). |
| 829 | // This will force tgtok::Error to be returned to the tokens processing. |
| 830 | if (ProcessedKind == tgtok::Error) |
| 831 | return false; |
| 832 | |
| 833 | if (Kind != ProcessedKind) |
| 834 | PrintFatalError("prepIsDirective() and lexPreprocessor() " |
| 835 | "returned different token kinds"); |
| 836 | |
| 837 | // If this preprocessing directive enables tokens processing, |
| 838 | // then return to the lexPreprocessor() and get to the next token. |
| 839 | // We can move from line-skipping mode to processing tokens only |
| 840 | // due to #else or #endif. |
| 841 | if (prepIsProcessingEnabled()) { |
| 842 | if (Kind != tgtok::Else && Kind != tgtok::Endif) { |
| 843 | PrintFatalError("Tokens processing was enabled by an unexpected " |
| 844 | "preprocessing directive"); |
| 845 | return false; |
| 846 | } |
| 847 | |
| 848 | return true; |
| 849 | } |
| 850 | } while (CurPtr != CurBuf.end()); |
| 851 | |
| 852 | // We have reached the end of the file, but never left the lines-skipping |
| 853 | // mode. This means there is no matching #endif. |
| 854 | prepReportPreprocessorStackError(); |
| 855 | return false; |
| 856 | } |
| 857 | |
| 858 | StringRef TGLexer::prepLexMacroName() { |
| 859 | // Skip whitespaces between the preprocessing directive and the macro name. |
| 860 | while (*CurPtr == ' ' || *CurPtr == '\t') |
| 861 | ++CurPtr; |
| 862 | |
| 863 | TokStart = CurPtr; |
| 864 | // Macro names start with [a-zA-Z_]. |
| 865 | if (*CurPtr != '_' && !isalpha(*CurPtr)) |
| 866 | return ""; |
| 867 | |
| 868 | // Match the rest of the identifier regex: [0-9a-zA-Z_]* |
| 869 | while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') |
| 870 | ++CurPtr; |
| 871 | |
| 872 | return StringRef(TokStart, CurPtr - TokStart); |
| 873 | } |
| 874 | |
| 875 | bool TGLexer::prepSkipLineBegin() { |
| 876 | while (CurPtr != CurBuf.end()) { |
| 877 | switch (*CurPtr) { |
| 878 | case ' ': |
| 879 | case '\t': |
| 880 | case '\n': |
| 881 | case '\r': |
| 882 | break; |
| 883 | |
| 884 | case '/': { |
| 885 | int NextChar = peekNextChar(1); |
| 886 | if (NextChar == '*') { |
| 887 | // Skip C-style comment. |
| 888 | // Note that we do not care about skipping the C++-style comments. |
| 889 | // If the line contains "//", it may not contain any processable |
| 890 | // preprocessing directive. Just return CurPtr pointing to |
| 891 | // the first '/' in this case. We also do not care about |
| 892 | // incorrect symbols after the first '/' - we are in lines-skipping |
| 893 | // mode, so incorrect code is allowed to some extent. |
| 894 | |
| 895 | // Set TokStart to the beginning of the comment to enable proper |
| 896 | // diagnostic printing in case of error in SkipCComment(). |
| 897 | TokStart = CurPtr; |
| 898 | |
| 899 | // CurPtr must point to '*' before call to SkipCComment(). |
| 900 | ++CurPtr; |
| 901 | if (SkipCComment()) |
| 902 | return false; |
| 903 | } else { |
| 904 | // CurPtr points to the non-whitespace '/'. |
| 905 | return true; |
| 906 | } |
| 907 | |
| 908 | // We must not increment CurPtr after the comment was lexed. |
| 909 | continue; |
| 910 | } |
| 911 | |
| 912 | default: |
| 913 | return true; |
| 914 | } |
| 915 | |
| 916 | ++CurPtr; |
| 917 | } |
| 918 | |
| 919 | // We have reached the end of the file. Return to the lines skipping |
| 920 | // code, and allow it to handle the EOF as needed. |
| 921 | return true; |
| 922 | } |
| 923 | |
| 924 | bool TGLexer::prepSkipDirectiveEnd() { |
| 925 | while (CurPtr != CurBuf.end()) { |
| 926 | switch (*CurPtr) { |
| 927 | case ' ': |
| 928 | case '\t': |
| 929 | break; |
| 930 | |
| 931 | case '\n': |
| 932 | case '\r': |
| 933 | return true; |
| 934 | |
| 935 | case '/': { |
| 936 | int NextChar = peekNextChar(1); |
| 937 | if (NextChar == '/') { |
| 938 | // Skip C++-style comment. |
| 939 | // We may just return true now, but let's skip to the line/buffer end |
| 940 | // to simplify the method specification. |
| 941 | ++CurPtr; |
| 942 | SkipBCPLComment(); |
| 943 | } else if (NextChar == '*') { |
| 944 | // When we are skipping C-style comment at the end of a preprocessing |
| 945 | // directive, we can skip several lines. If any meaningful TD token |
| 946 | // follows the end of the C-style comment on the same line, it will |
| 947 | // be considered as an invalid usage of TD token. |
| 948 | // For example, we want to forbid usages like this one: |
| 949 | // #define MACRO class Class {} |
| 950 | // But with C-style comments we also disallow the following: |
| 951 | // #define MACRO /* This macro is used |
| 952 | // to ... */ class Class {} |
| 953 | // One can argue that this should be allowed, but it does not seem |
| 954 | // to be worth of the complication. Moreover, this matches |
| 955 | // the C preprocessor behavior. |
| 956 | |
| 957 | // Set TokStart to the beginning of the comment to enable proper |
| 958 | // diagnostic printer in case of error in SkipCComment(). |
| 959 | TokStart = CurPtr; |
| 960 | ++CurPtr; |
| 961 | if (SkipCComment()) |
| 962 | return false; |
| 963 | } else { |
| 964 | TokStart = CurPtr; |
| 965 | PrintError(CurPtr, "Unexpected character"); |
| 966 | return false; |
| 967 | } |
| 968 | |
| 969 | // We must not increment CurPtr after the comment was lexed. |
| 970 | continue; |
| 971 | } |
| 972 | |
| 973 | default: |
| 974 | // Do not allow any non-whitespaces after the directive. |
| 975 | TokStart = CurPtr; |
| 976 | return false; |
| 977 | } |
| 978 | |
| 979 | ++CurPtr; |
| 980 | } |
| 981 | |
| 982 | return true; |
| 983 | } |
| 984 | |
| 985 | void TGLexer::prepSkipToLineEnd() { |
| 986 | while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) |
| 987 | ++CurPtr; |
| 988 | } |
| 989 | |
| 990 | bool TGLexer::prepIsProcessingEnabled() { |
| 991 | for (auto I = PrepIncludeStack.back()->rbegin(), |
| 992 | E = PrepIncludeStack.back()->rend(); |
| 993 | I != E; ++I) { |
| 994 | if (!I->IsDefined) |
| 995 | return false; |
| 996 | } |
| 997 | |
| 998 | return true; |
| 999 | } |
| 1000 | |
| 1001 | void TGLexer::prepReportPreprocessorStackError() { |
| 1002 | if (PrepIncludeStack.back()->empty()) |
| 1003 | PrintFatalError("prepReportPreprocessorStackError() called with " |
| 1004 | "empty control stack"); |
| 1005 | |
| 1006 | auto &PrepControl = PrepIncludeStack.back()->back(); |
| 1007 | PrintError(CurBuf.end(), "Reached EOF without matching #endif"); |
| 1008 | PrintError(PrepControl.SrcPos, "The latest preprocessor control is here"); |
| 1009 | |
| 1010 | TokStart = CurPtr; |
| 1011 | } |