Blame - lib/TableGen/TGLexer.cpp - platform_external_llvm80

blob: 16aeee561075c54a2f103d8fc185305bdc1a55e6 [file] [log] [blame]

Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	1	//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
Chris Lattner	3060910	2007-12-29 20:37:13 +0000	[diff] [blame]	5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Implement the Lexer for TableGen.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
Chris Lattner	6aaca04	2007-11-18 05:25:45 +0000	[diff] [blame]	14	#include "TGLexer.h"
Bill Wendling	cd466f5	2010-12-08 20:02:49 +0000	[diff] [blame]	15	#include "llvm/ADT/StringSwitch.h"
				16	#include "llvm/ADT/Twine.h"
Chandler Carruth	d04a8d4	2012-12-03 16:50:05 +0000	[diff] [blame]	17	#include "llvm/Config/config.h" // for strtoull()/strtoll() define
Eugene Zelenko	9feaa97	2016-08-23 17:14:32 +0000	[diff] [blame]	18	#include "llvm/Support/Compiler.h"
Chandler Carruth	d04a8d4	2012-12-03 16:50:05 +0000	[diff] [blame]	19	#include "llvm/Support/MemoryBuffer.h"
				20	#include "llvm/Support/SourceMgr.h"
				21	#include "llvm/TableGen/Error.h"
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	22	#include <algorithm>
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	23	#include <cctype>
Chandler Carruth	d04a8d4	2012-12-03 16:50:05 +0000	[diff] [blame]	24	#include <cerrno>
Eugene Zelenko	9feaa97	2016-08-23 17:14:32 +0000	[diff] [blame]	25	#include <cstdint>
Duncan Sands	4520dd2	2008-10-08 07:23:46 +0000	[diff] [blame]	26	#include <cstdio>
Anton Korobeynikov	ae9f3a3	2008-02-20 11:08:44 +0000	[diff] [blame]	27	#include <cstdlib>
				28	#include <cstring>
Dylan Noblesmith	8cc300c	2011-12-22 23:08:39 +0000	[diff] [blame]	29
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	30	using namespace llvm;
				31
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	32	namespace {
				33	// A list of supported preprocessing directives with their
				34	// internal token kinds and names.
				35	struct {
				36	tgtok::TokKind Kind;
				37	const char *Word;
				38	} PreprocessorDirs[] = {
				39	{ tgtok::Ifdef, "ifdef" },
				40	{ tgtok::Else, "else" },
				41	{ tgtok::Endif, "endif" },
				42	{ tgtok::Define, "define" }
				43	};
				44	} // end anonymous namespace
				45
				46	TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
Alp Toker	1508c82	2014-07-06 10:33:31 +0000	[diff] [blame]	47	CurBuffer = SrcMgr.getMainFileID();
Rafael Espindola	245fbdf	2014-07-06 14:24:03 +0000	[diff] [blame]	48	CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
				49	CurPtr = CurBuf.begin();
Craig Topper	8a0d1c8	2014-04-09 04:50:04 +0000	[diff] [blame]	50	TokStart = nullptr;
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	51
				52	// Pretend that we enter the "top-level" include file.
				53	PrepIncludeStack.push_back(
				54	make_unique<std::vector<PreprocessorControlDesc>>());
				55
				56	// Put all macros defined in the command line into the DefinedMacros set.
				57	std::for_each(Macros.begin(), Macros.end(),
				58	[this](const std::string &MacroName) {
				59	DefinedMacros.insert(MacroName);
				60	});
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	61	}
				62
Chris Lattner	1e3a8a4	2009-06-21 03:39:35 +0000	[diff] [blame]	63	SMLoc TGLexer::getLoc() const {
				64	return SMLoc::getFromPointer(TokStart);
Chris Lattner	1c8ae59	2009-03-13 16:01:53 +0000	[diff] [blame]	65	}
				66
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	67	/// ReturnError - Set the error to the specified string at the specified
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	68	/// location. This is defined to always return tgtok::Error.
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	69	tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) {
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	70	PrintError(Loc, Msg);
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	71	return tgtok::Error;
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	72	}
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	73
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	74	tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
				75	return ReturnError(SMLoc::getFromPointer(Loc), Msg);
				76	}
				77
				78	bool TGLexer::processEOF() {
				79	SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
				80	if (ParentIncludeLoc != SMLoc()) {
				81	// If prepExitInclude() detects a problem with the preprocessing
				82	// control stack, it will return false. Pretend that we reached
				83	// the final EOF and stop lexing more tokens by returning false
				84	// to LexToken().
				85	if (!prepExitInclude(false))
				86	return false;
				87
				88	CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
				89	CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
				90	CurPtr = ParentIncludeLoc.getPointer();
				91	// Make sure TokStart points into the parent file's buffer.
				92	// LexToken() assigns to it before calling getNextChar(),
				93	// so it is pointing into the included file now.
				94	TokStart = CurPtr;
				95	return true;
				96	}
				97
				98	// Pretend that we exit the "top-level" include file.
				99	// Note that in case of an error (e.g. control stack imbalance)
				100	// the routine will issue a fatal error.
				101	prepExitInclude(true);
				102	return false;
				103	}
				104
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	105	int TGLexer::getNextChar() {
				106	char CurChar = *CurPtr++;
				107	switch (CurChar) {
				108	default:
Chris Lattner	c181918	2007-11-18 05:48:46 +0000	[diff] [blame]	109	return (unsigned char)CurChar;
Chris Lattner	aa739d2	2009-03-13 07:05:43 +0000	[diff] [blame]	110	case 0: {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	111	// A nul character in the stream is either the end of the current buffer or
				112	// a random nul in the file. Disambiguate that here.
Rafael Espindola	245fbdf	2014-07-06 14:24:03 +0000	[diff] [blame]	113	if (CurPtr-1 != CurBuf.end())
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	114	return 0; // Just whitespace.
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	115
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	116	// Otherwise, return end of file.
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	117	--CurPtr; // Another call to lex will return EOF again.
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	118	return EOF;
Chris Lattner	aa739d2	2009-03-13 07:05:43 +0000	[diff] [blame]	119	}
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	120	case '\n':
				121	case '\r':
				122	// Handle the newline character by ignoring it and incrementing the line
				123	// count. However, be careful about 'dos style' files with \n\r in them.
				124	// Only treat a \n\r or \r\n as a single line.
				125	if ((CurPtr == '\n' \|\| (CurPtr == '\r')) &&
				126	*CurPtr != CurChar)
Chris Lattner	c181918	2007-11-18 05:48:46 +0000	[diff] [blame]	127	++CurPtr; // Eat the two char newline sequence.
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	128	return '\n';
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	129	}
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	130	}
				131
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	132	int TGLexer::peekNextChar(int Index) const {
David Greene	a761f92	2011-10-19 13:03:35 +0000	[diff] [blame]	133	return *(CurPtr + Index);
				134	}
				135
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	136	tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
Chris Lattner	56a9fcf	2007-11-19 07:43:52 +0000	[diff] [blame]	137	TokStart = CurPtr;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	138	// This always consumes at least one character.
				139	int CurChar = getNextChar();
				140
				141	switch (CurChar) {
				142	default:
David Greene	d3d1cad	2011-10-19 13:04:43 +0000	[diff] [blame]	143	// Handle letters: [a-zA-Z_]
				144	if (isalpha(CurChar) \|\| CurChar == '_')
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	145	return LexIdentifier();
David Greene	d3d1cad	2011-10-19 13:04:43 +0000	[diff] [blame]	146
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	147	// Unknown character, emit an error.
				148	return ReturnError(TokStart, "Unexpected character");
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	149	case EOF:
				150	// Lex next token, if we just left an include file.
				151	// Note that leaving an include file means that the next
				152	// symbol is located at the end of 'include "..."'
				153	// construct, so LexToken() is called with default
				154	// false parameter.
				155	if (processEOF())
				156	return LexToken();
				157
				158	// Return EOF denoting the end of lexing.
				159	return tgtok::Eof;
				160
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	161	case ':': return tgtok::colon;
				162	case ';': return tgtok::semi;
				163	case '.': return tgtok::period;
				164	case ',': return tgtok::comma;
				165	case '<': return tgtok::less;
				166	case '>': return tgtok::greater;
				167	case ']': return tgtok::r_square;
				168	case '{': return tgtok::l_brace;
				169	case '}': return tgtok::r_brace;
				170	case '(': return tgtok::l_paren;
				171	case ')': return tgtok::r_paren;
				172	case '=': return tgtok::equal;
				173	case '?': return tgtok::question;
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	174	case '#':
				175	if (FileOrLineStart) {
				176	tgtok::TokKind Kind = prepIsDirective();
				177	if (Kind != tgtok::Error)
				178	return lexPreprocessor(Kind);
				179	}
				180
				181	return tgtok::paste;
				182
				183	case '\r':
				184	PrintFatalError("getNextChar() must never return '\r'");
				185	return tgtok::Error;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	186
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	187	case 0:
				188	case ' ':
				189	case '\t':
Vyacheslav Zakharin	cec2d66	2018-11-17 02:26:34 +0000	[diff] [blame]	190	// Ignore whitespace.
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	191	return LexToken(FileOrLineStart);
				192	case '\n':
				193	// Ignore whitespace, and identify the new line.
				194	return LexToken(true);
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	195	case '/':
				196	// If this is the start of a // comment, skip until the end of the line or
				197	// the end of the buffer.
				198	if (*CurPtr == '/')
				199	SkipBCPLComment();
				200	else if (CurPtr == '') {
				201	if (SkipCComment())
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	202	return tgtok::Error;
				203	} else // Otherwise, this is an error.
				204	return ReturnError(TokStart, "Unexpected character");
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	205	return LexToken(FileOrLineStart);
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	206	case '-': case '+':
				207	case '0': case '1': case '2': case '3': case '4': case '5': case '6':
David Greene	7efe936	2011-10-19 13:03:39 +0000	[diff] [blame]	208	case '7': case '8': case '9': {
				209	int NextChar = 0;
				210	if (isdigit(CurChar)) {
				211	// Allow identifiers to start with a number if it is followed by
				212	// an identifier. This can happen with paste operations like
				213	// foo#8i.
				214	int i = 0;
				215	do {
				216	NextChar = peekNextChar(i++);
				217	} while (isdigit(NextChar));
				218
				219	if (NextChar == 'x' \|\| NextChar == 'b') {
				220	// If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most
				221	// likely a number.
				222	int NextNextChar = peekNextChar(i);
				223	switch (NextNextChar) {
				224	default:
				225	break;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	226	case '0': case '1':
David Greene	7efe936	2011-10-19 13:03:39 +0000	[diff] [blame]	227	if (NextChar == 'b')
				228	return LexNumber();
Justin Bogner	6673ea8	2016-08-17 05:10:15 +0000	[diff] [blame]	229	LLVM_FALLTHROUGH;
David Greene	7efe936	2011-10-19 13:03:39 +0000	[diff] [blame]	230	case '2': case '3': case '4': case '5':
				231	case '6': case '7': case '8': case '9':
				232	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
				233	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
				234	if (NextChar == 'x')
				235	return LexNumber();
				236	break;
				237	}
				238	}
				239	}
				240
				241	if (isalpha(NextChar) \|\| NextChar == '_')
				242	return LexIdentifier();
				243
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	244	return LexNumber();
David Greene	7efe936	2011-10-19 13:03:39 +0000	[diff] [blame]	245	}
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	246	case '"': return LexString();
				247	case '$': return LexVarName();
				248	case '[': return LexBracket();
				249	case '!': return LexExclaim();
				250	}
				251	}
				252
				253	/// LexString - Lex "[^"]*"
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	254	tgtok::TokKind TGLexer::LexString() {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	255	const char *StrStart = CurPtr;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	256
Chris Lattner	ea9f4df	2009-03-13 21:03:27 +0000	[diff] [blame]	257	CurStrVal = "";
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	258
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	259	while (*CurPtr != '"') {
				260	// If we hit the end of the buffer, report an error.
Rafael Espindola	245fbdf	2014-07-06 14:24:03 +0000	[diff] [blame]	261	if (*CurPtr == 0 && CurPtr == CurBuf.end())
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	262	return ReturnError(StrStart, "End of file in string literal");
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	263
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	264	if (CurPtr == '\n' \|\| CurPtr == '\r')
				265	return ReturnError(StrStart, "End of line in string literal");
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	266
Chris Lattner	ea9f4df	2009-03-13 21:03:27 +0000	[diff] [blame]	267	if (*CurPtr != '\\') {
				268	CurStrVal += *CurPtr++;
				269	continue;
				270	}
				271
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	272	++CurPtr;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	273
Chris Lattner	ea9f4df	2009-03-13 21:03:27 +0000	[diff] [blame]	274	switch (*CurPtr) {
				275	case '\\': case '\'': case '"':
				276	// These turn into their literal character.
				277	CurStrVal += *CurPtr++;
				278	break;
Chris Lattner	e023bb6	2009-03-13 21:23:43 +0000	[diff] [blame]	279	case 't':
Chris Lattner	7f3b28a	2009-03-13 21:33:17 +0000	[diff] [blame]	280	CurStrVal += '\t';
Chris Lattner	e023bb6	2009-03-13 21:23:43 +0000	[diff] [blame]	281	++CurPtr;
				282	break;
				283	case 'n':
Chris Lattner	7f3b28a	2009-03-13 21:33:17 +0000	[diff] [blame]	284	CurStrVal += '\n';
Chris Lattner	e023bb6	2009-03-13 21:23:43 +0000	[diff] [blame]	285	++CurPtr;
				286	break;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	287
Chris Lattner	ea9f4df	2009-03-13 21:03:27 +0000	[diff] [blame]	288	case '\n':
				289	case '\r':
				290	return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
				291
				292	// If we hit the end of the buffer, report an error.
				293	case '\0':
Rafael Espindola	245fbdf	2014-07-06 14:24:03 +0000	[diff] [blame]	294	if (CurPtr == CurBuf.end())
Chris Lattner	ea9f4df	2009-03-13 21:03:27 +0000	[diff] [blame]	295	return ReturnError(StrStart, "End of file in string literal");
Justin Bogner	7d7a23e	2016-08-17 20:30:52 +0000	[diff] [blame]	296	LLVM_FALLTHROUGH;
Chris Lattner	ea9f4df	2009-03-13 21:03:27 +0000	[diff] [blame]	297	default:
				298	return ReturnError(CurPtr, "invalid escape in string literal");
				299	}
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	300	}
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	301
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	302	++CurPtr;
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	303	return tgtok::StrVal;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	304	}
				305
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	306	tgtok::TokKind TGLexer::LexVarName() {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	307	if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	308	return ReturnError(TokStart, "Invalid variable name");
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	309
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	310	// Otherwise, we're ok, consume the rest of the characters.
				311	const char *VarNameStart = CurPtr++;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	312
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	313	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_')
				314	++CurPtr;
				315
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	316	CurStrVal.assign(VarNameStart, CurPtr);
				317	return tgtok::VarName;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	318	}
				319
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	320	tgtok::TokKind TGLexer::LexIdentifier() {
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	321	// The first letter is [a-zA-Z_].
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	322	const char *IdentStart = TokStart;
Benjamin Kramer	37d42af	2011-10-06 18:23:56 +0000	[diff] [blame]	323
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	324	// Match the rest of the identifier regex: [0-9a-zA-Z_]*
David Greene	d3d1cad	2011-10-19 13:04:43 +0000	[diff] [blame]	325	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_')
Chris Lattner	c2b0875	2010-10-05 22:59:29 +0000	[diff] [blame]	326	++CurPtr;
Benjamin Kramer	37d42af	2011-10-06 18:23:56 +0000	[diff] [blame]	327
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	328	// Check to see if this identifier is a keyword.
Benjamin Kramer	37d42af	2011-10-06 18:23:56 +0000	[diff] [blame]	329	StringRef Str(IdentStart, CurPtr-IdentStart);
				330
Benjamin Kramer	37d42af	2011-10-06 18:23:56 +0000	[diff] [blame]	331	if (Str == "include") {
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	332	if (LexInclude()) return tgtok::Error;
				333	return Lex();
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	334	}
Benjamin Kramer	37d42af	2011-10-06 18:23:56 +0000	[diff] [blame]	335
Benjamin Kramer	ee57318	2011-10-06 18:53:43 +0000	[diff] [blame]	336	tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
				337	.Case("int", tgtok::Int)
				338	.Case("bit", tgtok::Bit)
				339	.Case("bits", tgtok::Bits)
				340	.Case("string", tgtok::String)
				341	.Case("list", tgtok::List)
				342	.Case("code", tgtok::Code)
				343	.Case("dag", tgtok::Dag)
				344	.Case("class", tgtok::Class)
				345	.Case("def", tgtok::Def)
David Greene	cebb4ee	2012-02-22 16:09:41 +0000	[diff] [blame]	346	.Case("foreach", tgtok::Foreach)
Benjamin Kramer	ee57318	2011-10-06 18:53:43 +0000	[diff] [blame]	347	.Case("defm", tgtok::Defm)
Nicolai Haehnle	d66fa2a	2018-03-09 12:24:42 +0000	[diff] [blame]	348	.Case("defset", tgtok::Defset)
Benjamin Kramer	ee57318	2011-10-06 18:53:43 +0000	[diff] [blame]	349	.Case("multiclass", tgtok::MultiClass)
				350	.Case("field", tgtok::Field)
				351	.Case("let", tgtok::Let)
				352	.Case("in", tgtok::In)
				353	.Default(tgtok::Id);
				354
				355	if (Kind == tgtok::Id)
				356	CurStrVal.assign(Str.begin(), Str.end());
				357	return Kind;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	358	}
				359
				360	/// LexInclude - We just read the "include" token. Get the string token that
				361	/// comes next and enter the include.
				362	bool TGLexer::LexInclude() {
				363	// The token after the include must be a string.
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	364	tgtok::TokKind Tok = LexToken();
				365	if (Tok == tgtok::Error) return true;
				366	if (Tok != tgtok::StrVal) {
				367	PrintError(getLoc(), "Expected filename after include");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	368	return true;
				369	}
				370
				371	// Get the string.
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	372	std::string Filename = CurStrVal;
Joerg Sonnenberger	dd13790	2011-06-01 13:10:15 +0000	[diff] [blame]	373	std::string IncludedFile;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	374
Joerg Sonnenberger	dd13790	2011-06-01 13:10:15 +0000	[diff] [blame]	375	CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
				376	IncludedFile);
Alp Toker	1508c82	2014-07-06 10:33:31 +0000	[diff] [blame]	377	if (!CurBuffer) {
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	378	PrintError(getLoc(), "Could not find include file '" + Filename + "'");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	379	return true;
				380	}
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	381
Sean Silva	a170f52	2013-02-07 04:30:39 +0000	[diff] [blame]	382	DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile);
				383	if (Found != Dependencies.end()) {
				384	PrintError(getLoc(),
				385	"File '" + IncludedFile + "' has already been included.");
				386	SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note,
				387	"previously included here");
				388	return true;
				389	}
				390	Dependencies.insert(std::make_pair(IncludedFile, getLoc()));
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	391	// Save the line number and lex buffer of the includer.
Rafael Espindola	245fbdf	2014-07-06 14:24:03 +0000	[diff] [blame]	392	CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
				393	CurPtr = CurBuf.begin();
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	394
				395	PrepIncludeStack.push_back(
				396	make_unique<std::vector<PreprocessorControlDesc>>());
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	397	return false;
				398	}
				399
				400	void TGLexer::SkipBCPLComment() {
				401	++CurPtr; // skip the second slash.
Eugene Zelenko	9feaa97	2016-08-23 17:14:32 +0000	[diff] [blame]	402	while (true) {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	403	switch (*CurPtr) {
				404	case '\n':
				405	case '\r':
				406	return; // Newline is end of comment.
				407	case 0:
				408	// If this is the end of the buffer, end the comment.
Rafael Espindola	245fbdf	2014-07-06 14:24:03 +0000	[diff] [blame]	409	if (CurPtr == CurBuf.end())
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	410	return;
				411	break;
				412	}
				413	// Otherwise, skip the character.
				414	++CurPtr;
				415	}
				416	}
				417
				418	/// SkipCComment - This skips C-style /**/ comments. The only difference from C
				419	/// is that we allow nesting.
				420	bool TGLexer::SkipCComment() {
				421	++CurPtr; // skip the star.
				422	unsigned CommentDepth = 1;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	423
Eugene Zelenko	9feaa97	2016-08-23 17:14:32 +0000	[diff] [blame]	424	while (true) {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	425	int CurChar = getNextChar();
				426	switch (CurChar) {
				427	case EOF:
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	428	PrintError(TokStart, "Unterminated comment!");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	429	return true;
				430	case '*':
				431	// End of the comment?
				432	if (CurPtr[0] != '/') break;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	433
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	434	++CurPtr; // End the */.
				435	if (--CommentDepth == 0)
				436	return false;
				437	break;
				438	case '/':
				439	// Start of a nested comment?
				440	if (CurPtr[0] != '*') break;
				441	++CurPtr;
				442	++CommentDepth;
				443	break;
				444	}
				445	}
				446	}
				447
				448	/// LexNumber - Lex:
				449	/// [-+]?[0-9]+
				450	/// 0x[0-9a-fA-F]+
				451	/// 0b[01]+
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	452	tgtok::TokKind TGLexer::LexNumber() {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	453	if (CurPtr[-1] == '0') {
				454	if (CurPtr[0] == 'x') {
				455	++CurPtr;
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	456	const char *NumStart = CurPtr;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	457	while (isxdigit(CurPtr[0]))
				458	++CurPtr;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	459
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	460	// Requires at least one hex digit.
				461	if (CurPtr == NumStart)
Chris Lattner	4226bb0	2009-06-21 19:22:49 +0000	[diff] [blame]	462	return ReturnError(TokStart, "Invalid hexadecimal number");
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	463
Dan Gohman	63f9720	2008-10-17 01:33:43 +0000	[diff] [blame]	464	errno = 0;
Craig Topper	8a0d1c8	2014-04-09 04:50:04 +0000	[diff] [blame]	465	CurIntVal = strtoll(NumStart, nullptr, 16);
Dan Gohman	63f9720	2008-10-17 01:33:43 +0000	[diff] [blame]	466	if (errno == EINVAL)
Chris Lattner	4226bb0	2009-06-21 19:22:49 +0000	[diff] [blame]	467	return ReturnError(TokStart, "Invalid hexadecimal number");
Dan Gohman	63f9720	2008-10-17 01:33:43 +0000	[diff] [blame]	468	if (errno == ERANGE) {
				469	errno = 0;
Craig Topper	8a0d1c8	2014-04-09 04:50:04 +0000	[diff] [blame]	470	CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
Dan Gohman	63f9720	2008-10-17 01:33:43 +0000	[diff] [blame]	471	if (errno == EINVAL)
Chris Lattner	4226bb0	2009-06-21 19:22:49 +0000	[diff] [blame]	472	return ReturnError(TokStart, "Invalid hexadecimal number");
Dan Gohman	63f9720	2008-10-17 01:33:43 +0000	[diff] [blame]	473	if (errno == ERANGE)
Chris Lattner	4226bb0	2009-06-21 19:22:49 +0000	[diff] [blame]	474	return ReturnError(TokStart, "Hexadecimal number out of range");
Dan Gohman	63f9720	2008-10-17 01:33:43 +0000	[diff] [blame]	475	}
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	476	return tgtok::IntVal;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	477	} else if (CurPtr[0] == 'b') {
				478	++CurPtr;
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	479	const char *NumStart = CurPtr;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	480	while (CurPtr[0] == '0' \|\| CurPtr[0] == '1')
				481	++CurPtr;
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	482
				483	// Requires at least one binary digit.
				484	if (CurPtr == NumStart)
				485	return ReturnError(CurPtr-2, "Invalid binary number");
Craig Topper	8a0d1c8	2014-04-09 04:50:04 +0000	[diff] [blame]	486	CurIntVal = strtoll(NumStart, nullptr, 2);
Pete Cooper	42c1227	2014-08-07 05:47:00 +0000	[diff] [blame]	487	return tgtok::BinaryIntVal;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	488	}
				489	}
				490
				491	// Check for a sign without a digit.
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	492	if (!isdigit(CurPtr[0])) {
				493	if (CurPtr[-1] == '-')
				494	return tgtok::minus;
				495	else if (CurPtr[-1] == '+')
				496	return tgtok::plus;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	497	}
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	498
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	499	while (isdigit(CurPtr[0]))
				500	++CurPtr;
Craig Topper	8a0d1c8	2014-04-09 04:50:04 +0000	[diff] [blame]	501	CurIntVal = strtoll(TokStart, nullptr, 10);
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	502	return tgtok::IntVal;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	503	}
				504
				505	/// LexBracket - We just read '['. If this is a code block, return it,
				506	/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ \| }[^]] )* }]'
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	507	tgtok::TokKind TGLexer::LexBracket() {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	508	if (CurPtr[0] != '{')
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	509	return tgtok::l_square;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	510	++CurPtr;
				511	const char *CodeStart = CurPtr;
Eugene Zelenko	9feaa97	2016-08-23 17:14:32 +0000	[diff] [blame]	512	while (true) {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	513	int Char = getNextChar();
				514	if (Char == EOF) break;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	515
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	516	if (Char != '}') continue;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	517
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	518	Char = getNextChar();
				519	if (Char == EOF) break;
				520	if (Char == ']') {
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	521	CurStrVal.assign(CodeStart, CurPtr-2);
				522	return tgtok::CodeFragment;
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	523	}
				524	}
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	525
Chris Lattner	c8a9bbc	2007-11-19 07:38:58 +0000	[diff] [blame]	526	return ReturnError(CodeStart-2, "Unterminated Code Block");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	527	}
				528
				529	/// LexExclaim - Lex '!' and '![a-zA-Z]+'.
Chris Lattner	f460165	2007-11-22 20:49:04 +0000	[diff] [blame]	530	tgtok::TokKind TGLexer::LexExclaim() {
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	531	if (!isalpha(*CurPtr))
Bill Wendling	dd2b6cb	2010-12-08 13:03:15 +0000	[diff] [blame]	532	return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	533
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	534	const char *Start = CurPtr++;
				535	while (isalpha(*CurPtr))
				536	++CurPtr;
Nicolai Haehnle	9ae21b3	2018-03-09 18:32:04 +0000	[diff] [blame]	537
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	538	// Check to see which operator this is.
Bill Wendling	cd466f5	2010-12-08 20:02:49 +0000	[diff] [blame]	539	tgtok::TokKind Kind =
				540	StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
				541	.Case("eq", tgtok::XEq)
Nicolai Haehnle	af0de50	2018-03-14 11:00:57 +0000	[diff] [blame]	542	.Case("ne", tgtok::XNe)
				543	.Case("le", tgtok::XLe)
				544	.Case("lt", tgtok::XLt)
				545	.Case("ge", tgtok::XGe)
				546	.Case("gt", tgtok::XGt)
Bill Wendling	cd466f5	2010-12-08 20:02:49 +0000	[diff] [blame]	547	.Case("if", tgtok::XIf)
Nicolai Haehnle	a2472db	2018-03-09 12:24:06 +0000	[diff] [blame]	548	.Case("isa", tgtok::XIsA)
David Greene	1434f66	2011-01-07 17:05:37 +0000	[diff] [blame]	549	.Case("head", tgtok::XHead)
				550	.Case("tail", tgtok::XTail)
Nicolai Haehnle	c343502	2018-02-23 10:46:07 +0000	[diff] [blame]	551	.Case("size", tgtok::XSize)
Bill Wendling	cd466f5	2010-12-08 20:02:49 +0000	[diff] [blame]	552	.Case("con", tgtok::XConcat)
Nicolai Haehnle	2318764	2018-03-14 11:00:26 +0000	[diff] [blame]	553	.Case("dag", tgtok::XDag)
Hal Finkel	d23a41c	2013-01-25 14:49:08 +0000	[diff] [blame]	554	.Case("add", tgtok::XADD)
Joerg Sonnenberger	c754b57	2014-08-05 09:43:25 +0000	[diff] [blame]	555	.Case("and", tgtok::XAND)
Matt Arsenault	ee23318	2016-11-15 06:49:28 +0000	[diff] [blame]	556	.Case("or", tgtok::XOR)
Bill Wendling	cd466f5	2010-12-08 20:02:49 +0000	[diff] [blame]	557	.Case("shl", tgtok::XSHL)
				558	.Case("sra", tgtok::XSRA)
				559	.Case("srl", tgtok::XSRL)
				560	.Case("cast", tgtok::XCast)
David Greene	1434f66	2011-01-07 17:05:37 +0000	[diff] [blame]	561	.Case("empty", tgtok::XEmpty)
Bill Wendling	cd466f5	2010-12-08 20:02:49 +0000	[diff] [blame]	562	.Case("subst", tgtok::XSubst)
Nicolai Haehnle	8498a49	2018-03-06 13:49:16 +0000	[diff] [blame]	563	.Case("foldl", tgtok::XFoldl)
Bill Wendling	cd466f5	2010-12-08 20:02:49 +0000	[diff] [blame]	564	.Case("foreach", tgtok::XForEach)
Daniel Sanders	d80222a	2014-05-07 10:13:19 +0000	[diff] [blame]	565	.Case("listconcat", tgtok::XListConcat)
Bill Wendling	cd466f5	2010-12-08 20:02:49 +0000	[diff] [blame]	566	.Case("strconcat", tgtok::XStrConcat)
				567	.Default(tgtok::Error);
David Greene	d418c1b	2009-05-14 20:54:48 +0000	[diff] [blame]	568
Bill Wendling	cd466f5	2010-12-08 20:02:49 +0000	[diff] [blame]	569	return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
Chris Lattner	a805874	2007-11-18 02:57:27 +0000	[diff] [blame]	570	}
Vyacheslav Zakharin	6c99d2b	2018-11-27 18:57:43 +0000	[diff] [blame]	571
				572	bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
				573	// Report an error, if preprocessor control stack for the current
				574	// file is not empty.
				575	if (!PrepIncludeStack.back()->empty()) {
				576	prepReportPreprocessorStackError();
				577
				578	return false;
				579	}
				580
				581	// Pop the preprocessing controls from the include stack.
				582	if (PrepIncludeStack.empty()) {
				583	PrintFatalError("Preprocessor include stack is empty");
				584	}
				585
				586	PrepIncludeStack.pop_back();
				587
				588	if (IncludeStackMustBeEmpty) {
				589	if (!PrepIncludeStack.empty())
				590	PrintFatalError("Preprocessor include stack is not empty");
				591	} else {
				592	if (PrepIncludeStack.empty())
				593	PrintFatalError("Preprocessor include stack is empty");
				594	}
				595
				596	return true;
				597	}
				598
				599	tgtok::TokKind TGLexer::prepIsDirective() const {
				600	for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID) {
				601	int NextChar = *CurPtr;
				602	bool Match = true;
				603	unsigned I = 0;
				604	for (; I < strlen(PreprocessorDirs[ID].Word); ++I) {
				605	if (NextChar != PreprocessorDirs[ID].Word[I]) {
				606	Match = false;
				607	break;
				608	}
				609
				610	NextChar = peekNextChar(I + 1);
				611	}
				612
				613	// Check for whitespace after the directive. If there is no whitespace,
				614	// then we do not recognize it as a preprocessing directive.
				615	if (Match) {
				616	tgtok::TokKind Kind = PreprocessorDirs[ID].Kind;
				617
				618	// New line and EOF may follow only #else/#endif. It will be reported
				619	// as an error for #ifdef/#define after the call to prepLexMacroName().
				620	if (NextChar == ' ' \|\| NextChar == '\t' \|\| NextChar == EOF \|\|
				621	NextChar == '\n' \|\|
				622	// It looks like TableGen does not support '\r' as the actual
				623	// carriage return, e.g. getNextChar() treats a single '\r'
				624	// as '\n'. So we do the same here.
				625	NextChar == '\r')
				626	return Kind;
				627
				628	// Allow comments after some directives, e.g.:
				629	// #else// OR #else/**/
				630	// #endif// OR #endif/**/
				631	//
				632	// Note that we do allow comments after #ifdef/#define here, e.g.
				633	// #ifdef/**/ AND #ifdef//
				634	// #define/**/ AND #define//
				635	//
				636	// These cases will be reported as incorrect after calling
				637	// prepLexMacroName(). We could have supported C-style comments
				638	// after #ifdef/#define, but this would complicate the code
				639	// for little benefit.
				640	if (NextChar == '/') {
				641	NextChar = peekNextChar(I + 1);
				642
				643	if (NextChar == '*' \|\| NextChar == '/')
				644	return Kind;
				645
				646	// Pretend that we do not recognize the directive.
				647	}
				648	}
				649	}
				650
				651	return tgtok::Error;
				652	}
				653
				654	bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
				655	TokStart = CurPtr;
				656
				657	for (unsigned ID = 0; ID < llvm::array_lengthof(PreprocessorDirs); ++ID)
				658	if (PreprocessorDirs[ID].Kind == Kind) {
				659	// Advance CurPtr to the end of the preprocessing word.
				660	CurPtr += strlen(PreprocessorDirs[ID].Word);
				661	return true;
				662	}
				663
				664	PrintFatalError("Unsupported preprocessing token in "
				665	"prepEatPreprocessorDirective()");
				666	return false;
				667	}
				668
				669	tgtok::TokKind TGLexer::lexPreprocessor(
				670	tgtok::TokKind Kind, bool ReturnNextLiveToken) {
				671
				672	// We must be looking at a preprocessing directive. Eat it!
				673	if (!prepEatPreprocessorDirective(Kind))
				674	PrintFatalError("lexPreprocessor() called for unknown "
				675	"preprocessor directive");
				676
				677	if (Kind == tgtok::Ifdef) {
				678	StringRef MacroName = prepLexMacroName();
				679	if (MacroName.empty())
				680	return ReturnError(TokStart, "Expected macro name after #ifdef");
				681
				682	bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
				683
				684	// Regardless of whether we are processing tokens or not,
				685	// we put the #ifdef control on stack.
				686	PrepIncludeStack.back()->push_back(
				687	{Kind, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
				688
				689	if (!prepSkipDirectiveEnd())
				690	return ReturnError(CurPtr,
				691	"Only comments are supported after #ifdef NAME");
				692
				693	// If we were not processing tokens before this #ifdef,
				694	// then just return back to the lines skipping code.
				695	if (!ReturnNextLiveToken)
				696	return Kind;
				697
				698	// If we were processing tokens before this #ifdef,
				699	// and the macro is defined, then just return the next token.
				700	if (MacroIsDefined)
				701	return LexToken();
				702
				703	// We were processing tokens before this #ifdef, and the macro
				704	// is not defined, so we have to start skipping the lines.
				705	// If the skipping is successful, it will return the token following
				706	// either #else or #endif corresponding to this #ifdef.
				707	if (prepSkipRegion(ReturnNextLiveToken))
				708	return LexToken();
				709
				710	return tgtok::Error;
				711	} else if (Kind == tgtok::Else) {
				712	// Check if this #else is correct before calling prepSkipDirectiveEnd(),
				713	// which will move CurPtr away from the beginning of #else.
				714	if (PrepIncludeStack.back()->empty())
				715	return ReturnError(TokStart, "#else without #ifdef");
				716
				717	PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back();
				718
				719	if (IfdefEntry.Kind != tgtok::Ifdef) {
				720	PrintError(TokStart, "double #else");
				721	return ReturnError(IfdefEntry.SrcPos, "Previous #else is here");
				722	}
				723
				724	// Replace the corresponding #ifdef's control with its negation
				725	// on the control stack.
				726	PrepIncludeStack.back()->pop_back();
				727	PrepIncludeStack.back()->push_back(
				728	{Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)});
				729
				730	if (!prepSkipDirectiveEnd())
				731	return ReturnError(CurPtr, "Only comments are supported after #else");
				732
				733	// If we were processing tokens before this #else,
				734	// we have to start skipping lines until the matching #endif.
				735	if (ReturnNextLiveToken) {
				736	if (prepSkipRegion(ReturnNextLiveToken))
				737	return LexToken();
				738
				739	return tgtok::Error;
				740	}
				741
				742	// Return to the lines skipping code.
				743	return Kind;
				744	} else if (Kind == tgtok::Endif) {
				745	// Check if this #endif is correct before calling prepSkipDirectiveEnd(),
				746	// which will move CurPtr away from the beginning of #endif.
				747	if (PrepIncludeStack.back()->empty())
				748	return ReturnError(TokStart, "#endif without #ifdef");
				749
				750	auto &IfdefOrElseEntry = PrepIncludeStack.back()->back();
				751
				752	if (IfdefOrElseEntry.Kind != tgtok::Ifdef &&
				753	IfdefOrElseEntry.Kind != tgtok::Else) {
				754	PrintFatalError("Invalid preprocessor control on the stack");
				755	return tgtok::Error;
				756	}
				757
				758	if (!prepSkipDirectiveEnd())
				759	return ReturnError(CurPtr, "Only comments are supported after #endif");
				760
				761	PrepIncludeStack.back()->pop_back();
				762
				763	// If we were processing tokens before this #endif, then
				764	// we should continue it.
				765	if (ReturnNextLiveToken) {
				766	return LexToken();
				767	}
				768
				769	// Return to the lines skipping code.
				770	return Kind;
				771	} else if (Kind == tgtok::Define) {
				772	StringRef MacroName = prepLexMacroName();
				773	if (MacroName.empty())
				774	return ReturnError(TokStart, "Expected macro name after #define");
				775
				776	if (!DefinedMacros.insert(MacroName).second)
				777	PrintWarning(getLoc(),
				778	"Duplicate definition of macro: " + Twine(MacroName));
				779
				780	if (!prepSkipDirectiveEnd())
				781	return ReturnError(CurPtr,
				782	"Only comments are supported after #define NAME");
				783
				784	if (!ReturnNextLiveToken) {
				785	PrintFatalError("#define must be ignored during the lines skipping");
				786	return tgtok::Error;
				787	}
				788
				789	return LexToken();
				790	}
				791
				792	PrintFatalError("Preprocessing directive is not supported");
				793	return tgtok::Error;
				794	}
				795
				796	bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
				797	if (!MustNeverBeFalse)
				798	PrintFatalError("Invalid recursion.");
				799
				800	do {
				801	// Skip all symbols to the line end.
				802	prepSkipToLineEnd();
				803
				804	// Find the first non-whitespace symbol in the next line(s).
				805	if (!prepSkipLineBegin())
				806	return false;
				807
				808	// If the first non-blank/comment symbol on the line is '#',
				809	// it may be a start of preprocessing directive.
				810	//
				811	// If it is not '#' just go to the next line.
				812	if (*CurPtr == '#')
				813	++CurPtr;
				814	else
				815	continue;
				816
				817	tgtok::TokKind Kind = prepIsDirective();
				818
				819	// If we did not find a preprocessing directive or it is #define,
				820	// then just skip to the next line. We do not have to do anything
				821	// for #define in the line-skipping mode.
				822	if (Kind == tgtok::Error \|\| Kind == tgtok::Define)
				823	continue;
				824
				825	tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);
				826
				827	// If lexPreprocessor() encountered an error during lexing this
				828	// preprocessor idiom, then return false to the calling lexPreprocessor().
				829	// This will force tgtok::Error to be returned to the tokens processing.
				830	if (ProcessedKind == tgtok::Error)
				831	return false;
				832
				833	if (Kind != ProcessedKind)
				834	PrintFatalError("prepIsDirective() and lexPreprocessor() "
				835	"returned different token kinds");
				836
				837	// If this preprocessing directive enables tokens processing,
				838	// then return to the lexPreprocessor() and get to the next token.
				839	// We can move from line-skipping mode to processing tokens only
				840	// due to #else or #endif.
				841	if (prepIsProcessingEnabled()) {
				842	if (Kind != tgtok::Else && Kind != tgtok::Endif) {
				843	PrintFatalError("Tokens processing was enabled by an unexpected "
				844	"preprocessing directive");
				845	return false;
				846	}
				847
				848	return true;
				849	}
				850	} while (CurPtr != CurBuf.end());
				851
				852	// We have reached the end of the file, but never left the lines-skipping
				853	// mode. This means there is no matching #endif.
				854	prepReportPreprocessorStackError();
				855	return false;
				856	}
				857
				858	StringRef TGLexer::prepLexMacroName() {
				859	// Skip whitespaces between the preprocessing directive and the macro name.
				860	while (CurPtr == ' ' \|\| CurPtr == '\t')
				861	++CurPtr;
				862
				863	TokStart = CurPtr;
				864	// Macro names start with [a-zA-Z_].
				865	if (CurPtr != '_' && !isalpha(CurPtr))
				866	return "";
				867
				868	// Match the rest of the identifier regex: [0-9a-zA-Z_]*
				869	while (isalpha(CurPtr) \|\| isdigit(CurPtr) \|\| *CurPtr == '_')
				870	++CurPtr;
				871
				872	return StringRef(TokStart, CurPtr - TokStart);
				873	}
				874
				875	bool TGLexer::prepSkipLineBegin() {
				876	while (CurPtr != CurBuf.end()) {
				877	switch (*CurPtr) {
				878	case ' ':
				879	case '\t':
				880	case '\n':
				881	case '\r':
				882	break;
				883
				884	case '/': {
				885	int NextChar = peekNextChar(1);
				886	if (NextChar == '*') {
				887	// Skip C-style comment.
				888	// Note that we do not care about skipping the C++-style comments.
				889	// If the line contains "//", it may not contain any processable
				890	// preprocessing directive. Just return CurPtr pointing to
				891	// the first '/' in this case. We also do not care about
				892	// incorrect symbols after the first '/' - we are in lines-skipping
				893	// mode, so incorrect code is allowed to some extent.
				894
				895	// Set TokStart to the beginning of the comment to enable proper
				896	// diagnostic printing in case of error in SkipCComment().
				897	TokStart = CurPtr;
				898
				899	// CurPtr must point to '*' before call to SkipCComment().
				900	++CurPtr;
				901	if (SkipCComment())
				902	return false;
				903	} else {
				904	// CurPtr points to the non-whitespace '/'.
				905	return true;
				906	}
				907
				908	// We must not increment CurPtr after the comment was lexed.
				909	continue;
				910	}
				911
				912	default:
				913	return true;
				914	}
				915
				916	++CurPtr;
				917	}
				918
				919	// We have reached the end of the file. Return to the lines skipping
				920	// code, and allow it to handle the EOF as needed.
				921	return true;
				922	}
				923
				924	bool TGLexer::prepSkipDirectiveEnd() {
				925	while (CurPtr != CurBuf.end()) {
				926	switch (*CurPtr) {
				927	case ' ':
				928	case '\t':
				929	break;
				930
				931	case '\n':
				932	case '\r':
				933	return true;
				934
				935	case '/': {
				936	int NextChar = peekNextChar(1);
				937	if (NextChar == '/') {
				938	// Skip C++-style comment.
				939	// We may just return true now, but let's skip to the line/buffer end
				940	// to simplify the method specification.
				941	++CurPtr;
				942	SkipBCPLComment();
				943	} else if (NextChar == '*') {
				944	// When we are skipping C-style comment at the end of a preprocessing
				945	// directive, we can skip several lines. If any meaningful TD token
				946	// follows the end of the C-style comment on the same line, it will
				947	// be considered as an invalid usage of TD token.
				948	// For example, we want to forbid usages like this one:
				949	// #define MACRO class Class {}
				950	// But with C-style comments we also disallow the following:
				951	// #define MACRO /* This macro is used
				952	// to ... */ class Class {}
				953	// One can argue that this should be allowed, but it does not seem
				954	// to be worth of the complication. Moreover, this matches
				955	// the C preprocessor behavior.
				956
				957	// Set TokStart to the beginning of the comment to enable proper
				958	// diagnostic printer in case of error in SkipCComment().
				959	TokStart = CurPtr;
				960	++CurPtr;
				961	if (SkipCComment())
				962	return false;
				963	} else {
				964	TokStart = CurPtr;
				965	PrintError(CurPtr, "Unexpected character");
				966	return false;
				967	}
				968
				969	// We must not increment CurPtr after the comment was lexed.
				970	continue;
				971	}
				972
				973	default:
				974	// Do not allow any non-whitespaces after the directive.
				975	TokStart = CurPtr;
				976	return false;
				977	}
				978
				979	++CurPtr;
				980	}
				981
				982	return true;
				983	}
				984
				985	void TGLexer::prepSkipToLineEnd() {
				986	while (CurPtr != '\n' && CurPtr != '\r' && CurPtr != CurBuf.end())
				987	++CurPtr;
				988	}
				989
				990	bool TGLexer::prepIsProcessingEnabled() {
				991	for (auto I = PrepIncludeStack.back()->rbegin(),
				992	E = PrepIncludeStack.back()->rend();
				993	I != E; ++I) {
				994	if (!I->IsDefined)
				995	return false;
				996	}
				997
				998	return true;
				999	}
				1000
				1001	void TGLexer::prepReportPreprocessorStackError() {
				1002	if (PrepIncludeStack.back()->empty())
				1003	PrintFatalError("prepReportPreprocessorStackError() called with "
				1004	"empty control stack");
				1005
				1006	auto &PrepControl = PrepIncludeStack.back()->back();
				1007	PrintError(CurBuf.end(), "Reached EOF without matching #endif");
				1008	PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");
				1009
				1010	TokStart = CurPtr;
				1011	}