20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/StringExtras.h"
22#include "llvm/ADT/StringSwitch.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/SourceMgr.h"
33 return c ==
'$' || c ==
'.' || c ==
'_' || c ==
'-';
38 : sourceMgr(sourceMgr), context(context), codeCompleteLoc(
nullptr) {
39 auto bufferID = sourceMgr.getMainFileID();
43 if (sourceMgr.getNumBuffers() > 1) {
44 unsigned lastFileID = sourceMgr.getNumBuffers();
45 const llvm::MemoryBuffer *main = sourceMgr.getMemoryBuffer(bufferID);
46 const llvm::MemoryBuffer *last = sourceMgr.getMemoryBuffer(lastFileID);
47 if (main->getBufferStart() <= last->getBufferStart() &&
48 main->getBufferEnd() >= last->getBufferEnd()) {
49 bufferID = lastFileID;
52 curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
53 curPtr = curBuffer.begin();
56 if (codeCompleteContext)
57 codeCompleteLoc = codeCompleteContext->getCodeCompleteLoc().getPointer();
64 unsigned mainFileID = sourceMgr.getMainFileID();
66 auto [lineNo, column] = sourceMgr.getLineAndColumn(loc);
67 auto *buffer = sourceMgr.getMemoryBuffer(mainFileID);
74Token Lexer::emitError(
const char *loc,
const Twine &message) {
77 return formToken(Token::error, loc);
81 const char *curBufferEnd = curBuffer.end();
83 const char *tokStart = curPtr;
86 if (tokStart == codeCompleteLoc)
87 return formToken(Token::code_complete, tokStart);
89 if (tokStart == curBufferEnd)
90 return formToken(Token::eof, tokStart);
96 if (isalpha(curPtr[-1]))
97 return lexBareIdentifierOrKeyword(tokStart);
100 return emitError(tokStart,
"unexpected character");
111 return lexBareIdentifierOrKeyword(tokStart);
116 if (curPtr - 1 == curBufferEnd)
117 return formToken(Token::eof, tokStart);
121 return formToken(Token::colon, tokStart);
123 return formToken(Token::comma, tokStart);
125 return lexEllipsis(tokStart);
127 return formToken(Token::l_paren, tokStart);
129 return formToken(Token::r_paren, tokStart);
131 if (*curPtr ==
'-' && *(curPtr + 1) ==
'#') {
133 return formToken(Token::file_metadata_begin, tokStart);
135 return formToken(Token::l_brace, tokStart);
137 return formToken(Token::r_brace, tokStart);
139 return formToken(Token::l_square, tokStart);
141 return formToken(Token::r_square, tokStart);
143 return formToken(Token::less, tokStart);
145 return formToken(Token::greater, tokStart);
147 return formToken(Token::equal, tokStart);
150 return formToken(Token::plus, tokStart);
152 return formToken(Token::star, tokStart);
154 if (*curPtr ==
'>') {
156 return formToken(Token::arrow, tokStart);
158 return formToken(Token::minus, tokStart);
161 return formToken(Token::question, tokStart);
164 return formToken(Token::vertical_bar, tokStart);
167 if (*curPtr ==
'/') {
171 return formToken(Token::slash, tokStart);
174 return lexAtIdentifier(tokStart);
177 if (*curPtr ==
'-' && *(curPtr + 1) ==
'}') {
179 return formToken(Token::file_metadata_end, tokStart);
185 return lexPrefixedIdentifier(tokStart);
187 return lexString(tokStart);
199 return lexNumber(tokStart);
208Token Lexer::lexAtIdentifier(
const char *tokStart) {
209 char cur = *curPtr++;
213 Token stringIdentifier = lexString(curPtr);
214 if (stringIdentifier.
is(Token::error))
215 return stringIdentifier;
216 return formToken(Token::at_identifier, tokStart);
220 if (!isalpha(cur) && cur !=
'_')
222 "@ identifier expected to start with letter or '_'");
224 while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr ==
'_' ||
225 *curPtr ==
'$' || *curPtr ==
'.')
227 return formToken(Token::at_identifier, tokStart);
235Token Lexer::lexBareIdentifierOrKeyword(
const char *tokStart) {
237 while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr ==
'_' ||
238 *curPtr ==
'$' || *curPtr ==
'.')
242 StringRef spelling(tokStart, curPtr - tokStart);
244 auto isAllDigit = [](StringRef str) {
245 return llvm::all_of(str, llvm::isDigit);
249 if ((spelling.size() > 1 && tokStart[0] ==
'i' &&
250 isAllDigit(spelling.drop_front())) ||
251 ((spelling.size() > 2 && tokStart[1] ==
'i' &&
252 (tokStart[0] ==
's' || tokStart[0] ==
'u')) &&
253 isAllDigit(spelling.drop_front(2))))
254 return Token(Token::inttype, spelling);
257#define TOK_KEYWORD(SPELLING) .Case(#SPELLING, Token::kw_##SPELLING)
258#include "TokenKinds.def"
259 .Default(Token::bare_identifier);
261 return Token(kind, spelling);
268void Lexer::skipComment() {
270 assert(*curPtr ==
'/');
273 const char *curBufferEnd = curBuffer.end();
275 if (curPtr == curBufferEnd)
285 if (curPtr - 1 == curBufferEnd) {
301Token Lexer::lexEllipsis(
const char *tokStart) {
302 assert(curPtr[-1] ==
'.');
304 if (curPtr == curBuffer.end() || *curPtr !=
'.' || *(curPtr + 1) !=
'.')
305 return emitError(curPtr,
"expected three consecutive dots for an ellipsis");
308 return formToken(Token::ellipsis, tokStart);
316Token Lexer::lexNumber(
const char *tokStart) {
317 assert(isdigit(curPtr[-1]));
320 if (curPtr[-1] ==
'0' && *curPtr ==
'x') {
323 if (!isxdigit(curPtr[1]))
324 return formToken(Token::integer, tokStart);
327 while (isxdigit(*curPtr))
330 return formToken(Token::integer, tokStart);
334 while (isdigit(*curPtr))
338 return formToken(Token::integer, tokStart);
342 while (isdigit(*curPtr))
345 if (*curPtr ==
'e' || *curPtr ==
'E') {
346 if (isdigit(
static_cast<unsigned char>(curPtr[1])) ||
347 ((curPtr[1] ==
'-' || curPtr[1] ==
'+') &&
348 isdigit(
static_cast<unsigned char>(curPtr[2])))) {
350 while (isdigit(*curPtr))
354 return formToken(Token::floatliteral, tokStart);
366Token Lexer::lexPrefixedIdentifier(
const char *tokStart) {
371 kind = Token::hash_identifier;
372 errorKind =
"invalid attribute name";
375 kind = Token::percent_identifier;
376 errorKind =
"invalid SSA name";
379 kind = Token::caret_identifier;
380 errorKind =
"invalid block name";
383 kind = Token::exclamation_identifier;
384 errorKind =
"invalid type identifier";
387 llvm_unreachable(
"invalid caller");
391 if (isdigit(*curPtr)) {
393 while (isdigit(*curPtr))
395 }
else if (isalpha(*curPtr) ||
isPunct(*curPtr)) {
398 }
while (isalpha(*curPtr) || isdigit(*curPtr) ||
isPunct(*curPtr));
399 }
else if (curPtr == codeCompleteLoc) {
400 return formToken(Token::code_complete, tokStart);
402 return emitError(curPtr - 1, errorKind);
406 if (codeCompleteLoc && codeCompleteLoc >= tokStart &&
407 codeCompleteLoc <= curPtr) {
408 return Token(Token::code_complete,
409 StringRef(tokStart, codeCompleteLoc - tokStart));
412 return formToken(kind, tokStart);
420Token Lexer::lexString(
const char *tokStart) {
421 assert(curPtr[-1] ==
'"');
423 const char *curBufferEnd = curBuffer.end();
429 if (curPtr == codeCompleteLoc)
430 return formToken(Token::code_complete, tokStart);
434 return formToken(Token::string, tokStart);
438 if (curPtr - 1 != curBufferEnd)
444 return emitError(curPtr - 1,
"expected '\"' in string literal");
447 if (*curPtr ==
'"' || *curPtr ==
'\\' || *curPtr ==
'n' || *curPtr ==
't')
449 else if (llvm::isHexDigit(*curPtr) && llvm::isHexDigit(curPtr[1]))
453 return emitError(curPtr - 1,
"unknown escape in string literal");
static bool isPunct(char c)
This class provides an abstract interface into the parser for hooking in code completion events.
static FileLineColLoc get(StringAttr filename, unsigned line, unsigned column)
Location getEncodedSourceLocation(SMLoc loc)
Encode the specified source location information into a Location object for attachment to the IR or e...
const llvm::SourceMgr & getSourceMgr()
Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext *context, AsmParserCodeCompleteContext *codeCompleteContext)
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This represents a token in the MLIR syntax.
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
llvm::StringSwitch< T, R > StringSwitch