20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/SourceMgr.h"
// Body of the punctuation predicate: yields true when `c` is one of
// '$', '.', '_' or '-', and false for every other character.
// NOTE(review): the enclosing signature is not visible at this point in the
// listing; presumably `static bool isPunct(char c)` — confirm.
33 return c ==
'$' || c ==
'.' || c ==
'_' || c ==
'-';
// Constructor initializer list and the start of the constructor body.
// The source manager and context are stored as given, and the lexer starts
// with no code-completion location (nullptr).
38 : sourceMgr(sourceMgr), context(context), codeCompleteLoc(nullptr) {
// Lex the source manager's main file: curBuffer views that file's contents
// and curPtr starts at the first character of the buffer.
39 auto bufferID = sourceMgr.getMainFileID();
40 curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
41 curPtr = curBuffer.begin();
// Only taken when a code-completion context was supplied. The guarded
// statement is not visible in this listing; presumably it records the
// completion location in codeCompleteLoc — confirm.
44 if (codeCompleteContext)
// Fragment of the routine that turns an SMLoc into line/column information
// (its signature and return statement are not visible in this listing).
// All lookups are made against the source manager's main file.
52 unsigned mainFileID = sourceMgr.getMainFileID();
// Resolve the line number that contains the location's raw pointer.
56 auto &bufferInfo = sourceMgr.getBufferInfo(mainFileID);
57 unsigned lineNo = bufferInfo.getLineNumber(loc.getPointer());
// Distance from the start of that line to the location, plus one, i.e. a
// 1-based offset within the line. The left-hand side of this expression is not
// visible; presumably it initializes the column number — confirm.
59 (loc.getPointer() - bufferInfo.getPointerForLineNumber(lineNo)) + 1;
// Fetch the main file's memory buffer. Its use is in lines not shown here.
60 auto *buffer = sourceMgr.getMemoryBuffer(mainFileID);
// Produces the token returned to the caller after a lexing error.
//   loc     - pointer into the buffer where the problem was detected.
//   message - human-readable description of the problem.
// Returns a Token::error token formed at `loc`.
// NOTE(review): the statement that consumes `message` (presumably emitting a
// diagnostic at the encoded location) is not visible in this listing — confirm.
67 Token Lexer::emitError(
const char *loc,
const Twine &message) {
70 return formToken(Token::error, loc);
// Fragment of the main token-dispatch routine. Its signature, the surrounding
// loop/switch and most `case` labels are not visible in this listing, so the
// comments below describe only what each visible statement does.
// Remember where the token begins; every formToken/emitError call below
// refers back to this pointer.
75 const char *tokStart = curPtr;
// A token that starts exactly at the code-completion location is reported as
// a code_complete token instead of being lexed normally.
78 if (tokStart == codeCompleteLoc)
79 return formToken(Token::code_complete, tokStart);
// The character before curPtr is a letter: lex a bare identifier or keyword.
// NOTE(review): isalpha receives a plain `char` here; passing a negative value
// to a <cctype> function is undefined behaviour — confirm inputs are ASCII or
// cast to unsigned char.
85 if (isalpha(curPtr[-1]))
86 return lexBareIdentifierOrKeyword(tokStart);
// Anything else that reaches this point is rejected.
89 return emitError(tokStart,
"unexpected character");
// Another path that lexes a bare identifier or keyword (its case label is not
// visible).
100 return lexBareIdentifierOrKeyword(tokStart);
// The character before curPtr sits at the very end of the buffer, so the
// input is exhausted: produce the eof token.
105 if (curPtr - 1 == curBuffer.end())
106 return formToken(Token::eof, tokStart);
// Single-character punctuation tokens.
110 return formToken(Token::colon, tokStart);
112 return formToken(Token::comma, tokStart);
// Dots are delegated to lexEllipsis.
114 return lexEllipsis(tokStart);
116 return formToken(Token::l_paren, tokStart);
118 return formToken(Token::r_paren, tokStart);
// The next two characters are '-' and '#': this is the opening marker of a
// file-metadata section rather than a plain left brace.
120 if (*curPtr ==
'-' && *(curPtr + 1) ==
'#') {
122 return formToken(Token::file_metadata_begin, tokStart);
124 return formToken(Token::l_brace, tokStart);
126 return formToken(Token::r_brace, tokStart);
128 return formToken(Token::l_square, tokStart);
130 return formToken(Token::r_square, tokStart);
132 return formToken(Token::less, tokStart);
134 return formToken(Token::greater, tokStart);
136 return formToken(Token::equal, tokStart);
139 return formToken(Token::plus, tokStart);
141 return formToken(Token::star, tokStart);
// The next character is '>': form an arrow token; otherwise a minus token.
143 if (*curPtr ==
'>') {
145 return formToken(Token::arrow, tokStart);
147 return formToken(Token::minus, tokStart);
150 return formToken(Token::question, tokStart);
153 return formToken(Token::vertical_bar, tokStart);
// The next character is '/'. The guarded statements are not visible;
// presumably this is where a comment is skipped — confirm. When the check
// fails the character is rejected.
156 if (*curPtr ==
'/') {
160 return emitError(tokStart,
"unexpected character");
// '@'-style identifiers are delegated to lexAtIdentifier.
163 return lexAtIdentifier(tokStart);
// The next two characters are '-' and '}': this is the closing marker of a
// file-metadata section.
166 if (*curPtr ==
'-' && *(curPtr + 1) ==
'}') {
168 return formToken(Token::file_metadata_end, tokStart);
// Prefixed identifiers, string literals and numbers each have their own
// dedicated lexing routine.
174 return lexPrefixedIdentifier(tokStart);
176 return lexString(tokStart);
188 return lexNumber(tokStart);
// Lexes an '@' identifier whose token text begins at `tokStart`.
// Returns an at_identifier token on success, or an error token when the
// identifier is malformed.
197 Token Lexer::lexAtIdentifier(
const char *tokStart) {
// Read the first character of the identifier and step past it.
198 char cur = *curPtr++;
// String form: lex the body as a string literal starting at curPtr. An error
// from lexString is propagated unchanged; otherwise the whole text from
// tokStart becomes one at_identifier token.
// NOTE(review): the condition guarding this path is not visible in this
// listing; presumably `cur` is a double quote — confirm.
202 Token stringIdentifier = lexString(curPtr);
203 if (stringIdentifier.
is(Token::error))
204 return stringIdentifier;
205 return formToken(Token::at_identifier, tokStart);
// Bare form: the first character must be a letter or '_'. The error is
// reported at the offending character (curPtr - 1).
209 if (!isalpha(cur) && cur !=
'_')
210 return emitError(curPtr - 1,
211 "@ identifier expected to start with letter or '_'");
// Continue while the current character is a letter, digit, '_', '$' or '.'.
// The loop body is not visible; presumably it advances curPtr — confirm.
213 while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr ==
'_' ||
214 *curPtr ==
'$' || *curPtr ==
'.')
216 return formToken(Token::at_identifier, tokStart);
// Lexes a bare identifier starting at `tokStart` and classifies it as an
// integer type (inttype), a keyword (kw_*), or a plain bare_identifier.
224 Token Lexer::lexBareIdentifierOrKeyword(
const char *tokStart) {
// Continue while the current character is a letter, digit, '_', '$' or '.'.
// The loop body is not visible; presumably it advances curPtr — confirm.
226 while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr ==
'_' ||
227 *curPtr ==
'$' || *curPtr ==
'.')
// The spelling is the text from tokStart up to (not including) curPtr.
231 StringRef spelling(tokStart, curPtr - tokStart);
// Helper: true when every character of `str` is a decimal digit.
233 auto isAllDigit = [](StringRef str) {
234 return llvm::all_of(str, llvm::isDigit);
// Integer-type spellings: 'i' followed only by digits, or "si"/"ui" followed
// only by digits. The size checks ensure at least one digit follows the prefix.
238 if ((spelling.size() > 1 && tokStart[0] ==
'i' &&
239 isAllDigit(spelling.drop_front())) ||
240 ((spelling.size() > 2 && tokStart[1] ==
'i' &&
241 (tokStart[0] ==
's' || tokStart[0] ==
'u')) &&
242 isAllDigit(spelling.drop_front(2))))
243 return Token(Token::inttype, spelling);
// Keyword lookup: TokenKinds.def expands TOK_KEYWORD once per keyword, mapping
// each spelling to its Token::kw_<spelling> kind. Anything that matches no
// keyword falls back to bare_identifier.
246 #define TOK_KEYWORD(SPELLING) .Case(#SPELLING, Token::kw_##SPELLING)
247 #include "TokenKinds.def"
248 .Default(Token::bare_identifier);
250 return Token(kind, spelling);
// Skips over a comment. Most of the body is not visible in this listing.
257 void Lexer::skipComment() {
// Precondition: curPtr points at a '/' character when this is called.
259 assert(*curPtr ==
'/');
// The character before curPtr is at the end of the buffer, i.e. the input
// ended while scanning. The handling inside this branch is not visible here.
270 if (curPtr - 1 == curBuffer.end()) {
// Lexes an ellipsis ("...") whose first dot is at `tokStart`.
// Returns an ellipsis token, or an error token when fewer than three
// consecutive dots are present.
286 Token Lexer::lexEllipsis(
const char *tokStart) {
// Precondition: the character before curPtr is the first '.'.
287 assert(curPtr[-1] ==
'.');
// Both of the next two characters must be '.', and curPtr must not already be
// at the end of the buffer. The error is reported at curPtr.
289 if (curPtr == curBuffer.end() || *curPtr !=
'.' || *(curPtr + 1) !=
'.')
290 return emitError(curPtr,
"expected three consecutive dots for an ellipsis");
// NOTE(review): the statement that advances past the remaining two dots is not
// visible in this listing — confirm.
293 return formToken(Token::ellipsis, tokStart);
// Lexes a numeric literal whose first digit is at `tokStart`.
// Returns an integer token (decimal or hexadecimal) or a floatliteral token.
// Loop bodies and some branch conditions are not visible in this listing;
// presumably each digit loop advances curPtr — confirm.
301 Token Lexer::lexNumber(
const char *tokStart) {
// Precondition: the character before curPtr is a decimal digit.
302 assert(isdigit(curPtr[-1]));
// Hexadecimal form: the previous character is '0' and the current one is 'x'.
305 if (curPtr[-1] ==
'0' && *curPtr ==
'x') {
// The character after the 'x' is not a hex digit, so this is not a hex
// literal: return an integer token without consuming the 'x'.
308 if (!isxdigit(curPtr[1]))
309 return formToken(Token::integer, tokStart);
// Continue over the hexadecimal digits, then form the integer token.
312 while (isxdigit(*curPtr))
315 return formToken(Token::integer, tokStart);
// Decimal form: continue over the leading run of decimal digits.
319 while (isdigit(*curPtr))
// Integer result. The condition guarding this return is not visible;
// presumably it is taken when no '.' follows the digits — confirm.
323 return formToken(Token::integer, tokStart);
// Continue over a further run of decimal digits (presumably the fractional
// part — confirm).
327 while (isdigit(*curPtr))
// Optional exponent: 'e' or 'E' is treated as an exponent only when it is
// followed by a digit, or by a '+'/'-' sign and then a digit.
// NOTE(review): these isdigit calls cast to unsigned char while the other
// <cctype> calls in this function pass plain `char`; a negative `char` is
// undefined behaviour for <cctype> functions — consider making the calls
// consistent.
330 if (*curPtr ==
'e' || *curPtr ==
'E') {
331 if (isdigit(
static_cast<unsigned char>(curPtr[1])) ||
332 ((curPtr[1] ==
'-' || curPtr[1] ==
'+') &&
333 isdigit(
static_cast<unsigned char>(curPtr[2])))) {
// Continue over the exponent digits.
335 while (isdigit(*curPtr))
339 return formToken(Token::floatliteral, tokStart);
// Lexes an identifier introduced by a one-character prefix, where `tokStart`
// points at that prefix.
// Returns a token of the kind selected by the prefix, a code_complete token
// when the completion location falls inside the identifier, or an error token.
351 Token Lexer::lexPrefixedIdentifier(
const char *tokStart) {
// Select the token kind and matching error message from the prefix. The
// switch and its case labels are not visible in this listing; judging by the
// kind names the prefixes are '#', '%', '^' and '!' — confirm.
356 kind = Token::hash_identifier;
357 errorKind =
"invalid attribute name";
360 kind = Token::percent_identifier;
361 errorKind =
"invalid SSA name";
364 kind = Token::caret_identifier;
365 errorKind =
"invalid block name";
368 kind = Token::exclamation_identifier;
369 errorKind =
"invalid type identifier";
// No other prefix is expected from callers.
372 llvm_unreachable(
"invalid caller");
// Numeric form: the identifier starts with a digit, so continue over the run
// of digits (loop body not visible; presumably it advances curPtr).
376 if (isdigit(*curPtr)) {
378 while (isdigit(*curPtr))
// Named form: starts with a letter or an isPunct character, then continues
// over letters, digits and isPunct characters. This is the tail of a do-while
// loop whose body is not visible.
380 }
else if (isalpha(*curPtr) ||
isPunct(*curPtr)) {
383 }
while (isalpha(*curPtr) || isdigit(*curPtr) ||
isPunct(*curPtr));
// Nothing follows the prefix, but the completion location is right here:
// report a code_complete token.
384 }
else if (curPtr == codeCompleteLoc) {
385 return formToken(Token::code_complete, tokStart);
// Otherwise the prefix has no valid identifier after it. The error is reported
// at curPtr - 1 with the prefix-specific message.
387 return emitError(curPtr - 1, errorKind);
// The completion location lies within [tokStart, curPtr]: return a
// code_complete token whose spelling runs from tokStart up to that location.
391 if (codeCompleteLoc && codeCompleteLoc >= tokStart &&
392 codeCompleteLoc <= curPtr) {
393 return Token(Token::code_complete,
394 StringRef(tokStart, codeCompleteLoc - tokStart));
397 return formToken(kind, tokStart);
// Lexes a string literal whose opening quote is at `tokStart`.
// Returns a string token, a code_complete token when the completion location
// is reached inside the literal, or an error token for an unterminated literal
// or an unknown escape. The scanning loop and its case labels are not visible
// in this listing.
405 Token Lexer::lexString(
const char *tokStart) {
// Precondition: the character before curPtr is the opening double quote.
406 assert(curPtr[-1] ==
'"');
// The completion location was reached while inside the string.
413 if (curPtr == codeCompleteLoc)
414 return formToken(Token::code_complete, tokStart);
// Successful end of the literal (presumably on the closing quote — confirm).
418 return formToken(Token::string, tokStart);
// The character before curPtr is not the end of the buffer. The guarded
// statement is not visible in this listing.
422 if (curPtr - 1 != curBuffer.end())
// The literal was not closed. The error is reported at the character before
// curPtr.
428 return emitError(curPtr - 1,
"expected '\"' in string literal");
// Escape handling: the accepted single-character escapes are '"', '\\', 'n'
// and 't'.
431 if (*curPtr ==
'"' || *curPtr ==
'\\' || *curPtr ==
'n' || *curPtr ==
't')
// A pair of hexadecimal digits is also accepted as an escape.
433 else if (llvm::isHexDigit(*curPtr) && llvm::isHexDigit(curPtr[1]))
// Any other character in escape position is rejected.
437 return emitError(curPtr - 1,
"unknown escape in string literal");
static bool isPunct(char c)
This class provides an abstract interface into the parser for hooking in code completion events.
SMLoc getCodeCompleteLoc() const
Return the source location used to provide code completion.
static FileLineColLoc get(StringAttr filename, unsigned line, unsigned column)
const llvm::SourceMgr & getSourceMgr()
Location getEncodedSourceLocation(SMLoc loc)
Encode the specified source location information into a Location object for attachment to the IR or error reporting.
Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext *context, AsmParserCodeCompleteContext *codeCompleteContext)
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
MLIRContext is the top-level object for a collection of MLIR operations.
This represents a token in the MLIR syntax.
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.