20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/SourceMgr.h"
33 return c ==
'$' || c ==
'.' || c ==
'_' || c ==
'-';
38 : sourceMgr(sourceMgr), context(context), codeCompleteLoc(nullptr) {
39 auto bufferID = sourceMgr.getMainFileID();
43 if (sourceMgr.getNumBuffers() > 1) {
44 unsigned lastFileID = sourceMgr.getNumBuffers();
45 const llvm::MemoryBuffer *main = sourceMgr.getMemoryBuffer(bufferID);
46 const llvm::MemoryBuffer *last = sourceMgr.getMemoryBuffer(lastFileID);
47 if (main->getBufferStart() <= last->getBufferStart() &&
48 main->getBufferEnd() >= last->getBufferEnd()) {
49 bufferID = lastFileID;
52 curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
53 curPtr = curBuffer.begin();
56 if (codeCompleteContext)
64 unsigned mainFileID = sourceMgr.getMainFileID();
68 auto &bufferInfo = sourceMgr.getBufferInfo(mainFileID);
69 unsigned lineNo = bufferInfo.getLineNumber(loc.getPointer());
71 (loc.getPointer() - bufferInfo.getPointerForLineNumber(lineNo)) + 1;
72 auto *buffer = sourceMgr.getMemoryBuffer(mainFileID);
79 Token Lexer::emitError(
const char *loc,
const Twine &message) {
82 return formToken(Token::error, loc);
86 const char *curBufferEnd = curBuffer.end();
88 const char *tokStart = curPtr;
91 if (tokStart == codeCompleteLoc)
92 return formToken(Token::code_complete, tokStart);
94 if (tokStart == curBufferEnd)
95 return formToken(Token::eof, tokStart);
101 if (isalpha(curPtr[-1]))
102 return lexBareIdentifierOrKeyword(tokStart);
105 return emitError(tokStart,
"unexpected character");
116 return lexBareIdentifierOrKeyword(tokStart);
121 if (curPtr - 1 == curBufferEnd)
122 return formToken(Token::eof, tokStart);
126 return formToken(Token::colon, tokStart);
128 return formToken(Token::comma, tokStart);
130 return lexEllipsis(tokStart);
132 return formToken(Token::l_paren, tokStart);
134 return formToken(Token::r_paren, tokStart);
136 if (*curPtr ==
'-' && *(curPtr + 1) ==
'#') {
138 return formToken(Token::file_metadata_begin, tokStart);
140 return formToken(Token::l_brace, tokStart);
142 return formToken(Token::r_brace, tokStart);
144 return formToken(Token::l_square, tokStart);
146 return formToken(Token::r_square, tokStart);
148 return formToken(Token::less, tokStart);
150 return formToken(Token::greater, tokStart);
152 return formToken(Token::equal, tokStart);
155 return formToken(Token::plus, tokStart);
157 return formToken(Token::star, tokStart);
159 if (*curPtr ==
'>') {
161 return formToken(Token::arrow, tokStart);
163 return formToken(Token::minus, tokStart);
166 return formToken(Token::question, tokStart);
169 return formToken(Token::vertical_bar, tokStart);
172 if (*curPtr ==
'/') {
176 return formToken(Token::slash, tokStart);
179 return lexAtIdentifier(tokStart);
182 if (*curPtr ==
'-' && *(curPtr + 1) ==
'}') {
184 return formToken(Token::file_metadata_end, tokStart);
190 return lexPrefixedIdentifier(tokStart);
192 return lexString(tokStart);
204 return lexNumber(tokStart);
213 Token Lexer::lexAtIdentifier(
const char *tokStart) {
214 char cur = *curPtr++;
218 Token stringIdentifier = lexString(curPtr);
219 if (stringIdentifier.
is(Token::error))
220 return stringIdentifier;
221 return formToken(Token::at_identifier, tokStart);
225 if (!isalpha(cur) && cur !=
'_')
226 return emitError(curPtr - 1,
227 "@ identifier expected to start with letter or '_'");
229 while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr ==
'_' ||
230 *curPtr ==
'$' || *curPtr ==
'.')
232 return formToken(Token::at_identifier, tokStart);
240 Token Lexer::lexBareIdentifierOrKeyword(
const char *tokStart) {
242 while (isalpha(*curPtr) || isdigit(*curPtr) || *curPtr ==
'_' ||
243 *curPtr ==
'$' || *curPtr ==
'.')
247 StringRef spelling(tokStart, curPtr - tokStart);
249 auto isAllDigit = [](StringRef str) {
250 return llvm::all_of(str, llvm::isDigit);
254 if ((spelling.size() > 1 && tokStart[0] ==
'i' &&
255 isAllDigit(spelling.drop_front())) ||
256 ((spelling.size() > 2 && tokStart[1] ==
'i' &&
257 (tokStart[0] ==
's' || tokStart[0] ==
'u')) &&
258 isAllDigit(spelling.drop_front(2))))
259 return Token(Token::inttype, spelling);
262 #define TOK_KEYWORD(SPELLING) .Case(#SPELLING, Token::kw_##SPELLING)
263 #include "TokenKinds.def"
264 .Default(Token::bare_identifier);
273 void Lexer::skipComment() {
275 assert(*curPtr ==
'/');
278 const char *curBufferEnd = curBuffer.end();
280 if (curPtr == curBufferEnd)
290 if (curPtr - 1 == curBufferEnd) {
306 Token Lexer::lexEllipsis(
const char *tokStart) {
307 assert(curPtr[-1] ==
'.');
309 if (curPtr == curBuffer.end() || *curPtr !=
'.' || *(curPtr + 1) !=
'.')
310 return emitError(curPtr,
"expected three consecutive dots for an ellipsis");
313 return formToken(Token::ellipsis, tokStart);
321 Token Lexer::lexNumber(
const char *tokStart) {
322 assert(isdigit(curPtr[-1]));
325 if (curPtr[-1] ==
'0' && *curPtr ==
'x') {
328 if (!isxdigit(curPtr[1]))
329 return formToken(Token::integer, tokStart);
332 while (isxdigit(*curPtr))
335 return formToken(Token::integer, tokStart);
339 while (isdigit(*curPtr))
343 return formToken(Token::integer, tokStart);
347 while (isdigit(*curPtr))
350 if (*curPtr ==
'e' || *curPtr ==
'E') {
351 if (isdigit(
static_cast<unsigned char>(curPtr[1])) ||
352 ((curPtr[1] ==
'-' || curPtr[1] ==
'+') &&
353 isdigit(
static_cast<unsigned char>(curPtr[2])))) {
355 while (isdigit(*curPtr))
359 return formToken(Token::floatliteral, tokStart);
371 Token Lexer::lexPrefixedIdentifier(
const char *tokStart) {
376 kind = Token::hash_identifier;
377 errorKind =
"invalid attribute name";
380 kind = Token::percent_identifier;
381 errorKind =
"invalid SSA name";
384 kind = Token::caret_identifier;
385 errorKind =
"invalid block name";
388 kind = Token::exclamation_identifier;
389 errorKind =
"invalid type identifier";
392 llvm_unreachable(
"invalid caller");
396 if (isdigit(*curPtr)) {
398 while (isdigit(*curPtr))
400 }
else if (isalpha(*curPtr) ||
isPunct(*curPtr)) {
403 }
while (isalpha(*curPtr) || isdigit(*curPtr) ||
isPunct(*curPtr));
404 }
else if (curPtr == codeCompleteLoc) {
405 return formToken(Token::code_complete, tokStart);
407 return emitError(curPtr - 1, errorKind);
411 if (codeCompleteLoc && codeCompleteLoc >= tokStart &&
412 codeCompleteLoc <= curPtr) {
413 return Token(Token::code_complete,
414 StringRef(tokStart, codeCompleteLoc - tokStart));
417 return formToken(
kind, tokStart);
425 Token Lexer::lexString(
const char *tokStart) {
426 assert(curPtr[-1] ==
'"');
428 const char *curBufferEnd = curBuffer.end();
434 if (curPtr == codeCompleteLoc)
435 return formToken(Token::code_complete, tokStart);
439 return formToken(Token::string, tokStart);
443 if (curPtr - 1 != curBufferEnd)
449 return emitError(curPtr - 1,
"expected '\"' in string literal");
452 if (*curPtr ==
'"' || *curPtr ==
'\\' || *curPtr ==
'n' || *curPtr ==
't')
454 else if (llvm::isHexDigit(*curPtr) && llvm::isHexDigit(curPtr[1]))
458 return emitError(curPtr - 1,
"unknown escape in string literal");
static bool isPunct(char c)
union mlir::linalg::@1243::ArityGroupAndKind::Kind kind
This class provides an abstract interface into the parser for hooking in code completion events.
SMLoc getCodeCompleteLoc() const
Return the source location used to provide code completion.
static FileLineColLoc get(StringAttr filename, unsigned line, unsigned column)
const llvm::SourceMgr & getSourceMgr()
Location getEncodedSourceLocation(SMLoc loc)
Encode the specified source location information into a Location object for attachment to the IR or error reporting.
Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext *context, AsmParserCodeCompleteContext *codeCompleteContext)
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
MLIRContext is the top-level object for a collection of MLIR operations.
This represents a token in the MLIR syntax.
Include the generated interface declarations.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.