doxygen/DialectSymbolParser_8cpp_source.html

 //===- DialectSymbolParser.cpp - MLIR Dialect Symbol Parser  --------------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // This file implements the parser for the dialect symbols, such as extended

 // attributes and types.

 //

 //===----------------------------------------------------------------------===//


 #include "AsmParserImpl.h"

 #include "Parser.h"

 #include "mlir/AsmParser/AsmParserState.h"

 #include "mlir/IR/AsmState.h"

 #include "mlir/IR/Attributes.h"

 #include "mlir/IR/BuiltinAttributeInterfaces.h"

 #include "mlir/IR/BuiltinAttributes.h"

 #include "mlir/IR/BuiltinTypes.h"

 #include "mlir/IR/Dialect.h"

 #include "mlir/IR/DialectImplementation.h"

 #include "mlir/IR/MLIRContext.h"

 #include "mlir/Support/LLVM.h"

 #include "llvm/Support/MemoryBuffer.h"

 #include "llvm/Support/SourceMgr.h"

 #include <cassert>

 #include <cstddef>

 #include <utility>


 using namespace mlir;

 using namespace mlir::detail;

 using llvm::MemoryBuffer;

 using llvm::SourceMgr;


 namespace {

 /// Concrete DialectAsmParser that is handed to a dialect while it parses one
 /// of its own attributes or types. It builds on AsmParserImpl, which ties the
 /// dialect-facing API back to the main MLIR parser, and additionally keeps the
 /// full textual specification of the symbol being parsed.
 class CustomDialectAsmParser : public AsmParserImpl<DialectAsmParser> {
   /// Text of the complete symbol specification currently being parsed.
   StringRef fullSpec;

 public:
   /// Forwards the location of `parser`'s current token, together with
   /// `parser` itself, to the AsmParserImpl base and records `fullSpec`.
   CustomDialectAsmParser(StringRef fullSpec, Parser &parser)
       : AsmParserImpl<DialectAsmParser>(parser.getToken().getLoc(), parser),
         fullSpec(fullSpec) {}

   ~CustomDialectAsmParser() override = default;

   /// Hands back the full specification of the symbol under parse, so that a
   /// dialect may run a separate parser over it if necessary.
   StringRef getFullSymbolSpec() const override { return fullSpec; }
 };

 } // namespace


 /// Parse the body of a dialect symbol, which starts and ends with <>'s and may
 /// contain arbitrarily nested, properly balanced punctuation.
 ///
 ///   pretty-dialect-sym-body ::= '<' pretty-dialect-sym-contents+ '>'
 ///   pretty-dialect-sym-contents ::= pretty-dialect-sym-body
 ///                                  | '(' pretty-dialect-sym-contents+ ')'
 ///                                  | '[' pretty-dialect-sym-contents+ ']'
 ///                                  | '{' pretty-dialect-sym-contents+ '}'
 ///                                  | '[^[<({>\])}\0]+'
 ///
 /// The current token must be the opening '<'. The raw buffer is scanned
 /// character by character from there until the matching '>' is found, and the
 /// lexer is then reset to continue right after it.
 ///
 /// \param body On entry, `body.begin()` marks the start of the symbol text.
 ///        On success it is rewritten to span from that start up to the last
 ///        character consumed by the scan.
 /// \param isCodeCompletion Set to true when the scan stops because it reached
 ///        a code completion location; it is never written otherwise.
 /// \returns success() once the body has been consumed, or failure() on
 ///        unbalanced punctuation, an embedded nul / end of buffer, or a '"'
 ///        that the lexer does not turn into a string token.
 ParseResult Parser::parseDialectSymbolBody(StringRef &body,
                                            bool &isCodeCompletion) {
   // Symbol bodies are a relatively unstructured format that contains a series
   // of properly nested punctuation, with anything else in the middle. Scan
   // ahead to find it and consume it if successful, otherwise emit an error.
   const char *curPtr = getTokenSpelling().data();

   // Scan over the nested punctuation, bailing out on error and consuming until
   // we find the end. We know that we're currently looking at the '<', so we can
   // go until we find the matching '>' character.
   assert(*curPtr == '<');
   SmallVector<char, 8> nestedPunctuation;
   const char *codeCompleteLoc = state.lex.getCodeCompleteLoc();

   // Functor used to emit an unbalanced punctuation error. It reports the
   // innermost opener that is still pending, so it must only be invoked while
   // `nestedPunctuation` is non-empty.
   auto emitPunctError = [&] {
     return emitError() << "unbalanced '" << nestedPunctuation.back()
                        << "' character in pretty dialect name";
   };
   // Functor used to check for unbalanced punctuation: the innermost pending
   // opener must be `expectedToken`, in which case it is popped.
   auto checkNestedPunctuation = [&](char expectedToken) -> ParseResult {
     if (nestedPunctuation.back() != expectedToken)
       return emitPunctError();
     nestedPunctuation.pop_back();
     return success();
   };
   const char *curBufferEnd = state.lex.getBufferEnd();
   do {
     // Handle code completions, which may appear in the middle of the symbol
     // body. Clearing the stack makes the loop condition terminate the scan.
     if (curPtr == codeCompleteLoc) {
       isCodeCompletion = true;
       nestedPunctuation.clear();
       break;
     }

     // Never read at or past the end of the buffer.
     if (curBufferEnd == curPtr) {
       if (!nestedPunctuation.empty())
         return emitPunctError();
       return emitError("unexpected nul or EOF in pretty dialect name");
     }

     char c = *curPtr++;
     switch (c) {
     case '\0':
       // This also handles the EOF case.
       if (!nestedPunctuation.empty())
         return emitPunctError();
       return emitError("unexpected nul or EOF in pretty dialect name");
     case '<':
     case '[':
     case '(':
     case '{':
       nestedPunctuation.push_back(c);
       continue;

     case '-':
       // The sequence `->` is treated as a special token: skip the '>' so that
       // it is not mistaken for a closing angle bracket. Guard the lookahead
       // with the same end-of-buffer check used above, so that a trailing '-'
       // never triggers a read past the end of the buffer.
       if (curPtr != curBufferEnd && *curPtr == '>')
         ++curPtr;
       continue;

     case '>':
       if (failed(checkNestedPunctuation('<')))
         return failure();
       break;
     case ']':
       if (failed(checkNestedPunctuation('[')))
         return failure();
       break;
     case ')':
       if (failed(checkNestedPunctuation('(')))
         return failure();
       break;
     case '}':
       if (failed(checkNestedPunctuation('{')))
         return failure();
       break;
     case '"': {
       // Dispatch to the lexer to lex past strings: re-lex starting at the
       // quote and resume scanning right after the resulting token.
       resetToken(curPtr - 1);
       curPtr = state.curToken.getEndLoc().getPointer();

       // Handle code completions, which may appear in the middle of the symbol
       // body.
       if (state.curToken.isCodeCompletion()) {
         isCodeCompletion = true;
         nestedPunctuation.clear();
         break;
       }

       // Otherwise, ensure this token was actually a string.
       if (state.curToken.isNot(Token::string))
         return failure();
       break;
     }

     default:
       continue;
     }
     // Note: `continue` above jumps to the loop condition as well, so every
     // path re-checks whether the outermost '<' has been closed.
   } while (!nestedPunctuation.empty());

   // Ok, we succeeded, remember where we stopped, reset the lexer to know it is
   // consuming all this stuff, and return.
   resetToken(curPtr);

   unsigned length = curPtr - body.begin();
   body = StringRef(body.data(), length);
   return success();
 }


 /// Parse an extended dialect symbol; this is the logic shared by extended
 /// attributes and extended types.
 ///
 /// The current token of `p` is the prefixed identifier. After dropping its
 /// first character, the identifier is interpreted as one of:
 ///   * an alias, when it has no '.' and is not immediately followed by '<':
 ///     it is looked up in `aliases` (and recorded in `asmState`, if provided);
 ///   * the verbose form `dialect<...>`, when it has no '.' but is immediately
 ///     followed by '<';
 ///   * the pretty form `dialect.name` with an optional trailing `<...>` body.
 /// For the two dialect forms the result is produced by calling
 /// `createSymbol(dialectName, symbolData, loc)`.
 ///
 /// Returns a null symbol if an error was emitted.
 template <typename Symbol, typename SymbolAliasMap, typename CreateFn>
 static Symbol parseExtendedSymbol(Parser &p, AsmParserState *asmState,
                                   SymbolAliasMap &aliases,
                                   CreateFn &&createSymbol) {
   const Token startTok = p.getToken();

   // A bare code completion token (nothing after the prefix character) asks
   // for completion of the symbol itself.
   StringRef identifier = startTok.getSpelling().drop_front();
   if (startTok.isCodeCompletion() && identifier.empty())
     return p.codeCompleteDialectSymbol(aliases);

   // Remember where the identifier token sits, then step past it.
   SMRange identRange = startTok.getLocRange();
   SMLoc symbolLoc = startTok.getLoc();
   p.consumeToken();

   // A '.' in the identifier marks the pretty form `dialect.name`.
   std::pair<StringRef, StringRef> nameParts = identifier.split('.');
   StringRef dialectName = nameParts.first;
   StringRef symbolData = nameParts.second;
   bool isPrettyName = !symbolData.empty() || identifier.back() == '.';

   // Trailing data is a '<' that starts exactly where the identifier ends,
   // i.e. with no intervening characters.
   bool hasTrailingData = false;
   if (p.getToken().is(Token::less))
     hasTrailingData =
         identifier.bytes_end() == p.getTokenSpelling().bytes_begin();

   // Neither a dot nor a directly attached '<': this must be a symbol alias.
   if (!isPrettyName && !hasTrailingData) {
     auto found = aliases.find(identifier);
     if (found == aliases.end()) {
       p.emitWrongTokenError("undefined symbol alias id '" + identifier + "'");
       return nullptr;
     }
     if (asmState) {
       if constexpr (std::is_same_v<Symbol, Type>)
         asmState->addTypeAliasUses(identifier, identRange);
       else
         asmState->addAttrAliasUses(identifier, identRange);
     }
     return found->second;
   }

   // Pretty form: the symbol data begins right after the dot, and an attached
   // '<...>' body, when present, is lexed into it as well.
   if (isPrettyName) {
     symbolLoc = SMLoc::getFromPointer(symbolData.data());
     if (hasTrailingData && p.parseDialectSymbolBody(symbolData))
       return nullptr;
     return createSymbol(dialectName, symbolData, symbolLoc);
   }

   // Verbose form `dialect<...>`: start with an empty range positioned at the
   // end of the dialect name and let the body parser extend it.
   symbolData = StringRef(dialectName.end(), 0);
   bool sawCodeCompletion = false;
   if (p.parseDialectSymbolBody(symbolData, sawCodeCompletion))
     return nullptr;

   // Strip the leading '<'. The trailing '>' is only present (and stripped)
   // when the body was not cut short by a code completion.
   symbolData = symbolData.drop_front();
   if (!sawCodeCompletion)
     symbolData = symbolData.drop_back();

   return createSymbol(dialectName, symbolData, symbolLoc);
 }


 /// Parse an extended attribute.
 ///
 ///   extended-attribute ::= (dialect-attribute | attribute-alias)
 ///   dialect-attribute  ::= `#` dialect-namespace `<` attr-data `>`
 ///                          (`:` type)?
 ///                        | `#` alias-name pretty-dialect-sym-body? (`:` type)?
 ///   attribute-alias    ::= `#` alias-name
 ///
 /// `type`, when non-null, is the type the attribute is expected to have. It
 /// is used as the attribute type unless an explicit `: type` suffix is
 /// parsed, and a typed result whose type differs from it is rejected with an
 /// error. Returns a null attribute on failure.
 Attribute Parser::parseExtendedAttr(Type type) {
   MLIRContext *ctx = getContext();

   // Builds the attribute once the dialect name and symbol data are known.
   auto buildAttr = [&](StringRef dialectName, StringRef symbolData,
                        SMLoc loc) -> Attribute {
     // An explicit trailing `: type` overrides the requested type.
     Type attrType = type;
     if (consumeIf(Token::colon)) {
       attrType = parseType();
       if (!attrType)
         return Attribute();
     }

     // Without a dialect to delegate to, fall back to an opaque attribute.
     Dialect *dialect = builder.getContext()->getOrLoadDialect(dialectName);
     if (!dialect) {
       return OpaqueAttr::getChecked(
           [&] { return emitError(loc); }, StringAttr::get(ctx, dialectName),
           symbolData, attrType ? attrType : NoneType::get(ctx));
     }

     // Point the lexer at the symbol data for the duration of the dialect
     // hook, then restore the position we were at.
     const char *resumePos = getToken().getLoc().getPointer();
     resetToken(symbolData.data());

     CustomDialectAsmParser customParser(symbolData, *this);
     Attribute parsed = dialect->parseAttribute(customParser, attrType);

     resetToken(resumePos);
     return parsed;
   };

   Attribute attr = parseExtendedSymbol<Attribute>(
       *this, state.asmState, state.symbols.attributeAliasDefinitions,
       buildAttr);

   // Only a typed attribute can disagree with the requested type.
   auto typedAttr = dyn_cast_or_null<TypedAttr>(attr);
   const bool typeMismatch =
       type && typedAttr && typedAttr.getType() != type;
   if (!typeMismatch)
     return attr;

   emitError("attribute type different than expected: expected ")
       << type << ", but got " << typedAttr.getType();
   return nullptr;
 }


 /// Parse an extended type.
 ///
 ///   extended-type ::= (dialect-type | type-alias)
 ///   dialect-type  ::= `!` dialect-namespace `<` `"` type-data `"` `>`
 ///   dialect-type  ::= `!` alias-name pretty-dialect-attribute-body?
 ///   type-alias    ::= `!` alias-name
 ///
 /// Returns a null type on failure.
 Type Parser::parseExtendedType() {
   MLIRContext *ctx = getContext();

   // Builds the type once the dialect name and symbol data are known.
   auto buildType = [&](StringRef dialectName, StringRef symbolData,
                        SMLoc loc) -> Type {
     // Without a dialect to delegate to, fall back to an opaque type.
     Dialect *dialect = ctx->getOrLoadDialect(dialectName);
     if (!dialect) {
       return OpaqueType::getChecked([&] { return emitError(loc); },
                                     StringAttr::get(ctx, dialectName),
                                     symbolData);
     }

     // Point the lexer at the symbol data for the duration of the dialect
     // hook, then restore the position we were at.
     const char *resumePos = getToken().getLoc().getPointer();
     resetToken(symbolData.data());

     CustomDialectAsmParser customParser(symbolData, *this);
     Type parsed = dialect->parseType(customParser);

     resetToken(resumePos);
     return parsed;
   };

   return parseExtendedSymbol<Type>(*this, state.asmState,
                                    state.symbols.typeAliasDefinitions,
                                    buildType);
 }


 //===----------------------------------------------------------------------===//

 // mlir::parseAttribute/parseType

 //===----------------------------------------------------------------------===//


 /// Parses a symbol of type 'T' out of `inputStr` by running `parserFn` on a
 /// fresh parser created for that string. A null 'T' is returned when parsing
 /// fails.
 ///
 /// When `numReadOut` is non-null it receives the number of bytes consumed and
 /// trailing input is tolerated; when it is null, any unconsumed input is
 /// reported as an error. `isKnownNullTerminated` selects whether the input is
 /// wrapped in place or copied into a new buffer.
 template <typename T, typename ParserFn>
 static T parseSymbol(StringRef inputStr, MLIRContext *context,
                      size_t *numReadOut, bool isKnownNullTerminated,
                      ParserFn &&parserFn) {
   // The buffer is named after the string being parsed, so that the string
   // itself shows up in error diagnostics.
   SourceMgr sourceMgr;
   if (isKnownNullTerminated)
     sourceMgr.AddNewSourceBuffer(
         MemoryBuffer::getMemBuffer(inputStr, /*BufferName=*/inputStr),
         SMLoc());
   else
     sourceMgr.AddNewSourceBuffer(
         MemoryBuffer::getMemBufferCopy(inputStr, /*BufferName=*/inputStr),
         SMLoc());

   SymbolState aliasState;
   ParserConfig config(context);
   ParserState state(sourceMgr, config, aliasState, /*asmState=*/nullptr,
                     /*codeCompleteContext=*/nullptr);
   Parser parser(state);

   // Note where the first token starts so the consumed length can be derived.
   const char *startPtr = parser.getToken().getLoc().getPointer();
   T symbol = parserFn(parser);
   if (!symbol)
     return T();

   // The parser now sits on the first token that was not consumed.
   const SMLoc endLoc = parser.getToken().getLoc();
   const size_t numRead = endLoc.getPointer() - startPtr;

   // The caller asked for the consumed length: report it and accept leftovers.
   if (numReadOut) {
     *numReadOut = numRead;
     return symbol;
   }

   // Otherwise the whole input must have been consumed.
   if (numRead == inputStr.size())
     return symbol;

   parser.emitError(endLoc) << "found trailing characters: '"
                            << inputStr.drop_front(numRead) << "'";
   return T();
 }


 /// Parses a single attribute from `attrStr` by delegating to parseSymbol with
 /// a callback that invokes Parser::parseAttribute using the requested `type`.
 /// `numRead` and `isKnownNullTerminated` are forwarded unchanged.
 Attribute mlir::parseAttribute(StringRef attrStr, MLIRContext *context,
                                Type type, size_t *numRead,
                                bool isKnownNullTerminated) {
   auto parseOne = [type](Parser &parser) {
     return parser.parseAttribute(type);
   };
   return parseSymbol<Attribute>(attrStr, context, numRead,
                                 isKnownNullTerminated, parseOne);
 }

 /// Parses a single type from `typeStr` by delegating to parseSymbol with a
 /// callback that invokes Parser::parseType. `numRead` and
 /// `isKnownNullTerminated` are forwarded unchanged.
 Type mlir::parseType(StringRef typeStr, MLIRContext *context, size_t *numRead,
                      bool isKnownNullTerminated) {
   auto parseOne = [](Parser &parser) { return parser.parseType(); };
   return parseSymbol<Type>(typeStr, context, numRead, isKnownNullTerminated,
                            parseOne);
 }

AsmParserImpl.h

AsmParserState.h

AsmState.h

Attributes.h

BuiltinAttributeInterfaces.h

DialectImplementation.h

parseExtendedSymbol
static Symbol parseExtendedSymbol(Parser &p, AsmParserState *asmState, SymbolAliasMap &aliases, CreateFn &&createSymbol)
Parse an extended dialect symbol.
Definition: DialectSymbolParser.cpp:179

parseSymbol
static T parseSymbol(StringRef inputStr, MLIRContext *context, size_t *numReadOut, bool isKnownNullTerminated, ParserFn &&parserFn)
Parses a symbol, of type 'T', and returns it if parsing was successful.
Definition: DialectSymbolParser.cpp:338

Dialect.h

getContext
static MLIRContext * getContext(OpFoldResult val)
Definition: IndexingUtils.cpp:296

MLIRContext.h

llvm::SmallVector
Definition: LLVM.h:72

mlir::AsmParserState
This class represents state from a parsed MLIR textual format string.
Definition: AsmParserState.h:29

mlir::AsmParserState::addTypeAliasUses
void addTypeAliasUses(StringRef name, SMRange locations)
Definition: AsmParserState.cpp:418

mlir::AsmParserState::addAttrAliasUses
void addAttrAliasUses(StringRef name, SMRange locations)
Definition: AsmParserState.cpp:406

mlir::Attribute
Attributes are known-constant values of operations.
Definition: Attributes.h:25

mlir::DialectAsmParser
The DialectAsmParser has methods for interacting with the asm parser when parsing attributes and type...
Definition: DialectImplementation.h:56

mlir::Lexer::getBufferEnd
const char * getBufferEnd()
Returns the end of the buffer.
Definition: Lexer.h:44

mlir::Lexer::getCodeCompleteLoc
const char * getCodeCompleteLoc() const
Return the code completion location of the lexer, or nullptr if there is none.
Definition: Lexer.h:48

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60

mlir::ParserConfig
This class represents a configuration for the MLIR assembly parser.
Definition: AsmState.h:469

mlir::Token
This represents a token in the MLIR syntax.
Definition: Token.h:20

mlir::Token::getLocRange
SMRange getLocRange() const
Definition: Token.cpp:30

mlir::Token::getLoc
SMLoc getLoc() const
Definition: Token.cpp:24

mlir::Token::is
bool is(Kind k) const
Definition: Token.h:38

mlir::Token::getEndLoc
SMLoc getEndLoc() const
Definition: Token.cpp:26

mlir::Token::isNot
bool isNot(Kind k) const
Definition: Token.h:50

mlir::Token::isCodeCompletion
bool isCodeCompletion() const
Returns true if the current token represents a code completion.
Definition: Token.h:62

mlir::Token::getSpelling
StringRef getSpelling() const
Definition: Token.h:34

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::detail::AsmParserImpl
This class provides the implementation of the generic parser methods within AsmParser.
Definition: AsmParserImpl.h:28

mlir::detail::Parser
This class implements support for parsing global entities like attributes and types.
Definition: Parser.h:27

mlir::detail::Parser::parseType
Type parseType()
Parse an arbitrary type.
Definition: TypeParser.cpp:74

mlir::detail::Parser::emitError
InFlightDiagnostic emitError(const Twine &message={})
Emit an error and return failure.
Definition: Parser.cpp:192

mlir::detail::Parser::state
ParserState & state
The Parser is subclassed and reinstantiated.
Definition: Parser.h:370

mlir::detail::Parser::parseAttribute
Attribute parseAttribute(Type type={})
Parse an arbitrary attribute with an optional type.
Definition: AttributeParser.cpp:48

mlir::detail::Parser::getTokenSpelling
StringRef getTokenSpelling() const
Definition: Parser.h:104

mlir::detail::Parser::consumeToken
void consumeToken()
Advance the current lexer onto the next token.
Definition: Parser.h:119

mlir::detail::Parser::parseDialectSymbolBody
ParseResult parseDialectSymbolBody(StringRef &body, bool &isCodeCompletion)
Parse the body of a dialect symbol, which starts and ends with <>'s, and may be recursive.
Definition: DialectSymbolParser.cpp:66

mlir::detail::Parser::getContext
MLIRContext * getContext() const
Definition: Parser.h:38

mlir::detail::Parser::emitWrongTokenError
InFlightDiagnostic emitWrongTokenError(const Twine &message={})
Emit an error about a "wrong token".
Definition: Parser.cpp:215

mlir::detail::Parser::resetToken
void resetToken(const char *tokPos)
Reset the parser to the given lexer position.
Definition: Parser.h:140

mlir::detail::Parser::parseExtendedAttr
Attribute parseExtendedAttr(Type type)
Parse an extended attribute.
Definition: DialectSymbolParser.cpp:259

mlir::detail::Parser::getToken
const Token & getToken() const
Return the current token the parser is inspecting.
Definition: Parser.h:103

mlir::detail::Parser::codeCompleteDialectSymbol
Attribute codeCompleteDialectSymbol(const llvm::StringMap< Attribute > &aliases)
Definition: Parser.cpp:555

Parser.h

BuiltinAttributes.h

BuiltinTypes.h

LLVM.h

mlir::detail
AttrTypeReplacer.
Definition: AliasAnalysis.h:174

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::config
const FrozenRewritePatternSet GreedyRewriteConfig config
Definition: GreedyPatternRewriteDriver.h:284

mlir::emitError
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
Definition: Diagnostics.cpp:328

mlir::parseAttribute
Attribute parseAttribute(llvm::StringRef attrStr, MLIRContext *context, Type type={}, size_t *numRead=nullptr, bool isKnownNullTerminated=false)
This parses a single MLIR attribute to an MLIR context if it was valid.

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition: BytecodeImplementation.h:509

mlir::parseType
Type parseType(llvm::StringRef typeStr, MLIRContext *context, size_t *numRead=nullptr, bool isKnownNullTerminated=false)
This parses a single MLIR type to an MLIR context if it was valid.

mlir::detail::ParserState
This class refers to all of the state maintained globally by the parser, such as the current lexer po...
Definition: ParserState.h:51

mlir::detail::ParserState::symbols
SymbolState & symbols
The current state for symbol parsing.
Definition: ParserState.h:75

mlir::detail::ParserState::lex
Lexer lex
The lexer for the source file we're parsing.
Definition: ParserState.h:66

mlir::detail::ParserState::curToken
Token curToken
This is the next token that hasn't been consumed yet.
Definition: ParserState.h:69

mlir::detail::ParserState::asmState
AsmParserState * asmState
An optional pointer to a struct containing high level parser state to be populated during parsing.
Definition: ParserState.h:83

mlir::detail::SymbolState
This class contains record of any parsed top-level symbols.
Definition: ParserState.h:28

mlir::detail::SymbolState::attributeAliasDefinitions
llvm::StringMap< Attribute > attributeAliasDefinitions
A map from attribute alias identifier to Attribute.
Definition: ParserState.h:30