MLIR  19.0.0git
Lexer.h
Go to the documentation of this file.
1 //===- Lexer.h - MLIR PDLL Frontend Lexer -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LIB_TOOLS_PDLL_PARSER_LEXER_H_
10 #define LIB_TOOLS_PDLL_PARSER_LEXER_H_
11 
12 #include "mlir/Support/LLVM.h"
13 #include "llvm/ADT/StringRef.h"
14 #include "llvm/Support/SMLoc.h"
15 
16 namespace llvm {
17 class SourceMgr;
18 } // namespace llvm
19 
20 namespace mlir {
21 struct LogicalResult;
22 
23 namespace pdll {
24 class CodeCompleteContext;
25 
26 namespace ast {
27 class DiagnosticEngine;
28 } // namespace ast
29 
30 //===----------------------------------------------------------------------===//
31 // Token
32 //===----------------------------------------------------------------------===//
33 
/// A lexed token: a Kind discriminator paired with the spelling (the source
/// bytes) the token was formed from.
34 class Token {
35 public:
     /// NOTE(review): this listing skips some source line numbers inside the
     /// enum (e.g. 39, 41, 43, 46, 49-53), so not every enumerator is visible
     /// here. Members below reference enumerators that are not shown in this
     /// listing (string_block, KW_BEGIN, KW_END, KW_DEPENDENT_BEGIN,
     /// KW_DEPENDENT_END).
36  enum Kind {
37  /// Markers.
38  eof,
40  /// Token signifying a code completion location.
42  /// Token signifying a code completion location within a string.
44 
45  /// Keywords.
47  /// Dependent keywords, i.e. those that are treated as keywords depending on
48  /// the current parser context.
54 
55  /// General keywords.
74 
75  /// Punctuation.
79  dot,
83  /// Paired punctuation.
93 
94  /// Tokens.
99  string
100  };
      /// Construct a token of the given kind whose contents are `spelling`.
      /// Only the StringRef is stored; the bytes themselves are not copied.
101  Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}
102 
103  /// Given a token containing a string literal, return its value, including
104  /// removing the quote characters and unescaping the contents of the string.
105  std::string getStringValue() const;
106 
107  /// Returns true if the current token is a string literal, i.e. its kind is
      /// either `string` or `string_block`.
108  bool isString() const { return isAny(Token::string, Token::string_block); }
109 
110  /// Returns true if the current token is a keyword.
111  bool isKeyword() const {
      // Exclusive range check: the kind must lie strictly between the KW_BEGIN
      // and KW_END enumerators.
112  return kind > Token::KW_BEGIN && kind < Token::KW_END;
113  }
114 
115  /// Returns true if the current token is a keyword in a dependent context, and
116  /// in any other situation (e.g. variable names) may be treated as an
117  /// identifier.
118  bool isDependentKeyword() const {
      // Exclusive range check against the KW_DEPENDENT_BEGIN / KW_DEPENDENT_END
      // enumerators.
119  return kind > Token::KW_DEPENDENT_BEGIN && kind < Token::KW_DEPENDENT_END;
120  }
121 
122  /// Return the bytes that make up this token.
123  StringRef getSpelling() const { return spelling; }
124 
125  /// Return the kind of this token.
126  Kind getKind() const { return kind; }
127 
128  /// Return true if this token is one of the specified kinds.
129  bool isAny(Kind k1, Kind k2) const { return is(k1) || is(k2); }
      /// Overload for three or more kinds: tests `k1`, then recurses on the
      /// remaining kinds until the two-kind overload above terminates it.
130  template <typename... T>
131  bool isAny(Kind k1, Kind k2, Kind k3, T... others) const {
132  return is(k1) || isAny(k2, k3, others...);
133  }
134 
135  /// Return if the token does not have the given kind.
136  bool isNot(Kind k) const { return k != kind; }
      /// Overload for two or more kinds: true only if the token matches none of
      /// them (the negation of isAny over the same kinds).
137  template <typename... T>
138  bool isNot(Kind k1, Kind k2, T... others) const {
139  return !isAny(k1, k2, others...);
140  }
141 
142  /// Return if the token has the given kind.
143  bool is(Kind k) const { return kind == k; }
144 
145  /// Return a location for the start of this token (the address of the first
      /// byte of the spelling).
146  SMLoc getStartLoc() const { return SMLoc::getFromPointer(spelling.data()); }
147  /// Return a location at the end of this token (one past the last byte of
      /// the spelling).
148  SMLoc getEndLoc() const {
149  return SMLoc::getFromPointer(spelling.data() + spelling.size());
150  }
151  /// Return a location for the range of this token, from getStartLoc() to
      /// getEndLoc().
152  SMRange getLoc() const { return SMRange(getStartLoc(), getEndLoc()); }
153 
154 private:
155  /// Discriminator that indicates the kind of token this is.
156  Kind kind;
157 
158  /// A reference to the entire token contents; this is always a pointer into
159  /// a memory buffer owned by the source manager.
160  StringRef spelling;
161 };
162 
163 //===----------------------------------------------------------------------===//
164 // Lexer
165 //===----------------------------------------------------------------------===//
166 
/// The PDLL frontend lexer. It holds a reference to a source manager and a
/// diagnostic engine, tracks a cursor (`curPtr`) into the input, and forms
/// Tokens from the text between a token start and that cursor.
167 class Lexer {
168 public:
      /// Construct a lexer from a source manager, a diagnostic engine and a
      /// code completion context.
      /// NOTE(review): `codeCompleteContext` is passed by pointer and the
      /// completion location member below is described as optional, so it can
      /// presumably be null - confirm against Lexer.cpp.
169  Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine,
170  CodeCompleteContext *codeCompleteContext);
171  ~Lexer();
172 
173  /// Return a reference to the source manager used by the lexer.
174  llvm::SourceMgr &getSourceMgr() { return srcMgr; }
175 
176  /// Return a reference to the diagnostic engine used by the lexer.
177  ast::DiagnosticEngine &getDiagEngine() { return diagEngine; }
178 
179  /// Push an include of the given file. This will cause the lexer to start
180  /// processing the provided file. Returns failure if the file could not be
181  /// opened, success otherwise.
182  LogicalResult pushInclude(StringRef filename, SMRange includeLoc);
183 
184  /// Lex the next token and return it.
      /// NOTE(review): source line 185, the declaration this comment documents,
      /// is missing from this listing; the member index at the end of the page
      /// lists it as `Token lexToken()`.
186 
187  /// Change the position of the lexer cursor. The next token we lex will start
188  /// at the designated point in the input.
189  void resetPointer(const char *newPointer) { curPtr = newPointer; }
190 
191  /// Emit an error to the lexer with the given location and message.
      /// The location may be given either as a range or as a raw pointer.
192  Token emitError(SMRange loc, const Twine &msg);
193  Token emitError(const char *loc, const Twine &msg);
      /// NOTE(review): presumably emits the error at `loc` together with an
      /// attached note `note` at `noteLoc` - confirm against Lexer.cpp.
194  Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc,
195  const Twine &note);
196 
197 private:
      /// Build a token of the given kind whose spelling starts at `tokStart`
      /// and ends at the current cursor position (`curPtr`).
198  Token formToken(Token::Kind kind, const char *tokStart) {
199  return Token(kind, StringRef(tokStart, curPtr - tokStart));
200  }
201 
202  /// Return the next character in the stream.
203  int getNextChar();
204 
205  /// Lex methods.
206  void lexComment();
207  Token lexDirective(const char *tokStart);
208  Token lexIdentifier(const char *tokStart);
209  Token lexNumber(const char *tokStart);
210  Token lexString(const char *tokStart, bool isStringBlock);
211 
      /// The source manager returned by getSourceMgr().
212  llvm::SourceMgr &srcMgr;
      /// NOTE(review): presumably the source manager id and the contents of the
      /// buffer currently being lexed - confirm against Lexer.cpp.
213  int curBufferID;
214  StringRef curBuffer;
      /// The lexer cursor. resetPointer() overwrites it and formToken() uses it
      /// as the end of the token being formed.
215  const char *curPtr;
216 
217  /// The engine used to emit diagnostics during lexing/parsing.
218  ast::DiagnosticEngine &diagEngine;
219 
220  /// A flag indicating if we added a default diagnostic handler to the provided
221  /// diagEngine.
222  bool addedHandlerToDiagEngine;
223 
224  /// The optional code completion point within the input file.
225  const char *codeCompletionLocation;
226 };
227 } // namespace pdll
228 } // namespace mlir
229 
230 #endif // LIB_TOOLS_PDLL_PARSER_LEXER_H_
This class provides an abstract interface into the parser for hooking in code completion events.
Definition: CodeComplete.h:30
Token emitError(SMRange loc, const Twine &msg)
Emit an error to the lexer with the given location and message.
Definition: Lexer.cpp:121
Token emitError(const char *loc, const Twine &msg)
Token emitErrorAndNote(SMRange loc, const Twine &msg, SMRange noteLoc, const Twine &note)
Definition: Lexer.cpp:125
Token lexToken()
Lex the next token and return it.
llvm::SourceMgr & getSourceMgr()
Return a reference to the source manager used by the lexer.
Definition: Lexer.h:174
ast::DiagnosticEngine & getDiagEngine()
Return a reference to the diagnostic engine used by the lexer.
Definition: Lexer.h:177
void resetPointer(const char *newPointer)
Change the position of the lexer cursor.
Definition: Lexer.h:189
LogicalResult pushInclude(StringRef filename, SMRange includeLoc)
Push an include of the given file.
Definition: Lexer.cpp:108
Lexer(llvm::SourceMgr &mgr, ast::DiagnosticEngine &diagEngine, CodeCompleteContext *codeCompleteContext)
Definition: Lexer.cpp:75
bool isNot(Kind k) const
Return if the token does not have the given kind.
Definition: Lexer.h:136
bool isNot(Kind k1, Kind k2, T... others) const
Definition: Lexer.h:138
bool is(Kind k) const
Return if the token has the given kind.
Definition: Lexer.h:143
StringRef getSpelling() const
Return the bytes that make up this token.
Definition: Lexer.h:123
SMRange getLoc() const
Return a location for the range of this token.
Definition: Lexer.h:152
Token(Kind kind, StringRef spelling)
Definition: Lexer.h:101
bool isString() const
Returns true if the current token is a string literal.
Definition: Lexer.h:108
bool isDependentKeyword() const
Returns true if the current token is a keyword in a dependent context, and in any other situation (e.g. variable names) may be treated as an identifier.
Definition: Lexer.h:118
@ code_complete_string
Token signifying a code completion location within a string.
Definition: Lexer.h:43
@ directive
Tokens.
Definition: Lexer.h:95
@ eof
Markers.
Definition: Lexer.h:38
@ code_complete
Token signifying a code completion location.
Definition: Lexer.h:41
@ arrow
Punctuation.
Definition: Lexer.h:76
@ KW_BEGIN
Keywords.
Definition: Lexer.h:46
@ KW_DEPENDENT_BEGIN
Dependent keywords, i.e. those that are treated as keywords depending on the current parser context.
Definition: Lexer.h:49
@ less
Paired punctuation.
Definition: Lexer.h:84
@ KW_DEPENDENT_END
Definition: Lexer.h:53
@ kw_Attr
General keywords.
Definition: Lexer.h:56
SMLoc getEndLoc() const
Return a location at the end of this token.
Definition: Lexer.h:148
SMLoc getStartLoc() const
Return a location for the start of this token.
Definition: Lexer.h:146
bool isAny(Kind k1, Kind k2, Kind k3, T... others) const
Definition: Lexer.h:131
bool isAny(Kind k1, Kind k2) const
Return true if this token is one of the specified kinds.
Definition: Lexer.h:129
bool isKeyword() const
Returns true if the current token is a keyword.
Definition: Lexer.h:111
Kind getKind() const
Return the kind of this token.
Definition: Lexer.h:126
std::string getStringValue() const
Given a token containing a string literal, return its value, including removing the quote characters and unescaping the contents of the string.
This class manages the construction and emission of PDLL diagnostics.
Definition: Diagnostic.h:140
Include the generated interface declarations.
Definition: CallGraph.h:229
Include the generated interface declarations.
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26