MLIR  19.0.0git
Token.cpp
Go to the documentation of this file.
1 //===- Token.cpp - MLIR Token Implementation ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Token class for the MLIR textual form.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "Token.h"
14 #include "mlir/Support/LLVM.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/Support/ErrorHandling.h"
17 #include <cassert>
18 #include <cstdint>
19 #include <optional>
20 #include <string>
21 
22 using namespace mlir;
23 
24 SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); }
25 
26 SMLoc Token::getEndLoc() const {
27  return SMLoc::getFromPointer(spelling.data() + spelling.size());
28 }
29 
30 SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }
31 
32 /// For an integer token, return its value as an unsigned. If it doesn't fit,
33 /// return std::nullopt.
34 std::optional<unsigned> Token::getUnsignedIntegerValue() const {
35  bool isHex = spelling.size() > 1 && spelling[1] == 'x';
36 
37  unsigned result = 0;
38  if (spelling.getAsInteger(isHex ? 0 : 10, result))
39  return std::nullopt;
40  return result;
41 }
42 
43 /// For an integer token, return its value as a uint64_t. If it doesn't fit,
44 /// return std::nullopt.
45 std::optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) {
46  bool isHex = spelling.size() > 1 && spelling[1] == 'x';
47 
48  uint64_t result = 0;
49  if (spelling.getAsInteger(isHex ? 0 : 10, result))
50  return std::nullopt;
51  return result;
52 }
53 
54 /// For a floatliteral, return its value as a double. Return std::nullopt if the
55 /// value underflows or overflows.
56 std::optional<double> Token::getFloatingPointValue() const {
57  double result = 0;
58  if (spelling.getAsDouble(result))
59  return std::nullopt;
60  return result;
61 }
62 
63 /// For an inttype token, return its bitwidth.
64 std::optional<unsigned> Token::getIntTypeBitwidth() const {
65  assert(getKind() == inttype);
66  unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2);
67  unsigned result = 0;
68  if (spelling.drop_front(bitwidthStart).getAsInteger(10, result))
69  return std::nullopt;
70  return result;
71 }
72 
73 std::optional<bool> Token::getIntTypeSignedness() const {
74  assert(getKind() == inttype);
75  if (spelling[0] == 'i')
76  return std::nullopt;
77  if (spelling[0] == 's')
78  return true;
79  assert(spelling[0] == 'u');
80  return false;
81 }
82 
83 /// Given a token containing a string literal, return its value, including
84 /// removing the quote characters and unescaping the contents of the string. The
85 /// lexer has already verified that this token is valid.
86 std::string Token::getStringValue() const {
87  assert(getKind() == string || getKind() == code_complete ||
88  (getKind() == at_identifier && getSpelling()[1] == '"'));
89  // Start by dropping the quotes.
90  StringRef bytes = getSpelling().drop_front();
91  if (getKind() != Token::code_complete) {
92  bytes = bytes.drop_back();
93  if (getKind() == at_identifier)
94  bytes = bytes.drop_front();
95  }
96 
97  std::string result;
98  result.reserve(bytes.size());
99  for (unsigned i = 0, e = bytes.size(); i != e;) {
100  auto c = bytes[i++];
101  if (c != '\\') {
102  result.push_back(c);
103  continue;
104  }
105 
106  assert(i + 1 <= e && "invalid string should be caught by lexer");
107  auto c1 = bytes[i++];
108  switch (c1) {
109  case '"':
110  case '\\':
111  result.push_back(c1);
112  continue;
113  case 'n':
114  result.push_back('\n');
115  continue;
116  case 't':
117  result.push_back('\t');
118  continue;
119  default:
120  break;
121  }
122 
123  assert(i + 1 <= e && "invalid string should be caught by lexer");
124  auto c2 = bytes[i++];
125 
126  assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
127  result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
128  }
129 
130  return result;
131 }
132 
133 /// Given a token containing a hex string literal, return its value or
134 /// std::nullopt if the token does not contain a valid hex string.
135 std::optional<std::string> Token::getHexStringValue() const {
136  assert(getKind() == string);
137 
138  // Get the internal string data, without the quotes.
139  StringRef bytes = getSpelling().drop_front().drop_back();
140 
141  // Try to extract the binary data from the hex string. We expect the hex
142  // string to start with `0x` and have an even number of hex nibbles (nibbles
143  // should come in pairs).
144  std::string hex;
145  if (!bytes.consume_front("0x") || (bytes.size() & 1) ||
146  !llvm::tryGetFromHex(bytes, hex))
147  return std::nullopt;
148  return hex;
149 }
150 
151 /// Given a token containing a symbol reference, return the unescaped string
152 /// value.
153 std::string Token::getSymbolReference() const {
154  assert(is(Token::at_identifier) && "expected valid @-identifier");
155  StringRef nameStr = getSpelling().drop_front();
156 
157  // Check to see if the reference is a string literal, or a bare identifier.
158  if (nameStr.front() == '"')
159  return getStringValue();
160  return std::string(nameStr);
161 }
162 
163 /// Given a hash_identifier token like #123, try to parse the number out of
164 /// the identifier, returning std::nullopt if it is a named identifier like #x
165 /// or if the integer doesn't fit.
166 std::optional<unsigned> Token::getHashIdentifierNumber() const {
167  assert(getKind() == hash_identifier);
168  unsigned result = 0;
169  if (spelling.drop_front().getAsInteger(10, result))
170  return std::nullopt;
171  return result;
172 }
173 
174 /// Given a punctuation or keyword token kind, return the spelling of the
175 /// token as a string. Warning: This will abort on markers, identifiers and
176 /// literal tokens since they have no fixed spelling.
177 StringRef Token::getTokenSpelling(Kind kind) {
178  switch (kind) {
179  default:
180  llvm_unreachable("This token kind has no fixed spelling");
181 #define TOK_PUNCTUATION(NAME, SPELLING) \
182  case NAME: \
183  return SPELLING;
184 #define TOK_KEYWORD(SPELLING) \
185  case kw_##SPELLING: \
186  return #SPELLING;
187 #include "TokenKinds.def"
188  }
189 }
190 
191 /// Return true if this is one of the keyword token kinds (e.g. kw_if).
192 bool Token::isKeyword() const {
193  switch (kind) {
194  default:
195  return false;
196 #define TOK_KEYWORD(SPELLING) \
197  case kw_##SPELLING: \
198  return true;
199 #include "TokenKinds.def"
200  }
201 }
202 
204  if (!isCodeCompletion() || spelling.empty())
205  return false;
206  switch (kind) {
207  case Kind::string:
208  return spelling[0] == '"';
209  case Kind::hash_identifier:
210  return spelling[0] == '#';
211  case Kind::percent_identifier:
212  return spelling[0] == '%';
213  case Kind::caret_identifier:
214  return spelling[0] == '^';
215  case Kind::exclamation_identifier:
216  return spelling[0] == '!';
217  default:
218  return false;
219  }
220 }
bool isCodeCompletionFor(Kind kind) const
Returns true if the current token represents a code completion for the "normal" token type.
Definition: Token.cpp:203
SMRange getLocRange() const
Definition: Token.cpp:30
bool isKeyword() const
Return true if this is one of the keyword token kinds (e.g. kw_if).
Definition: Token.cpp:192
static StringRef getTokenSpelling(Kind kind)
Given a punctuation or keyword token kind, return the spelling of the token as a string.
Definition: Token.cpp:177
SMLoc getLoc() const
Definition: Token.cpp:24
bool is(Kind k) const
Definition: Token.h:38
std::string getStringValue() const
Given a token containing a string literal, return its value, including removing the quote characters ...
Definition: Token.cpp:86
std::string getSymbolReference() const
Given a token containing a symbol reference, return the unescaped string value.
Definition: Token.cpp:153
std::optional< unsigned > getUnsignedIntegerValue() const
For an integer token, return its value as an unsigned.
Definition: Token.cpp:34
std::optional< uint64_t > getUInt64IntegerValue() const
Definition: Token.h:83
std::optional< double > getFloatingPointValue() const
For a floatliteral token, return its value as a double.
Definition: Token.cpp:56
Kind getKind() const
Definition: Token.h:37
SMLoc getEndLoc() const
Definition: Token.cpp:26
std::optional< unsigned > getHashIdentifierNumber() const
Given a hash_identifier token like #123, try to parse the number out of the identifier,...
Definition: Token.cpp:166
bool isCodeCompletion() const
Returns true if the current token represents a code completion.
Definition: Token.h:62
StringRef getSpelling() const
Definition: Token.h:34
std::optional< bool > getIntTypeSignedness() const
For an inttype token, return its signedness semantics: std::nullopt means no signedness semantics; tr...
Definition: Token.cpp:73
std::optional< unsigned > getIntTypeBitwidth() const
For an inttype token, return its bitwidth.
Definition: Token.cpp:64
std::optional< std::string > getHexStringValue() const
Given a token containing a hex string literal, return its value or std::nullopt if the token does not...
Definition: Token.cpp:135
Include the generated interface declarations.