MLIR  18.0.0git
Token.cpp
Go to the documentation of this file.
1 //===- Token.cpp - MLIR Token Implementation ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Token class for the MLIR textual form.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "Token.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include <optional>
16 
17 using namespace mlir;
18 
19 SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); }
20 
21 SMLoc Token::getEndLoc() const {
22  return SMLoc::getFromPointer(spelling.data() + spelling.size());
23 }
24 
25 SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }
26 
27 /// For an integer token, return its value as an unsigned. If it doesn't fit,
28 /// return std::nullopt.
29 std::optional<unsigned> Token::getUnsignedIntegerValue() const {
30  bool isHex = spelling.size() > 1 && spelling[1] == 'x';
31 
32  unsigned result = 0;
33  if (spelling.getAsInteger(isHex ? 0 : 10, result))
34  return std::nullopt;
35  return result;
36 }
37 
38 /// For an integer token, return its value as a uint64_t. If it doesn't fit,
39 /// return std::nullopt.
40 std::optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) {
41  bool isHex = spelling.size() > 1 && spelling[1] == 'x';
42 
43  uint64_t result = 0;
44  if (spelling.getAsInteger(isHex ? 0 : 10, result))
45  return std::nullopt;
46  return result;
47 }
48 
49 /// For a floatliteral, return its value as a double. Return std::nullopt if the
50 /// value underflows or overflows.
51 std::optional<double> Token::getFloatingPointValue() const {
52  double result = 0;
53  if (spelling.getAsDouble(result))
54  return std::nullopt;
55  return result;
56 }
57 
58 /// For an inttype token, return its bitwidth.
59 std::optional<unsigned> Token::getIntTypeBitwidth() const {
60  assert(getKind() == inttype);
61  unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2);
62  unsigned result = 0;
63  if (spelling.drop_front(bitwidthStart).getAsInteger(10, result))
64  return std::nullopt;
65  return result;
66 }
67 
68 std::optional<bool> Token::getIntTypeSignedness() const {
69  assert(getKind() == inttype);
70  if (spelling[0] == 'i')
71  return std::nullopt;
72  if (spelling[0] == 's')
73  return true;
74  assert(spelling[0] == 'u');
75  return false;
76 }
77 
78 /// Given a token containing a string literal, return its value, including
79 /// removing the quote characters and unescaping the contents of the string. The
80 /// lexer has already verified that this token is valid.
81 std::string Token::getStringValue() const {
82  assert(getKind() == string || getKind() == code_complete ||
83  (getKind() == at_identifier && getSpelling()[1] == '"'));
84  // Start by dropping the quotes.
85  StringRef bytes = getSpelling().drop_front();
86  if (getKind() != Token::code_complete) {
87  bytes = bytes.drop_back();
88  if (getKind() == at_identifier)
89  bytes = bytes.drop_front();
90  }
91 
92  std::string result;
93  result.reserve(bytes.size());
94  for (unsigned i = 0, e = bytes.size(); i != e;) {
95  auto c = bytes[i++];
96  if (c != '\\') {
97  result.push_back(c);
98  continue;
99  }
100 
101  assert(i + 1 <= e && "invalid string should be caught by lexer");
102  auto c1 = bytes[i++];
103  switch (c1) {
104  case '"':
105  case '\\':
106  result.push_back(c1);
107  continue;
108  case 'n':
109  result.push_back('\n');
110  continue;
111  case 't':
112  result.push_back('\t');
113  continue;
114  default:
115  break;
116  }
117 
118  assert(i + 1 <= e && "invalid string should be caught by lexer");
119  auto c2 = bytes[i++];
120 
121  assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
122  result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
123  }
124 
125  return result;
126 }
127 
128 /// Given a token containing a hex string literal, return its value or
129 /// std::nullopt if the token does not contain a valid hex string.
130 std::optional<std::string> Token::getHexStringValue() const {
131  assert(getKind() == string);
132 
133  // Get the internal string data, without the quotes.
134  StringRef bytes = getSpelling().drop_front().drop_back();
135 
136  // Try to extract the binary data from the hex string. We expect the hex
137  // string to start with `0x` and have an even number of hex nibbles (nibbles
138  // should come in pairs).
139  std::string hex;
140  if (!bytes.consume_front("0x") || (bytes.size() & 1) ||
141  !llvm::tryGetFromHex(bytes, hex))
142  return std::nullopt;
143  return hex;
144 }
145 
146 /// Given a token containing a symbol reference, return the unescaped string
147 /// value.
148 std::string Token::getSymbolReference() const {
149  assert(is(Token::at_identifier) && "expected valid @-identifier");
150  StringRef nameStr = getSpelling().drop_front();
151 
152  // Check to see if the reference is a string literal, or a bare identifier.
153  if (nameStr.front() == '"')
154  return getStringValue();
155  return std::string(nameStr);
156 }
157 
158 /// Given a hash_identifier token like #123, try to parse the number out of
159 /// the identifier, returning std::nullopt if it is a named identifier like #x
160 /// or if the integer doesn't fit.
161 std::optional<unsigned> Token::getHashIdentifierNumber() const {
162  assert(getKind() == hash_identifier);
163  unsigned result = 0;
164  if (spelling.drop_front().getAsInteger(10, result))
165  return std::nullopt;
166  return result;
167 }
168 
169 /// Given a punctuation or keyword token kind, return the spelling of the
170 /// token as a string. Warning: This will abort on markers, identifiers and
171 /// literal tokens since they have no fixed spelling.
172 StringRef Token::getTokenSpelling(Kind kind) {
173  switch (kind) {
174  default:
175  llvm_unreachable("This token kind has no fixed spelling");
176 #define TOK_PUNCTUATION(NAME, SPELLING) \
177  case NAME: \
178  return SPELLING;
179 #define TOK_KEYWORD(SPELLING) \
180  case kw_##SPELLING: \
181  return #SPELLING;
182 #include "TokenKinds.def"
183  }
184 }
185 
186 /// Return true if this is one of the keyword token kinds (e.g. kw_if).
187 bool Token::isKeyword() const {
188  switch (kind) {
189  default:
190  return false;
191 #define TOK_KEYWORD(SPELLING) \
192  case kw_##SPELLING: \
193  return true;
194 #include "TokenKinds.def"
195  }
196 }
197 
199  if (!isCodeCompletion() || spelling.empty())
200  return false;
201  switch (kind) {
202  case Kind::string:
203  return spelling[0] == '"';
204  case Kind::hash_identifier:
205  return spelling[0] == '#';
206  case Kind::percent_identifier:
207  return spelling[0] == '%';
208  case Kind::caret_identifier:
209  return spelling[0] == '^';
210  case Kind::exclamation_identifier:
211  return spelling[0] == '!';
212  default:
213  return false;
214  }
215 }
bool isCodeCompletionFor(Kind kind) const
Returns true if the current token represents a code completion for the "normal" token type.
Definition: Token.cpp:198
SMRange getLocRange() const
Definition: Token.cpp:25
bool isKeyword() const
Return true if this is one of the keyword token kinds (e.g. kw_if).
Definition: Token.cpp:187
static StringRef getTokenSpelling(Kind kind)
Given a punctuation or keyword token kind, return the spelling of the token as a string.
Definition: Token.cpp:172
SMLoc getLoc() const
Definition: Token.cpp:19
bool is(Kind k) const
Definition: Token.h:38
std::string getStringValue() const
Given a token containing a string literal, return its value, including removing the quote characters ...
Definition: Token.cpp:81
std::string getSymbolReference() const
Given a token containing a symbol reference, return the unescaped string value.
Definition: Token.cpp:148
std::optional< unsigned > getUnsignedIntegerValue() const
For an integer token, return its value as an unsigned.
Definition: Token.cpp:29
std::optional< uint64_t > getUInt64IntegerValue() const
Definition: Token.h:83
std::optional< double > getFloatingPointValue() const
For a floatliteral token, return its value as a double.
Definition: Token.cpp:51
Kind getKind() const
Definition: Token.h:37
SMLoc getEndLoc() const
Definition: Token.cpp:21
std::optional< unsigned > getHashIdentifierNumber() const
Given a hash_identifier token like #123, try to parse the number out of the identifier,...
Definition: Token.cpp:161
bool isCodeCompletion() const
Returns true if the current token represents a code completion.
Definition: Token.h:62
StringRef getSpelling() const
Definition: Token.h:34
std::optional< bool > getIntTypeSignedness() const
For an inttype token, return its signedness semantics: std::nullopt means no signedness semantics; tr...
Definition: Token.cpp:68
std::optional< unsigned > getIntTypeBitwidth() const
For an inttype token, return its bitwidth.
Definition: Token.cpp:59
std::optional< std::string > getHexStringValue() const
Given a token containing a hex string literal, return its value or std::nullopt if the token does not...
Definition: Token.cpp:130
This header declares functions that assist transformations in the MemRef dialect.