MLIR  18.0.0git
BytecodeReader.cpp
Go to the documentation of this file.
1 //===- BytecodeReader.cpp - MLIR Bytecode Reader --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
13 #include "mlir/Bytecode/Encoding.h"
14 #include "mlir/IR/BuiltinOps.h"
15 #include "mlir/IR/Diagnostics.h"
17 #include "mlir/IR/Verifier.h"
18 #include "mlir/IR/Visitors.h"
19 #include "mlir/Support/LLVM.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/ScopeExit.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/MemoryBufferRef.h"
27 #include "llvm/Support/SourceMgr.h"
28 
29 #include <cstddef>
30 #include <list>
31 #include <memory>
32 #include <numeric>
33 #include <optional>
34 
35 #define DEBUG_TYPE "mlir-bytecode-reader"
36 
37 using namespace mlir;
38 
39 /// Stringify the given section ID.
40 static std::string toString(bytecode::Section::ID sectionID) {
41  switch (sectionID) {
43  return "String (0)";
45  return "Dialect (1)";
47  return "AttrType (2)";
49  return "AttrTypeOffset (3)";
51  return "IR (4)";
53  return "Resource (5)";
55  return "ResourceOffset (6)";
57  return "DialectVersions (7)";
59  return "Properties (8)";
60  default:
61  return ("Unknown (" + Twine(static_cast<unsigned>(sectionID)) + ")").str();
62  }
63 }
64 
65 /// Returns true if the given top-level section ID is optional.
66 static bool isSectionOptional(bytecode::Section::ID sectionID, int version) {
67  switch (sectionID) {
73  return false;
77  return true;
79  return version < bytecode::kNativePropertiesEncoding;
80  default:
81  llvm_unreachable("unknown section ID");
82  }
83 }
84 
85 //===----------------------------------------------------------------------===//
86 // EncodingReader
87 //===----------------------------------------------------------------------===//
88 
89 namespace {
90 class EncodingReader {
91 public:
92  explicit EncodingReader(ArrayRef<uint8_t> contents, Location fileLoc)
93  : buffer(contents), dataIt(buffer.begin()), fileLoc(fileLoc) {}
94  explicit EncodingReader(StringRef contents, Location fileLoc)
95  : EncodingReader({reinterpret_cast<const uint8_t *>(contents.data()),
96  contents.size()},
97  fileLoc) {}
98 
99  /// Returns true if the entire section has been read.
100  bool empty() const { return dataIt == buffer.end(); }
101 
102  /// Returns the remaining size of the bytecode.
103  size_t size() const { return buffer.end() - dataIt; }
104 
105  /// Align the current reader position to the specified alignment.
106  LogicalResult alignTo(unsigned alignment) {
107  if (!llvm::isPowerOf2_32(alignment))
108  return emitError("expected alignment to be a power-of-two");
109 
110  auto isUnaligned = [&](const uint8_t *ptr) {
111  return ((uintptr_t)ptr & (alignment - 1)) != 0;
112  };
113 
114  // Shift the reader position to the next alignment boundary.
115  while (isUnaligned(dataIt)) {
116  uint8_t padding;
117  if (failed(parseByte(padding)))
118  return failure();
119  if (padding != bytecode::kAlignmentByte) {
120  return emitError("expected alignment byte (0xCB), but got: '0x" +
121  llvm::utohexstr(padding) + "'");
122  }
123  }
124 
125  // Ensure the data iterator is now aligned. This case is unlikely because we
126  // *just* went through the effort to align the data iterator.
127  if (LLVM_UNLIKELY(isUnaligned(dataIt))) {
128  return emitError("expected data iterator aligned to ", alignment,
129  ", but got pointer: '0x" +
130  llvm::utohexstr((uintptr_t)dataIt) + "'");
131  }
132 
133  return success();
134  }
135 
136  /// Emit an error using the given arguments.
137  template <typename... Args>
138  InFlightDiagnostic emitError(Args &&...args) const {
139  return ::emitError(fileLoc).append(std::forward<Args>(args)...);
140  }
141  InFlightDiagnostic emitError() const { return ::emitError(fileLoc); }
142 
143  /// Parse a single byte from the stream.
144  template <typename T>
145  LogicalResult parseByte(T &value) {
146  if (empty())
147  return emitError("attempting to parse a byte at the end of the bytecode");
148  value = static_cast<T>(*dataIt++);
149  return success();
150  }
151  /// Parse a range of bytes of 'length' into the given result.
152  LogicalResult parseBytes(size_t length, ArrayRef<uint8_t> &result) {
153  if (length > size()) {
154  return emitError("attempting to parse ", length, " bytes when only ",
155  size(), " remain");
156  }
157  result = {dataIt, length};
158  dataIt += length;
159  return success();
160  }
161  /// Parse a range of bytes of 'length' into the given result, which can be
162  /// assumed to be large enough to hold `length`.
163  LogicalResult parseBytes(size_t length, uint8_t *result) {
164  if (length > size()) {
165  return emitError("attempting to parse ", length, " bytes when only ",
166  size(), " remain");
167  }
168  memcpy(result, dataIt, length);
169  dataIt += length;
170  return success();
171  }
172 
173  /// Parse an aligned blob of data, where the alignment was encoded alongside
174  /// the data.
175  LogicalResult parseBlobAndAlignment(ArrayRef<uint8_t> &data,
176  uint64_t &alignment) {
177  uint64_t dataSize;
178  if (failed(parseVarInt(alignment)) || failed(parseVarInt(dataSize)) ||
179  failed(alignTo(alignment)))
180  return failure();
181  return parseBytes(dataSize, data);
182  }
183 
184  /// Parse a variable length encoded integer from the byte stream. The first
185  /// encoded byte contains a prefix in the low bits indicating the encoded
186  /// length of the value. This length prefix is a bit sequence of '0's followed
187  /// by a '1'. The number of '0' bits indicate the number of _additional_ bytes
188  /// (not including the prefix byte). All remaining bits in the first byte,
189  /// along with all of the bits in additional bytes, provide the value of the
190  /// integer encoded in little-endian order.
191  LogicalResult parseVarInt(uint64_t &result) {
192  // Parse the first byte of the encoding, which contains the length prefix.
193  if (failed(parseByte(result)))
194  return failure();
195 
196  // Handle the overwhelmingly common case where the value is stored in a
197  // single byte. In this case, the first bit is the `1` marker bit.
198  if (LLVM_LIKELY(result & 1)) {
199  result >>= 1;
200  return success();
201  }
202 
203  // Handle the overwhelming uncommon case where the value required all 8
204  // bytes (i.e. a really really big number). In this case, the marker byte is
205  // all zeros: `00000000`.
206  if (LLVM_UNLIKELY(result == 0)) {
207  llvm::support::ulittle64_t resultLE;
208  if (failed(parseBytes(sizeof(resultLE),
209  reinterpret_cast<uint8_t *>(&resultLE))))
210  return failure();
211  result = resultLE;
212  return success();
213  }
214  return parseMultiByteVarInt(result);
215  }
216 
217  /// Parse a signed variable length encoded integer from the byte stream. A
218  /// signed varint is encoded as a normal varint with zigzag encoding applied,
219  /// i.e. the low bit of the value is used to indicate the sign.
220  LogicalResult parseSignedVarInt(uint64_t &result) {
221  if (failed(parseVarInt(result)))
222  return failure();
223  // Essentially (but using unsigned): (x >> 1) ^ -(x & 1)
224  result = (result >> 1) ^ (~(result & 1) + 1);
225  return success();
226  }
227 
228  /// Parse a variable length encoded integer whose low bit is used to encode an
229  /// unrelated flag, i.e: `(integerValue << 1) | (flag ? 1 : 0)`.
230  LogicalResult parseVarIntWithFlag(uint64_t &result, bool &flag) {
231  if (failed(parseVarInt(result)))
232  return failure();
233  flag = result & 1;
234  result >>= 1;
235  return success();
236  }
237 
238  /// Skip the first `length` bytes within the reader.
239  LogicalResult skipBytes(size_t length) {
240  if (length > size()) {
241  return emitError("attempting to skip ", length, " bytes when only ",
242  size(), " remain");
243  }
244  dataIt += length;
245  return success();
246  }
247 
248  /// Parse a null-terminated string into `result` (without including the NUL
249  /// terminator).
250  LogicalResult parseNullTerminatedString(StringRef &result) {
251  const char *startIt = (const char *)dataIt;
252  const char *nulIt = (const char *)memchr(startIt, 0, size());
253  if (!nulIt)
254  return emitError(
255  "malformed null-terminated string, no null character found");
256 
257  result = StringRef(startIt, nulIt - startIt);
258  dataIt = (const uint8_t *)nulIt + 1;
259  return success();
260  }
261 
262  /// Parse a section header, placing the kind of section in `sectionID` and the
263  /// contents of the section in `sectionData`.
264  LogicalResult parseSection(bytecode::Section::ID &sectionID,
265  ArrayRef<uint8_t> &sectionData) {
266  uint8_t sectionIDAndHasAlignment;
267  uint64_t length;
268  if (failed(parseByte(sectionIDAndHasAlignment)) ||
269  failed(parseVarInt(length)))
270  return failure();
271 
272  // Extract the section ID and whether the section is aligned. The high bit
273  // of the ID is the alignment flag.
274  sectionID = static_cast<bytecode::Section::ID>(sectionIDAndHasAlignment &
275  0b01111111);
276  bool hasAlignment = sectionIDAndHasAlignment & 0b10000000;
277 
278  // Check that the section is actually valid before trying to process its
279  // data.
280  if (sectionID >= bytecode::Section::kNumSections)
281  return emitError("invalid section ID: ", unsigned(sectionID));
282 
283  // Process the section alignment if present.
284  if (hasAlignment) {
285  uint64_t alignment;
286  if (failed(parseVarInt(alignment)) || failed(alignTo(alignment)))
287  return failure();
288  }
289 
290  // Parse the actual section data.
291  return parseBytes(static_cast<size_t>(length), sectionData);
292  }
293 
294  Location getLoc() const { return fileLoc; }
295 
296 private:
297  /// Parse a variable length encoded integer from the byte stream. This method
298  /// is a fallback when the number of bytes used to encode the value is greater
299  /// than 1, but less than the max (9). The provided `result` value can be
300  /// assumed to already contain the first byte of the value.
301  /// NOTE: This method is marked noinline to avoid pessimizing the common case
302  /// of single byte encoding.
303  LLVM_ATTRIBUTE_NOINLINE LogicalResult parseMultiByteVarInt(uint64_t &result) {
304  // Count the number of trailing zeros in the marker byte, this indicates the
305  // number of trailing bytes that are part of the value. We use `uint32_t`
306  // here because we only care about the first byte, and so that be actually
307  // get ctz intrinsic calls when possible (the `uint8_t` overload uses a loop
308  // implementation).
309  uint32_t numBytes = llvm::countr_zero<uint32_t>(result);
310  assert(numBytes > 0 && numBytes <= 7 &&
311  "unexpected number of trailing zeros in varint encoding");
312 
313  // Parse in the remaining bytes of the value.
314  llvm::support::ulittle64_t resultLE(result);
315  if (failed(
316  parseBytes(numBytes, reinterpret_cast<uint8_t *>(&resultLE) + 1)))
317  return failure();
318 
319  // Shift out the low-order bits that were used to mark how the value was
320  // encoded.
321  result = resultLE >> (numBytes + 1);
322  return success();
323  }
324 
325  /// The bytecode buffer.
326  ArrayRef<uint8_t> buffer;
327 
328  /// The current iterator within the 'buffer'.
329  const uint8_t *dataIt;
330 
331  /// A location for the bytecode used to report errors.
332  Location fileLoc;
333 };
334 } // namespace
335 
336 /// Resolve an index into the given entry list. `entry` may either be a
337 /// reference, in which case it is assigned to the corresponding value in
338 /// `entries`, or a pointer, in which case it is assigned to the address of the
339 /// element in `entries`.
340 template <typename RangeT, typename T>
341 static LogicalResult resolveEntry(EncodingReader &reader, RangeT &entries,
342  uint64_t index, T &entry,
343  StringRef entryStr) {
344  if (index >= entries.size())
345  return reader.emitError("invalid ", entryStr, " index: ", index);
346 
347  // If the provided entry is a pointer, resolve to the address of the entry.
348  if constexpr (std::is_convertible_v<llvm::detail::ValueOfRange<RangeT>, T>)
349  entry = entries[index];
350  else
351  entry = &entries[index];
352  return success();
353 }
354 
355 /// Parse and resolve an index into the given entry list.
356 template <typename RangeT, typename T>
357 static LogicalResult parseEntry(EncodingReader &reader, RangeT &entries,
358  T &entry, StringRef entryStr) {
359  uint64_t entryIdx;
360  if (failed(reader.parseVarInt(entryIdx)))
361  return failure();
362  return resolveEntry(reader, entries, entryIdx, entry, entryStr);
363 }
364 
365 //===----------------------------------------------------------------------===//
366 // StringSectionReader
367 //===----------------------------------------------------------------------===//
368 
369 namespace {
370 /// This class is used to read references to the string section from the
371 /// bytecode.
372 class StringSectionReader {
373 public:
374  /// Initialize the string section reader with the given section data.
375  LogicalResult initialize(Location fileLoc, ArrayRef<uint8_t> sectionData);
376 
377  /// Parse a shared string from the string section. The shared string is
378  /// encoded using an index to a corresponding string in the string section.
379  LogicalResult parseString(EncodingReader &reader, StringRef &result) {
380  return parseEntry(reader, strings, result, "string");
381  }
382 
383  /// Parse a shared string from the string section. The shared string is
384  /// encoded using an index to a corresponding string in the string section.
385  /// This variant parses a flag compressed with the index.
386  LogicalResult parseStringWithFlag(EncodingReader &reader, StringRef &result,
387  bool &flag) {
388  uint64_t entryIdx;
389  if (failed(reader.parseVarIntWithFlag(entryIdx, flag)))
390  return failure();
391  return parseStringAtIndex(reader, entryIdx, result);
392  }
393 
394  /// Parse a shared string from the string section. The shared string is
395  /// encoded using an index to a corresponding string in the string section.
396  LogicalResult parseStringAtIndex(EncodingReader &reader, uint64_t index,
397  StringRef &result) {
398  return resolveEntry(reader, strings, index, result, "string");
399  }
400 
401 private:
402  /// The table of strings referenced within the bytecode file.
403  SmallVector<StringRef> strings;
404 };
405 } // namespace
406 
407 LogicalResult StringSectionReader::initialize(Location fileLoc,
408  ArrayRef<uint8_t> sectionData) {
409  EncodingReader stringReader(sectionData, fileLoc);
410 
411  // Parse the number of strings in the section.
412  uint64_t numStrings;
413  if (failed(stringReader.parseVarInt(numStrings)))
414  return failure();
415  strings.resize(numStrings);
416 
417  // Parse each of the strings. The sizes of the strings are encoded in reverse
418  // order, so that's the order we populate the table.
419  size_t stringDataEndOffset = sectionData.size();
420  for (StringRef &string : llvm::reverse(strings)) {
421  uint64_t stringSize;
422  if (failed(stringReader.parseVarInt(stringSize)))
423  return failure();
424  if (stringDataEndOffset < stringSize) {
425  return stringReader.emitError(
426  "string size exceeds the available data size");
427  }
428 
429  // Extract the string from the data, dropping the null character.
430  size_t stringOffset = stringDataEndOffset - stringSize;
431  string = StringRef(
432  reinterpret_cast<const char *>(sectionData.data() + stringOffset),
433  stringSize - 1);
434  stringDataEndOffset = stringOffset;
435  }
436 
437  // Check that the only remaining data was for the strings, i.e. the reader
438  // should be at the same offset as the first string.
439  if ((sectionData.size() - stringReader.size()) != stringDataEndOffset) {
440  return stringReader.emitError("unexpected trailing data between the "
441  "offsets for strings and their data");
442  }
443  return success();
444 }
445 
446 //===----------------------------------------------------------------------===//
447 // BytecodeDialect
448 //===----------------------------------------------------------------------===//
449 
450 namespace {
451 class DialectReader;
452 
453 /// This struct represents a dialect entry within the bytecode.
454 struct BytecodeDialect {
455  /// Load the dialect into the provided context if it hasn't been loaded yet.
456  /// Returns failure if the dialect couldn't be loaded *and* the provided
457  /// context does not allow unregistered dialects. The provided reader is used
458  /// for error emission if necessary.
459  LogicalResult load(const DialectReader &reader, MLIRContext *ctx);
460 
461  /// Return the loaded dialect, or nullptr if the dialect is unknown. This can
462  /// only be called after `load`.
463  Dialect *getLoadedDialect() const {
464  assert(dialect &&
465  "expected `load` to be invoked before `getLoadedDialect`");
466  return *dialect;
467  }
468 
469  /// The loaded dialect entry. This field is std::nullopt if we haven't
470  /// attempted to load, nullptr if we failed to load, otherwise the loaded
471  /// dialect.
472  std::optional<Dialect *> dialect;
473 
474  /// The bytecode interface of the dialect, or nullptr if the dialect does not
475  /// implement the bytecode interface. This field should only be checked if the
476  /// `dialect` field is not std::nullopt.
477  const BytecodeDialectInterface *interface = nullptr;
478 
479  /// The name of the dialect.
480  StringRef name;
481 
482  /// A buffer containing the encoding of the dialect version parsed.
483  ArrayRef<uint8_t> versionBuffer;
484 
485  /// Lazy loaded dialect version from the handle above.
486  std::unique_ptr<DialectVersion> loadedVersion;
487 };
488 
489 /// This struct represents an operation name entry within the bytecode.
490 struct BytecodeOperationName {
491  BytecodeOperationName(BytecodeDialect *dialect, StringRef name,
492  std::optional<bool> wasRegistered)
493  : dialect(dialect), name(name), wasRegistered(wasRegistered) {}
494 
495  /// The loaded operation name, or std::nullopt if it hasn't been processed
496  /// yet.
497  std::optional<OperationName> opName;
498 
499  /// The dialect that owns this operation name.
500  BytecodeDialect *dialect;
501 
502  /// The name of the operation, without the dialect prefix.
503  StringRef name;
504 
505  /// Whether this operation was registered when the bytecode was produced.
506  /// This flag is populated when bytecode version >=kNativePropertiesEncoding.
507  std::optional<bool> wasRegistered;
508 };
509 } // namespace
510 
511 /// Parse a single dialect group encoded in the byte stream.
513  EncodingReader &reader,
514  MutableArrayRef<std::unique_ptr<BytecodeDialect>> dialects,
515  function_ref<LogicalResult(BytecodeDialect *)> entryCallback) {
516  // Parse the dialect and the number of entries in the group.
517  std::unique_ptr<BytecodeDialect> *dialect;
518  if (failed(parseEntry(reader, dialects, dialect, "dialect")))
519  return failure();
520  uint64_t numEntries;
521  if (failed(reader.parseVarInt(numEntries)))
522  return failure();
523 
524  for (uint64_t i = 0; i < numEntries; ++i)
525  if (failed(entryCallback(dialect->get())))
526  return failure();
527  return success();
528 }
529 
530 //===----------------------------------------------------------------------===//
531 // ResourceSectionReader
532 //===----------------------------------------------------------------------===//
533 
534 namespace {
535 /// This class is used to read the resource section from the bytecode.
536 class ResourceSectionReader {
537 public:
538  /// Initialize the resource section reader with the given section data.
540  initialize(Location fileLoc, const ParserConfig &config,
541  MutableArrayRef<std::unique_ptr<BytecodeDialect>> dialects,
542  StringSectionReader &stringReader, ArrayRef<uint8_t> sectionData,
543  ArrayRef<uint8_t> offsetSectionData, DialectReader &dialectReader,
544  const std::shared_ptr<llvm::SourceMgr> &bufferOwnerRef);
545 
546  /// Parse a dialect resource handle from the resource section.
547  LogicalResult parseResourceHandle(EncodingReader &reader,
548  AsmDialectResourceHandle &result) {
549  return parseEntry(reader, dialectResources, result, "resource handle");
550  }
551 
552 private:
553  /// The table of dialect resources within the bytecode file.
554  SmallVector<AsmDialectResourceHandle> dialectResources;
555  llvm::StringMap<std::string> dialectResourceHandleRenamingMap;
556 };
557 
558 class ParsedResourceEntry : public AsmParsedResourceEntry {
559 public:
560  ParsedResourceEntry(StringRef key, AsmResourceEntryKind kind,
561  EncodingReader &reader, StringSectionReader &stringReader,
562  const std::shared_ptr<llvm::SourceMgr> &bufferOwnerRef)
563  : key(key), kind(kind), reader(reader), stringReader(stringReader),
564  bufferOwnerRef(bufferOwnerRef) {}
565  ~ParsedResourceEntry() override = default;
566 
567  StringRef getKey() const final { return key; }
568 
569  InFlightDiagnostic emitError() const final { return reader.emitError(); }
570 
571  AsmResourceEntryKind getKind() const final { return kind; }
572 
573  FailureOr<bool> parseAsBool() const final {
574  if (kind != AsmResourceEntryKind::Bool)
575  return emitError() << "expected a bool resource entry, but found a "
576  << toString(kind) << " entry instead";
577 
578  bool value;
579  if (failed(reader.parseByte(value)))
580  return failure();
581  return value;
582  }
583  FailureOr<std::string> parseAsString() const final {
584  if (kind != AsmResourceEntryKind::String)
585  return emitError() << "expected a string resource entry, but found a "
586  << toString(kind) << " entry instead";
587 
588  StringRef string;
589  if (failed(stringReader.parseString(reader, string)))
590  return failure();
591  return string.str();
592  }
593 
595  parseAsBlob(BlobAllocatorFn allocator) const final {
596  if (kind != AsmResourceEntryKind::Blob)
597  return emitError() << "expected a blob resource entry, but found a "
598  << toString(kind) << " entry instead";
599 
600  ArrayRef<uint8_t> data;
601  uint64_t alignment;
602  if (failed(reader.parseBlobAndAlignment(data, alignment)))
603  return failure();
604 
605  // If we have an extendable reference to the buffer owner, we don't need to
606  // allocate a new buffer for the data, and can use the data directly.
607  if (bufferOwnerRef) {
608  ArrayRef<char> charData(reinterpret_cast<const char *>(data.data()),
609  data.size());
610 
611  // Allocate an unmanager buffer which captures a reference to the owner.
612  // For now we just mark this as immutable, but in the future we should
613  // explore marking this as mutable when desired.
615  charData, alignment,
616  [bufferOwnerRef = bufferOwnerRef](void *, size_t, size_t) {});
617  }
618 
619  // Allocate memory for the blob using the provided allocator and copy the
620  // data into it.
621  AsmResourceBlob blob = allocator(data.size(), alignment);
622  assert(llvm::isAddrAligned(llvm::Align(alignment), blob.getData().data()) &&
623  blob.isMutable() &&
624  "blob allocator did not return a properly aligned address");
625  memcpy(blob.getMutableData().data(), data.data(), data.size());
626  return blob;
627  }
628 
629 private:
630  StringRef key;
632  EncodingReader &reader;
633  StringSectionReader &stringReader;
634  const std::shared_ptr<llvm::SourceMgr> &bufferOwnerRef;
635 };
636 } // namespace
637 
638 template <typename T>
639 static LogicalResult
640 parseResourceGroup(Location fileLoc, bool allowEmpty,
641  EncodingReader &offsetReader, EncodingReader &resourceReader,
642  StringSectionReader &stringReader, T *handler,
643  const std::shared_ptr<llvm::SourceMgr> &bufferOwnerRef,
644  function_ref<StringRef(StringRef)> remapKey = {},
645  function_ref<LogicalResult(StringRef)> processKeyFn = {}) {
646  uint64_t numResources;
647  if (failed(offsetReader.parseVarInt(numResources)))
648  return failure();
649 
650  for (uint64_t i = 0; i < numResources; ++i) {
651  StringRef key;
653  uint64_t resourceOffset;
654  ArrayRef<uint8_t> data;
655  if (failed(stringReader.parseString(offsetReader, key)) ||
656  failed(offsetReader.parseVarInt(resourceOffset)) ||
657  failed(offsetReader.parseByte(kind)) ||
658  failed(resourceReader.parseBytes(resourceOffset, data)))
659  return failure();
660 
661  // Process the resource key.
662  if ((processKeyFn && failed(processKeyFn(key))))
663  return failure();
664 
665  // If the resource data is empty and we allow it, don't error out when
666  // parsing below, just skip it.
667  if (allowEmpty && data.empty())
668  continue;
669 
670  // Ignore the entry if we don't have a valid handler.
671  if (!handler)
672  continue;
673 
674  // Otherwise, parse the resource value.
675  EncodingReader entryReader(data, fileLoc);
676  key = remapKey(key);
677  ParsedResourceEntry entry(key, kind, entryReader, stringReader,
678  bufferOwnerRef);
679  if (failed(handler->parseResource(entry)))
680  return failure();
681  if (!entryReader.empty()) {
682  return entryReader.emitError(
683  "unexpected trailing bytes in resource entry '", key, "'");
684  }
685  }
686  return success();
687 }
688 
689 LogicalResult ResourceSectionReader::initialize(
690  Location fileLoc, const ParserConfig &config,
691  MutableArrayRef<std::unique_ptr<BytecodeDialect>> dialects,
692  StringSectionReader &stringReader, ArrayRef<uint8_t> sectionData,
693  ArrayRef<uint8_t> offsetSectionData, DialectReader &dialectReader,
694  const std::shared_ptr<llvm::SourceMgr> &bufferOwnerRef) {
695  EncodingReader resourceReader(sectionData, fileLoc);
696  EncodingReader offsetReader(offsetSectionData, fileLoc);
697 
698  // Read the number of external resource providers.
699  uint64_t numExternalResourceGroups;
700  if (failed(offsetReader.parseVarInt(numExternalResourceGroups)))
701  return failure();
702 
703  // Utility functor that dispatches to `parseResourceGroup`, but implicitly
704  // provides most of the arguments.
705  auto parseGroup = [&](auto *handler, bool allowEmpty = false,
706  function_ref<LogicalResult(StringRef)> keyFn = {}) {
707  auto resolveKey = [&](StringRef key) -> StringRef {
708  auto it = dialectResourceHandleRenamingMap.find(key);
709  if (it == dialectResourceHandleRenamingMap.end())
710  return "";
711  return it->second;
712  };
713 
714  return parseResourceGroup(fileLoc, allowEmpty, offsetReader, resourceReader,
715  stringReader, handler, bufferOwnerRef, resolveKey,
716  keyFn);
717  };
718 
719  // Read the external resources from the bytecode.
720  for (uint64_t i = 0; i < numExternalResourceGroups; ++i) {
721  StringRef key;
722  if (failed(stringReader.parseString(offsetReader, key)))
723  return failure();
724 
725  // Get the handler for these resources.
726  // TODO: Should we require handling external resources in some scenarios?
727  AsmResourceParser *handler = config.getResourceParser(key);
728  if (!handler) {
729  emitWarning(fileLoc) << "ignoring unknown external resources for '" << key
730  << "'";
731  }
732 
733  if (failed(parseGroup(handler)))
734  return failure();
735  }
736 
737  // Read the dialect resources from the bytecode.
738  MLIRContext *ctx = fileLoc->getContext();
739  while (!offsetReader.empty()) {
740  std::unique_ptr<BytecodeDialect> *dialect;
741  if (failed(parseEntry(offsetReader, dialects, dialect, "dialect")) ||
742  failed((*dialect)->load(dialectReader, ctx)))
743  return failure();
744  Dialect *loadedDialect = (*dialect)->getLoadedDialect();
745  if (!loadedDialect) {
746  return resourceReader.emitError()
747  << "dialect '" << (*dialect)->name << "' is unknown";
748  }
749  const auto *handler = dyn_cast<OpAsmDialectInterface>(loadedDialect);
750  if (!handler) {
751  return resourceReader.emitError()
752  << "unexpected resources for dialect '" << (*dialect)->name << "'";
753  }
754 
755  // Ensure that each resource is declared before being processed.
756  auto processResourceKeyFn = [&](StringRef key) -> LogicalResult {
758  handler->declareResource(key);
759  if (failed(handle)) {
760  return resourceReader.emitError()
761  << "unknown 'resource' key '" << key << "' for dialect '"
762  << (*dialect)->name << "'";
763  }
764  dialectResourceHandleRenamingMap[key] = handler->getResourceKey(*handle);
765  dialectResources.push_back(*handle);
766  return success();
767  };
768 
769  // Parse the resources for this dialect. We allow empty resources because we
770  // just treat these as declarations.
771  if (failed(parseGroup(handler, /*allowEmpty=*/true, processResourceKeyFn)))
772  return failure();
773  }
774 
775  return success();
776 }
777 
778 //===----------------------------------------------------------------------===//
779 // Attribute/Type Reader
780 //===----------------------------------------------------------------------===//
781 
782 namespace {
783 /// This class provides support for reading attribute and type entries from the
784 /// bytecode. Attribute and Type entries are read lazily on demand, so we use
785 /// this reader to manage when to actually parse them from the bytecode.
786 class AttrTypeReader {
787  /// This class represents a single attribute or type entry.
788  template <typename T>
789  struct Entry {
790  /// The entry, or null if it hasn't been resolved yet.
791  T entry = {};
792  /// The parent dialect of this entry.
793  BytecodeDialect *dialect = nullptr;
794  /// A flag indicating if the entry was encoded using a custom encoding,
795  /// instead of using the textual assembly format.
796  bool hasCustomEncoding = false;
797  /// The raw data of this entry in the bytecode.
798  ArrayRef<uint8_t> data;
799  };
800  using AttrEntry = Entry<Attribute>;
801  using TypeEntry = Entry<Type>;
802 
803 public:
804  AttrTypeReader(StringSectionReader &stringReader,
805  ResourceSectionReader &resourceReader,
806  const llvm::StringMap<BytecodeDialect *> &dialectsMap,
807  uint64_t &bytecodeVersion, Location fileLoc,
808  const ParserConfig &config)
809  : stringReader(stringReader), resourceReader(resourceReader),
810  dialectsMap(dialectsMap), fileLoc(fileLoc),
811  bytecodeVersion(bytecodeVersion), parserConfig(config) {}
812 
813  /// Initialize the attribute and type information within the reader.
815  initialize(MutableArrayRef<std::unique_ptr<BytecodeDialect>> dialects,
816  ArrayRef<uint8_t> sectionData,
817  ArrayRef<uint8_t> offsetSectionData);
818 
819  /// Resolve the attribute or type at the given index. Returns nullptr on
820  /// failure.
821  Attribute resolveAttribute(size_t index) {
822  return resolveEntry(attributes, index, "Attribute");
823  }
824  Type resolveType(size_t index) { return resolveEntry(types, index, "Type"); }
825 
826  /// Parse a reference to an attribute or type using the given reader.
827  LogicalResult parseAttribute(EncodingReader &reader, Attribute &result) {
828  uint64_t attrIdx;
829  if (failed(reader.parseVarInt(attrIdx)))
830  return failure();
831  result = resolveAttribute(attrIdx);
832  return success(!!result);
833  }
834  LogicalResult parseOptionalAttribute(EncodingReader &reader,
835  Attribute &result) {
836  uint64_t attrIdx;
837  bool flag;
838  if (failed(reader.parseVarIntWithFlag(attrIdx, flag)))
839  return failure();
840  if (!flag)
841  return success();
842  result = resolveAttribute(attrIdx);
843  return success(!!result);
844  }
845 
846  LogicalResult parseType(EncodingReader &reader, Type &result) {
847  uint64_t typeIdx;
848  if (failed(reader.parseVarInt(typeIdx)))
849  return failure();
850  result = resolveType(typeIdx);
851  return success(!!result);
852  }
853 
854  template <typename T>
855  LogicalResult parseAttribute(EncodingReader &reader, T &result) {
856  Attribute baseResult;
857  if (failed(parseAttribute(reader, baseResult)))
858  return failure();
859  if ((result = dyn_cast<T>(baseResult)))
860  return success();
861  return reader.emitError("expected attribute of type: ",
862  llvm::getTypeName<T>(), ", but got: ", baseResult);
863  }
864 
865 private:
866  /// Resolve the given entry at `index`.
867  template <typename T>
868  T resolveEntry(SmallVectorImpl<Entry<T>> &entries, size_t index,
869  StringRef entryType);
870 
871  /// Parse an entry using the given reader that was encoded using the textual
872  /// assembly format.
873  template <typename T>
874  LogicalResult parseAsmEntry(T &result, EncodingReader &reader,
875  StringRef entryType);
876 
877  /// Parse an entry using the given reader that was encoded using a custom
878  /// bytecode format.
879  template <typename T>
880  LogicalResult parseCustomEntry(Entry<T> &entry, EncodingReader &reader,
881  StringRef entryType);
882 
883  /// The string section reader used to resolve string references when parsing
884  /// custom encoded attribute/type entries.
885  StringSectionReader &stringReader;
886 
887  /// The resource section reader used to resolve resource references when
888  /// parsing custom encoded attribute/type entries.
889  ResourceSectionReader &resourceReader;
890 
891  /// The map of the loaded dialects used to retrieve dialect information, such
892  /// as the dialect version.
893  const llvm::StringMap<BytecodeDialect *> &dialectsMap;
894 
895  /// The set of attribute and type entries.
896  SmallVector<AttrEntry> attributes;
898 
899  /// A location used for error emission.
900  Location fileLoc;
901 
902  /// Current bytecode version being used.
903  uint64_t &bytecodeVersion;
904 
905  /// Reference to the parser configuration.
906  const ParserConfig &parserConfig;
907 };
908 
909 class DialectReader : public DialectBytecodeReader {
910 public:
911  DialectReader(AttrTypeReader &attrTypeReader,
912  StringSectionReader &stringReader,
913  ResourceSectionReader &resourceReader,
914  const llvm::StringMap<BytecodeDialect *> &dialectsMap,
915  EncodingReader &reader, uint64_t &bytecodeVersion)
916  : attrTypeReader(attrTypeReader), stringReader(stringReader),
917  resourceReader(resourceReader), dialectsMap(dialectsMap),
918  reader(reader), bytecodeVersion(bytecodeVersion) {}
919 
920  InFlightDiagnostic emitError(const Twine &msg) const override {
921  return reader.emitError(msg);
922  }
923 
925  getDialectVersion(StringRef dialectName) const override {
926  // First check if the dialect is available in the map.
927  auto dialectEntry = dialectsMap.find(dialectName);
928  if (dialectEntry == dialectsMap.end())
929  return failure();
930  // If the dialect was found, try to load it. This will trigger reading the
931  // bytecode version from the version buffer if it wasn't already processed.
932  // Return failure if either of those two actions could not be completed.
933  if (failed(dialectEntry->getValue()->load(*this, getLoc().getContext())) ||
934  dialectEntry->getValue()->loadedVersion == nullptr)
935  return failure();
936  return dialectEntry->getValue()->loadedVersion.get();
937  }
938 
939  MLIRContext *getContext() const override { return getLoc().getContext(); }
940 
941  uint64_t getBytecodeVersion() const override { return bytecodeVersion; }
942 
943  DialectReader withEncodingReader(EncodingReader &encReader) const {
944  return DialectReader(attrTypeReader, stringReader, resourceReader,
945  dialectsMap, encReader, bytecodeVersion);
946  }
947 
948  Location getLoc() const { return reader.getLoc(); }
949 
950  //===--------------------------------------------------------------------===//
951  // IR
952  //===--------------------------------------------------------------------===//
953 
954  LogicalResult readAttribute(Attribute &result) override {
955  return attrTypeReader.parseAttribute(reader, result);
956  }
957  LogicalResult readOptionalAttribute(Attribute &result) override {
958  return attrTypeReader.parseOptionalAttribute(reader, result);
959  }
960  LogicalResult readType(Type &result) override {
961  return attrTypeReader.parseType(reader, result);
962  }
963 
966  if (failed(resourceReader.parseResourceHandle(reader, handle)))
967  return failure();
968  return handle;
969  }
970 
971  //===--------------------------------------------------------------------===//
972  // Primitives
973  //===--------------------------------------------------------------------===//
974 
975  LogicalResult readVarInt(uint64_t &result) override {
976  return reader.parseVarInt(result);
977  }
978 
979  LogicalResult readSignedVarInt(int64_t &result) override {
980  uint64_t unsignedResult;
981  if (failed(reader.parseSignedVarInt(unsignedResult)))
982  return failure();
983  result = static_cast<int64_t>(unsignedResult);
984  return success();
985  }
986 
987  FailureOr<APInt> readAPIntWithKnownWidth(unsigned bitWidth) override {
988  // Small values are encoded using a single byte.
989  if (bitWidth <= 8) {
990  uint8_t value;
991  if (failed(reader.parseByte(value)))
992  return failure();
993  return APInt(bitWidth, value);
994  }
995 
996  // Large values up to 64 bits are encoded using a single varint.
997  if (bitWidth <= 64) {
998  uint64_t value;
999  if (failed(reader.parseSignedVarInt(value)))
1000  return failure();
1001  return APInt(bitWidth, value);
1002  }
1003 
1004  // Otherwise, for really big values we encode the array of active words in
1005  // the value.
1006  uint64_t numActiveWords;
1007  if (failed(reader.parseVarInt(numActiveWords)))
1008  return failure();
1009  SmallVector<uint64_t, 4> words(numActiveWords);
1010  for (uint64_t i = 0; i < numActiveWords; ++i)
1011  if (failed(reader.parseSignedVarInt(words[i])))
1012  return failure();
1013  return APInt(bitWidth, words);
1014  }
1015 
1017  readAPFloatWithKnownSemantics(const llvm::fltSemantics &semantics) override {
1018  FailureOr<APInt> intVal =
1019  readAPIntWithKnownWidth(APFloat::getSizeInBits(semantics));
1020  if (failed(intVal))
1021  return failure();
1022  return APFloat(semantics, *intVal);
1023  }
1024 
1025  LogicalResult readString(StringRef &result) override {
1026  return stringReader.parseString(reader, result);
1027  }
1028 
1029  LogicalResult readBlob(ArrayRef<char> &result) override {
1030  uint64_t dataSize;
1031  ArrayRef<uint8_t> data;
1032  if (failed(reader.parseVarInt(dataSize)) ||
1033  failed(reader.parseBytes(dataSize, data)))
1034  return failure();
1035  result = llvm::ArrayRef(reinterpret_cast<const char *>(data.data()),
1036  data.size());
1037  return success();
1038  }
1039 
1040  LogicalResult readBool(bool &result) override {
1041  return reader.parseByte(result);
1042  }
1043 
1044 private:
1045  AttrTypeReader &attrTypeReader;
1046  StringSectionReader &stringReader;
1047  ResourceSectionReader &resourceReader;
1048  const llvm::StringMap<BytecodeDialect *> &dialectsMap;
1049  EncodingReader &reader;
1050  uint64_t &bytecodeVersion;
1051 };
1052 
1053 /// Wraps the properties section and handles reading properties out of it.
1054 class PropertiesSectionReader {
1055 public:
1056  /// Initialize the properties section reader with the given section data.
1057  LogicalResult initialize(Location fileLoc, ArrayRef<uint8_t> sectionData) {
1058  if (sectionData.empty())
1059  return success();
1060  EncodingReader propReader(sectionData, fileLoc);
1061  uint64_t count;
1062  if (failed(propReader.parseVarInt(count)))
1063  return failure();
1064  // Parse the raw properties buffer.
1065  if (failed(propReader.parseBytes(propReader.size(), propertiesBuffers)))
1066  return failure();
1067 
1068  EncodingReader offsetsReader(propertiesBuffers, fileLoc);
1069  offsetTable.reserve(count);
1070  for (auto idx : llvm::seq<int64_t>(0, count)) {
1071  (void)idx;
1072  offsetTable.push_back(propertiesBuffers.size() - offsetsReader.size());
1073  ArrayRef<uint8_t> rawProperties;
1074  uint64_t dataSize;
1075  if (failed(offsetsReader.parseVarInt(dataSize)) ||
1076  failed(offsetsReader.parseBytes(dataSize, rawProperties)))
1077  return failure();
1078  }
1079  if (!offsetsReader.empty())
1080  return offsetsReader.emitError()
1081  << "Broken properties section: didn't exhaust the offsets table";
1082  return success();
1083  }
1084 
1085  LogicalResult read(Location fileLoc, DialectReader &dialectReader,
1086  OperationName *opName, OperationState &opState) {
1087  uint64_t propertiesIdx;
1088  if (failed(dialectReader.readVarInt(propertiesIdx)))
1089  return failure();
1090  if (propertiesIdx >= offsetTable.size())
1091  return dialectReader.emitError("Properties idx out-of-bound for ")
1092  << opName->getStringRef();
1093  size_t propertiesOffset = offsetTable[propertiesIdx];
1094  if (propertiesIdx >= propertiesBuffers.size())
1095  return dialectReader.emitError("Properties offset out-of-bound for ")
1096  << opName->getStringRef();
1097 
1098  // Acquire the sub-buffer that represent the requested properties.
1099  ArrayRef<char> rawProperties;
1100  {
1101  // "Seek" to the requested offset by getting a new reader with the right
1102  // sub-buffer.
1103  EncodingReader reader(propertiesBuffers.drop_front(propertiesOffset),
1104  fileLoc);
1105  // Properties are stored as a sequence of {size + raw_data}.
1106  if (failed(
1107  dialectReader.withEncodingReader(reader).readBlob(rawProperties)))
1108  return failure();
1109  }
1110  // Setup a new reader to read from the `rawProperties` sub-buffer.
1111  EncodingReader reader(
1112  StringRef(rawProperties.begin(), rawProperties.size()), fileLoc);
1113  DialectReader propReader = dialectReader.withEncodingReader(reader);
1114 
1115  auto *iface = opName->getInterface<BytecodeOpInterface>();
1116  if (iface)
1117  return iface->readProperties(propReader, opState);
1118  if (opName->isRegistered())
1119  return propReader.emitError(
1120  "has properties but missing BytecodeOpInterface for ")
1121  << opName->getStringRef();
1122  // Unregistered op are storing properties as an attribute.
1123  return propReader.readAttribute(opState.propertiesAttr);
1124  }
1125 
1126 private:
1127  /// The properties buffer referenced within the bytecode file.
1128  ArrayRef<uint8_t> propertiesBuffers;
1129 
1130  /// Table of offset in the buffer above.
1131  SmallVector<int64_t> offsetTable;
1132 };
1133 } // namespace
1134 
1135 LogicalResult AttrTypeReader::initialize(
1136  MutableArrayRef<std::unique_ptr<BytecodeDialect>> dialects,
1137  ArrayRef<uint8_t> sectionData, ArrayRef<uint8_t> offsetSectionData) {
1138  EncodingReader offsetReader(offsetSectionData, fileLoc);
1139 
1140  // Parse the number of attribute and type entries.
1141  uint64_t numAttributes, numTypes;
1142  if (failed(offsetReader.parseVarInt(numAttributes)) ||
1143  failed(offsetReader.parseVarInt(numTypes)))
1144  return failure();
1145  attributes.resize(numAttributes);
1146  types.resize(numTypes);
1147 
1148  // A functor used to accumulate the offsets for the entries in the given
1149  // range.
1150  uint64_t currentOffset = 0;
1151  auto parseEntries = [&](auto &&range) {
1152  size_t currentIndex = 0, endIndex = range.size();
1153 
1154  // Parse an individual entry.
1155  auto parseEntryFn = [&](BytecodeDialect *dialect) -> LogicalResult {
1156  auto &entry = range[currentIndex++];
1157 
1158  uint64_t entrySize;
1159  if (failed(offsetReader.parseVarIntWithFlag(entrySize,
1160  entry.hasCustomEncoding)))
1161  return failure();
1162 
1163  // Verify that the offset is actually valid.
1164  if (currentOffset + entrySize > sectionData.size()) {
1165  return offsetReader.emitError(
1166  "Attribute or Type entry offset points past the end of section");
1167  }
1168 
1169  entry.data = sectionData.slice(currentOffset, entrySize);
1170  entry.dialect = dialect;
1171  currentOffset += entrySize;
1172  return success();
1173  };
1174  while (currentIndex != endIndex)
1175  if (failed(parseDialectGrouping(offsetReader, dialects, parseEntryFn)))
1176  return failure();
1177  return success();
1178  };
1179 
1180  // Process each of the attributes, and then the types.
1181  if (failed(parseEntries(attributes)) || failed(parseEntries(types)))
1182  return failure();
1183 
1184  // Ensure that we read everything from the section.
1185  if (!offsetReader.empty()) {
1186  return offsetReader.emitError(
1187  "unexpected trailing data in the Attribute/Type offset section");
1188  }
1189 
1190  return success();
1191 }
1192 
1193 template <typename T>
1194 T AttrTypeReader::resolveEntry(SmallVectorImpl<Entry<T>> &entries, size_t index,
1195  StringRef entryType) {
1196  if (index >= entries.size()) {
1197  emitError(fileLoc) << "invalid " << entryType << " index: " << index;
1198  return {};
1199  }
1200 
1201  // If the entry has already been resolved, there is nothing left to do.
1202  Entry<T> &entry = entries[index];
1203  if (entry.entry)
1204  return entry.entry;
1205 
1206  // Parse the entry.
1207  EncodingReader reader(entry.data, fileLoc);
1208 
1209  // Parse based on how the entry was encoded.
1210  if (entry.hasCustomEncoding) {
1211  if (failed(parseCustomEntry(entry, reader, entryType)))
1212  return T();
1213  } else if (failed(parseAsmEntry(entry.entry, reader, entryType))) {
1214  return T();
1215  }
1216 
1217  if (!reader.empty()) {
1218  reader.emitError("unexpected trailing bytes after " + entryType + " entry");
1219  return T();
1220  }
1221  return entry.entry;
1222 }
1223 
1224 template <typename T>
1225 LogicalResult AttrTypeReader::parseAsmEntry(T &result, EncodingReader &reader,
1226  StringRef entryType) {
1227  StringRef asmStr;
1228  if (failed(reader.parseNullTerminatedString(asmStr)))
1229  return failure();
1230 
1231  // Invoke the MLIR assembly parser to parse the entry text.
1232  size_t numRead = 0;
1233  MLIRContext *context = fileLoc->getContext();
1234  if constexpr (std::is_same_v<T, Type>)
1235  result =
1236  ::parseType(asmStr, context, &numRead, /*isKnownNullTerminated=*/true);
1237  else
1238  result = ::parseAttribute(asmStr, context, Type(), &numRead,
1239  /*isKnownNullTerminated=*/true);
1240  if (!result)
1241  return failure();
1242 
1243  // Ensure there weren't dangling characters after the entry.
1244  if (numRead != asmStr.size()) {
1245  return reader.emitError("trailing characters found after ", entryType,
1246  " assembly format: ", asmStr.drop_front(numRead));
1247  }
1248  return success();
1249 }
1250 
1251 template <typename T>
1252 LogicalResult AttrTypeReader::parseCustomEntry(Entry<T> &entry,
1253  EncodingReader &reader,
1254  StringRef entryType) {
1255  DialectReader dialectReader(*this, stringReader, resourceReader, dialectsMap,
1256  reader, bytecodeVersion);
1257  if (failed(entry.dialect->load(dialectReader, fileLoc.getContext())))
1258  return failure();
1259 
1260  if constexpr (std::is_same_v<T, Type>) {
1261  // Try parsing with callbacks first if available.
1262  for (const auto &callback :
1263  parserConfig.getBytecodeReaderConfig().getTypeCallbacks()) {
1264  if (failed(
1265  callback->read(dialectReader, entry.dialect->name, entry.entry)))
1266  return failure();
1267  // Early return if parsing was successful.
1268  if (!!entry.entry)
1269  return success();
1270 
1271  // Reset the reader if we failed to parse, so we can fall through the
1272  // other parsing functions.
1273  reader = EncodingReader(entry.data, reader.getLoc());
1274  }
1275  } else {
1276  // Try parsing with callbacks first if available.
1277  for (const auto &callback :
1278  parserConfig.getBytecodeReaderConfig().getAttributeCallbacks()) {
1279  if (failed(
1280  callback->read(dialectReader, entry.dialect->name, entry.entry)))
1281  return failure();
1282  // Early return if parsing was successful.
1283  if (!!entry.entry)
1284  return success();
1285 
1286  // Reset the reader if we failed to parse, so we can fall through the
1287  // other parsing functions.
1288  reader = EncodingReader(entry.data, reader.getLoc());
1289  }
1290  }
1291 
1292  // Ensure that the dialect implements the bytecode interface.
1293  if (!entry.dialect->interface) {
1294  return reader.emitError("dialect '", entry.dialect->name,
1295  "' does not implement the bytecode interface");
1296  }
1297 
1298  if constexpr (std::is_same_v<T, Type>)
1299  entry.entry = entry.dialect->interface->readType(dialectReader);
1300  else
1301  entry.entry = entry.dialect->interface->readAttribute(dialectReader);
1302 
1303  return success(!!entry.entry);
1304 }
1305 
1306 //===----------------------------------------------------------------------===//
1307 // Bytecode Reader
1308 //===----------------------------------------------------------------------===//
1309 
1310 /// This class is used to read a bytecode buffer and translate it into MLIR.
1312  struct RegionReadState;
1313  using LazyLoadableOpsInfo =
1314  std::list<std::pair<Operation *, RegionReadState>>;
1315  using LazyLoadableOpsMap =
1317 
1318 public:
1319  Impl(Location fileLoc, const ParserConfig &config, bool lazyLoading,
1320  llvm::MemoryBufferRef buffer,
1321  const std::shared_ptr<llvm::SourceMgr> &bufferOwnerRef)
1322  : config(config), fileLoc(fileLoc), lazyLoading(lazyLoading),
1323  attrTypeReader(stringReader, resourceReader, dialectsMap, version,
1324  fileLoc, config),
1325  // Use the builtin unrealized conversion cast operation to represent
1326  // forward references to values that aren't yet defined.
1327  forwardRefOpState(UnknownLoc::get(config.getContext()),
1328  "builtin.unrealized_conversion_cast", ValueRange(),
1329  NoneType::get(config.getContext())),
1330  buffer(buffer), bufferOwnerRef(bufferOwnerRef) {}
1331 
1332  /// Read the bytecode defined within `buffer` into the given block.
1333  LogicalResult read(Block *block,
1334  llvm::function_ref<bool(Operation *)> lazyOps);
1335 
1336  /// Return the number of ops that haven't been materialized yet.
1337  int64_t getNumOpsToMaterialize() const { return lazyLoadableOpsMap.size(); }
1338 
1339  bool isMaterializable(Operation *op) { return lazyLoadableOpsMap.count(op); }
1340 
1341  /// Materialize the provided operation, invoke the lazyOpsCallback on every
1342  /// newly found lazy operation.
1345  llvm::function_ref<bool(Operation *)> lazyOpsCallback) {
1346  this->lazyOpsCallback = lazyOpsCallback;
1347  auto resetlazyOpsCallback =
1348  llvm::make_scope_exit([&] { this->lazyOpsCallback = nullptr; });
1349  auto it = lazyLoadableOpsMap.find(op);
1350  assert(it != lazyLoadableOpsMap.end() &&
1351  "materialize called on non-materializable op");
1352  return materialize(it);
1353  }
1354 
1355  /// Materialize all operations.
1357  while (!lazyLoadableOpsMap.empty()) {
1358  if (failed(materialize(lazyLoadableOpsMap.begin())))
1359  return failure();
1360  }
1361  return success();
1362  }
1363 
1364  /// Finalize the lazy-loading by calling back with every op that hasn't been
1365  /// materialized to let the client decide if the op should be deleted or
1366  /// materialized. The op is materialized if the callback returns true, deleted
1367  /// otherwise.
1368  LogicalResult finalize(function_ref<bool(Operation *)> shouldMaterialize) {
1369  while (!lazyLoadableOps.empty()) {
1370  Operation *op = lazyLoadableOps.begin()->first;
1371  if (shouldMaterialize(op)) {
1372  if (failed(materialize(lazyLoadableOpsMap.find(op))))
1373  return failure();
1374  continue;
1375  }
1376  op->dropAllReferences();
1377  op->erase();
1378  lazyLoadableOps.pop_front();
1379  lazyLoadableOpsMap.erase(op);
1380  }
1381  return success();
1382  }
1383 
1384 private:
1385  LogicalResult materialize(LazyLoadableOpsMap::iterator it) {
1386  assert(it != lazyLoadableOpsMap.end() &&
1387  "materialize called on non-materializable op");
1388  valueScopes.emplace_back();
1389  std::vector<RegionReadState> regionStack;
1390  regionStack.push_back(std::move(it->getSecond()->second));
1391  lazyLoadableOps.erase(it->getSecond());
1392  lazyLoadableOpsMap.erase(it);
1393 
1394  while (!regionStack.empty())
1395  if (failed(parseRegions(regionStack, regionStack.back())))
1396  return failure();
1397  return success();
1398  }
1399 
1400  /// Return the context for this config.
1401  MLIRContext *getContext() const { return config.getContext(); }
1402 
1403  /// Parse the bytecode version.
1404  LogicalResult parseVersion(EncodingReader &reader);
1405 
1406  //===--------------------------------------------------------------------===//
1407  // Dialect Section
1408 
1409  LogicalResult parseDialectSection(ArrayRef<uint8_t> sectionData);
1410 
1411  /// Parse an operation name reference using the given reader, and set the
1412  /// `wasRegistered` flag that indicates if the bytecode was produced by a
1413  /// context where opName was registered.
1414  FailureOr<OperationName> parseOpName(EncodingReader &reader,
1415  std::optional<bool> &wasRegistered);
1416 
1417  //===--------------------------------------------------------------------===//
1418  // Attribute/Type Section
1419 
1420  /// Parse an attribute or type using the given reader.
1421  template <typename T>
1422  LogicalResult parseAttribute(EncodingReader &reader, T &result) {
1423  return attrTypeReader.parseAttribute(reader, result);
1424  }
1425  LogicalResult parseType(EncodingReader &reader, Type &result) {
1426  return attrTypeReader.parseType(reader, result);
1427  }
1428 
1429  //===--------------------------------------------------------------------===//
1430  // Resource Section
1431 
1433  parseResourceSection(EncodingReader &reader,
1434  std::optional<ArrayRef<uint8_t>> resourceData,
1435  std::optional<ArrayRef<uint8_t>> resourceOffsetData);
1436 
1437  //===--------------------------------------------------------------------===//
1438  // IR Section
1439 
1440  /// This struct represents the current read state of a range of regions. This
1441  /// struct is used to enable iterative parsing of regions.
1442  struct RegionReadState {
1443  RegionReadState(Operation *op, EncodingReader *reader,
1444  bool isIsolatedFromAbove)
1445  : RegionReadState(op->getRegions(), reader, isIsolatedFromAbove) {}
1446  RegionReadState(MutableArrayRef<Region> regions, EncodingReader *reader,
1447  bool isIsolatedFromAbove)
1448  : curRegion(regions.begin()), endRegion(regions.end()), reader(reader),
1449  isIsolatedFromAbove(isIsolatedFromAbove) {}
1450 
1451  /// The current regions being read.
1452  MutableArrayRef<Region>::iterator curRegion, endRegion;
1453  /// This is the reader to use for this region, this pointer is pointing to
1454  /// the parent region reader unless the current region is IsolatedFromAbove,
1455  /// in which case the pointer is pointing to the `owningReader` which is a
1456  /// section dedicated to the current region.
1457  EncodingReader *reader;
1458  std::unique_ptr<EncodingReader> owningReader;
1459 
1460  /// The number of values defined immediately within this region.
1461  unsigned numValues = 0;
1462 
1463  /// The current blocks of the region being read.
1464  SmallVector<Block *> curBlocks;
1465  Region::iterator curBlock = {};
1466 
1467  /// The number of operations remaining to be read from the current block
1468  /// being read.
1469  uint64_t numOpsRemaining = 0;
1470 
1471  /// A flag indicating if the regions being read are isolated from above.
1472  bool isIsolatedFromAbove = false;
1473  };
1474 
1475  LogicalResult parseIRSection(ArrayRef<uint8_t> sectionData, Block *block);
1476  LogicalResult parseRegions(std::vector<RegionReadState> &regionStack,
1477  RegionReadState &readState);
1478  FailureOr<Operation *> parseOpWithoutRegions(EncodingReader &reader,
1479  RegionReadState &readState,
1480  bool &isIsolatedFromAbove);
1481 
1482  LogicalResult parseRegion(RegionReadState &readState);
1483  LogicalResult parseBlockHeader(EncodingReader &reader,
1484  RegionReadState &readState);
1485  LogicalResult parseBlockArguments(EncodingReader &reader, Block *block);
1486 
1487  //===--------------------------------------------------------------------===//
1488  // Value Processing
1489 
1490  /// Parse an operand reference using the given reader. Returns nullptr in the
1491  /// case of failure.
1492  Value parseOperand(EncodingReader &reader);
1493 
1494  /// Sequentially define the given value range.
1495  LogicalResult defineValues(EncodingReader &reader, ValueRange values);
1496 
1497  /// Create a value to use for a forward reference.
1498  Value createForwardRef();
1499 
1500  //===--------------------------------------------------------------------===//
1501  // Use-list order helpers
1502 
1503  /// This struct is a simple storage that contains information required to
1504  /// reorder the use-list of a value with respect to the pre-order traversal
1505  /// ordering.
1506  struct UseListOrderStorage {
1507  UseListOrderStorage(bool isIndexPairEncoding,
1508  SmallVector<unsigned, 4> &&indices)
1509  : indices(std::move(indices)),
1510  isIndexPairEncoding(isIndexPairEncoding){};
1511  /// The vector containing the information required to reorder the
1512  /// use-list of a value.
1513  SmallVector<unsigned, 4> indices;
1514 
1515  /// Whether indices represent a pair of type `(src, dst)` or it is a direct
1516  /// indexing, such as `dst = order[src]`.
1517  bool isIndexPairEncoding;
1518  };
1519 
1520  /// Parse use-list order from bytecode for a range of values if available. The
1521  /// range is expected to be either a block argument or an op result range. On
1522  /// success, return a map of the position in the range and the use-list order
1523  /// encoding. The function assumes to know the size of the range it is
1524  /// processing.
1525  using UseListMapT = DenseMap<unsigned, UseListOrderStorage>;
1526  FailureOr<UseListMapT> parseUseListOrderForRange(EncodingReader &reader,
1527  uint64_t rangeSize);
1528 
1529  /// Shuffle the use-chain according to the order parsed.
1530  LogicalResult sortUseListOrder(Value value);
1531 
1532  /// Recursively visit all the values defined within topLevelOp and sort the
1533  /// use-list orders according to the indices parsed.
1534  LogicalResult processUseLists(Operation *topLevelOp);
1535 
1536  //===--------------------------------------------------------------------===//
1537  // Fields
1538 
1539  /// This class represents a single value scope, in which a value scope is
1540  /// delimited by isolated from above regions.
1541  struct ValueScope {
1542  /// Push a new region state onto this scope, reserving enough values for
1543  /// those defined within the current region of the provided state.
1544  void push(RegionReadState &readState) {
1545  nextValueIDs.push_back(values.size());
1546  values.resize(values.size() + readState.numValues);
1547  }
1548 
1549  /// Pop the values defined for the current region within the provided region
1550  /// state.
1551  void pop(RegionReadState &readState) {
1552  values.resize(values.size() - readState.numValues);
1553  nextValueIDs.pop_back();
1554  }
1555 
1556  /// The set of values defined in this scope.
1557  std::vector<Value> values;
1558 
1559  /// The ID for the next defined value for each region current being
1560  /// processed in this scope.
1561  SmallVector<unsigned, 4> nextValueIDs;
1562  };
1563 
1564  /// The configuration of the parser.
1565  const ParserConfig &config;
1566 
1567  /// A location to use when emitting errors.
1568  Location fileLoc;
1569 
1570  /// Flag that indicates if lazyloading is enabled.
1571  bool lazyLoading;
1572 
1573  /// Keep track of operations that have been lazy loaded (their regions haven't
1574  /// been materialized), along with the `RegionReadState` that allows to
1575  /// lazy-load the regions nested under the operation.
1576  LazyLoadableOpsInfo lazyLoadableOps;
1577  LazyLoadableOpsMap lazyLoadableOpsMap;
1578  llvm::function_ref<bool(Operation *)> lazyOpsCallback;
1579 
1580  /// The reader used to process attribute and types within the bytecode.
1581  AttrTypeReader attrTypeReader;
1582 
1583  /// The version of the bytecode being read.
1584  uint64_t version = 0;
1585 
1586  /// The producer of the bytecode being read.
1587  StringRef producer;
1588 
1589  /// The table of IR units referenced within the bytecode file.
1591  llvm::StringMap<BytecodeDialect *> dialectsMap;
1593 
1594  /// The reader used to process resources within the bytecode.
1595  ResourceSectionReader resourceReader;
1596 
1597  /// Worklist of values with custom use-list orders to process before the end
1598  /// of the parsing.
1599  DenseMap<void *, UseListOrderStorage> valueToUseListMap;
1600 
1601  /// The table of strings referenced within the bytecode file.
1602  StringSectionReader stringReader;
1603 
1604  /// The table of properties referenced by the operation in the bytecode file.
1605  PropertiesSectionReader propertiesReader;
1606 
1607  /// The current set of available IR value scopes.
1608  std::vector<ValueScope> valueScopes;
1609 
1610  /// The global pre-order operation ordering.
1611  DenseMap<Operation *, unsigned> operationIDs;
1612 
1613  /// A block containing the set of operations defined to create forward
1614  /// references.
1615  Block forwardRefOps;
1616 
1617  /// A block containing previously created, and no longer used, forward
1618  /// reference operations.
1619  Block openForwardRefOps;
1620 
1621  /// An operation state used when instantiating forward references.
1622  OperationState forwardRefOpState;
1623 
1624  /// Reference to the input buffer.
1625  llvm::MemoryBufferRef buffer;
1626 
1627  /// The optional owning source manager, which when present may be used to
1628  /// extend the lifetime of the input buffer.
1629  const std::shared_ptr<llvm::SourceMgr> &bufferOwnerRef;
1630 };
1631 
1633  Block *block, llvm::function_ref<bool(Operation *)> lazyOpsCallback) {
1634  EncodingReader reader(buffer.getBuffer(), fileLoc);
1635  this->lazyOpsCallback = lazyOpsCallback;
1636  auto resetlazyOpsCallback =
1637  llvm::make_scope_exit([&] { this->lazyOpsCallback = nullptr; });
1638 
1639  // Skip over the bytecode header, this should have already been checked.
1640  if (failed(reader.skipBytes(StringRef("ML\xefR").size())))
1641  return failure();
1642  // Parse the bytecode version and producer.
1643  if (failed(parseVersion(reader)) ||
1644  failed(reader.parseNullTerminatedString(producer)))
1645  return failure();
1646 
1647  // Add a diagnostic handler that attaches a note that includes the original
1648  // producer of the bytecode.
1649  ScopedDiagnosticHandler diagHandler(getContext(), [&](Diagnostic &diag) {
1650  diag.attachNote() << "in bytecode version " << version
1651  << " produced by: " << producer;
1652  return failure();
1653  });
1654 
1655  // Parse the raw data for each of the top-level sections of the bytecode.
1656  std::optional<ArrayRef<uint8_t>>
1657  sectionDatas[bytecode::Section::kNumSections];
1658  while (!reader.empty()) {
1659  // Read the next section from the bytecode.
1660  bytecode::Section::ID sectionID;
1661  ArrayRef<uint8_t> sectionData;
1662  if (failed(reader.parseSection(sectionID, sectionData)))
1663  return failure();
1664 
1665  // Check for duplicate sections, we only expect one instance of each.
1666  if (sectionDatas[sectionID]) {
1667  return reader.emitError("duplicate top-level section: ",
1668  ::toString(sectionID));
1669  }
1670  sectionDatas[sectionID] = sectionData;
1671  }
1672  // Check that all of the required sections were found.
1673  for (int i = 0; i < bytecode::Section::kNumSections; ++i) {
1674  bytecode::Section::ID sectionID = static_cast<bytecode::Section::ID>(i);
1675  if (!sectionDatas[i] && !isSectionOptional(sectionID, version)) {
1676  return reader.emitError("missing data for top-level section: ",
1677  ::toString(sectionID));
1678  }
1679  }
1680 
1681  // Process the string section first.
1682  if (failed(stringReader.initialize(
1683  fileLoc, *sectionDatas[bytecode::Section::kString])))
1684  return failure();
1685 
1686  // Process the properties section.
1687  if (sectionDatas[bytecode::Section::kProperties] &&
1688  failed(propertiesReader.initialize(
1689  fileLoc, *sectionDatas[bytecode::Section::kProperties])))
1690  return failure();
1691 
1692  // Process the dialect section.
1693  if (failed(parseDialectSection(*sectionDatas[bytecode::Section::kDialect])))
1694  return failure();
1695 
1696  // Process the resource section if present.
1697  if (failed(parseResourceSection(
1698  reader, sectionDatas[bytecode::Section::kResource],
1699  sectionDatas[bytecode::Section::kResourceOffset])))
1700  return failure();
1701 
1702  // Process the attribute and type section.
1703  if (failed(attrTypeReader.initialize(
1704  dialects, *sectionDatas[bytecode::Section::kAttrType],
1705  *sectionDatas[bytecode::Section::kAttrTypeOffset])))
1706  return failure();
1707 
1708  // Finally, process the IR section.
1709  return parseIRSection(*sectionDatas[bytecode::Section::kIR], block);
1710 }
1711 
1712 LogicalResult BytecodeReader::Impl::parseVersion(EncodingReader &reader) {
1713  if (failed(reader.parseVarInt(version)))
1714  return failure();
1715 
1716  // Validate the bytecode version.
1717  uint64_t currentVersion = bytecode::kVersion;
1718  uint64_t minSupportedVersion = bytecode::kMinSupportedVersion;
1719  if (version < minSupportedVersion) {
1720  return reader.emitError("bytecode version ", version,
1721  " is older than the current version of ",
1722  currentVersion, ", and upgrade is not supported");
1723  }
1724  if (version > currentVersion) {
1725  return reader.emitError("bytecode version ", version,
1726  " is newer than the current version ",
1727  currentVersion);
1728  }
1729  // Override any request to lazy-load if the bytecode version is too old.
1730  if (version < bytecode::kLazyLoading)
1731  lazyLoading = false;
1732  return success();
1733 }
1734 
1735 //===----------------------------------------------------------------------===//
1736 // Dialect Section
1737 
1738 LogicalResult BytecodeDialect::load(const DialectReader &reader,
1739  MLIRContext *ctx) {
1740  if (dialect)
1741  return success();
1742  Dialect *loadedDialect = ctx->getOrLoadDialect(name);
1743  if (!loadedDialect && !ctx->allowsUnregisteredDialects()) {
1744  return reader.emitError("dialect '")
1745  << name
1746  << "' is unknown. If this is intended, please call "
1747  "allowUnregisteredDialects() on the MLIRContext, or use "
1748  "-allow-unregistered-dialect with the MLIR tool used.";
1749  }
1750  dialect = loadedDialect;
1751 
1752  // If the dialect was actually loaded, check to see if it has a bytecode
1753  // interface.
1754  if (loadedDialect)
1755  interface = dyn_cast<BytecodeDialectInterface>(loadedDialect);
1756  if (!versionBuffer.empty()) {
1757  if (!interface)
1758  return reader.emitError("dialect '")
1759  << name
1760  << "' does not implement the bytecode interface, "
1761  "but found a version entry";
1762  EncodingReader encReader(versionBuffer, reader.getLoc());
1763  DialectReader versionReader = reader.withEncodingReader(encReader);
1764  loadedVersion = interface->readVersion(versionReader);
1765  if (!loadedVersion)
1766  return failure();
1767  }
1768  return success();
1769 }
1770 
/// Parses the dialect section held in `sectionData`: first the list of
/// dialects (each optionally followed by a version payload), then the
/// operation names grouped by dialect. Fills `dialects`, `dialectsMap` and
/// `opNames`.
// NOTE(review): the line carrying this definition's return type is absent from
// this listing; every exit path below returns failure() or success().
1772 BytecodeReader::Impl::parseDialectSection(ArrayRef<uint8_t> sectionData) {
1773  EncodingReader sectionReader(sectionData, fileLoc);
1774 
1775  // Parse the number of dialects in the section.
1776  uint64_t numDialects;
1777  if (failed(sectionReader.parseVarInt(numDialects)))
1778  return failure();
 // The count comes straight from the file and is used unchecked to size the
 // dialect table.
1779  dialects.resize(numDialects);
1780 
1781  // Parse each of the dialects.
1782  for (uint64_t i = 0; i < numDialects; ++i) {
1783  dialects[i] = std::make_unique<BytecodeDialect>();
1784  /// Before version kDialectVersioning, there wasn't any versioning available
1785  /// for dialects, and the entryIdx represents the string itself.
 // NOTE(review): this legacy path `continue`s past the `dialectsMap`
 // insertion at the bottom of the loop, so such dialects are never added to
 // the map. Confirm this is intended.
1786  if (version < bytecode::kDialectVersioning) {
1787  if (failed(stringReader.parseString(sectionReader, dialects[i]->name)))
1788  return failure();
1789  continue;
1790  }
1791 
1792  // Parse ID representing dialect and version.
1793  uint64_t dialectNameIdx;
1794  bool versionAvailable;
1795  if (failed(sectionReader.parseVarIntWithFlag(dialectNameIdx,
1796  versionAvailable)))
1797  return failure();
1798  if (failed(stringReader.parseStringAtIndex(sectionReader, dialectNameIdx,
1799  dialects[i]->name)))
1800  return failure();
 // The version payload is kept as raw bytes in `versionBuffer`; it is only
 // decoded later, in BytecodeDialect::load.
1801  if (versionAvailable) {
1802  bytecode::Section::ID sectionID;
1803  if (failed(sectionReader.parseSection(sectionID,
1804  dialects[i]->versionBuffer)))
1805  return failure();
1806  if (sectionID != bytecode::Section::kDialectVersions) {
1807  emitError(fileLoc, "expected dialect version section");
1808  return failure();
1809  }
1810  }
1811  dialectsMap[dialects[i]->name] = dialects[i].get();
1812  }
1813 
1814  // Parse the operation names, which are grouped by dialect.
 // Callback handed to parseDialectGrouping below: reads one operation name
 // for `dialect` and appends it to `opNames`.
1815  auto parseOpName = [&](BytecodeDialect *dialect) {
1816  StringRef opName;
 // Left as std::nullopt when the bytecode predates
 // kNativePropertiesEncoding.
1817  std::optional<bool> wasRegistered;
1818  // Prior to version kNativePropertiesEncoding, the information about whether
1819  // an op was registered or not wasn't encoded.
1820  if (version < bytecode::kNativePropertiesEncoding) {
1821  if (failed(stringReader.parseString(sectionReader, opName)))
1822  return failure();
1823  } else {
1824  bool wasRegisteredFlag;
1825  if (failed(stringReader.parseStringWithFlag(sectionReader, opName,
1826  wasRegisteredFlag)))
1827  return failure();
1828  wasRegistered = wasRegisteredFlag;
1829  }
1830  opNames.emplace_back(dialect, opName, wasRegistered);
1831  return success();
1832  };
1833  // Avoid re-allocation in bytecode version >=kElideUnknownBlockArgLocation
1834  // where the number of ops are known.
1835  if (version >= bytecode::kElideUnknownBlockArgLocation) {
1836  uint64_t numOps;
1837  if (failed(sectionReader.parseVarInt(numOps)))
1838  return failure();
1839  opNames.reserve(numOps);
1840  }
 // Consume dialect groups until the section is exhausted.
1841  while (!sectionReader.empty())
1842  if (failed(parseDialectGrouping(sectionReader, dialects, parseOpName)))
1843  return failure();
1844  return success();
1845 }
1846 
/// Parses an index into `opNames` from `reader` and returns the resolved
/// operation name, building and caching it on first use.
///
/// `wasRegistered` receives the flag stored with the name entry. The owning
/// dialect is loaded the first time a given name is resolved.
// NOTE(review): the line carrying this definition's return type is absent from
// this listing; the body returns failure(), a diagnostic, or
// `*opName->opName`.
1848 BytecodeReader::Impl::parseOpName(EncodingReader &reader,
1849  std::optional<bool> &wasRegistered) {
1850  BytecodeOperationName *opName = nullptr;
1851  if (failed(parseEntry(reader, opNames, opName, "operation name")))
1852  return failure();
1853  wasRegistered = opName->wasRegistered;
1854  // Check to see if this operation name has already been resolved. If we
1855  // haven't, load the dialect and build the operation name.
1856  if (!opName->opName) {
1857  // Load the dialect and its version.
1858  DialectReader dialectReader(attrTypeReader, stringReader, resourceReader,
1859  dialectsMap, reader, version);
1860  if (failed(opName->dialect->load(dialectReader, getContext())))
1861  return failure();
1862  // If the opName is empty, this is because we used to accept names such as
1863  // `foo` without any `.` separator. We shouldn't tolerate this in textual
1864  // format anymore but for now we'll be backward compatible. This can only
1865  // happen with unregistered dialects.
1866  if (opName->name.empty()) {
 // An empty name is rejected when the dialect was actually loaded.
1867  if (opName->dialect->getLoadedDialect())
1868  return emitError(fileLoc) << "has an empty opname for dialect '"
1869  << opName->dialect->name << "'\n";
1870 
 // Otherwise the dialect name alone serves as the operation name.
1871  opName->opName.emplace(opName->dialect->name, getContext());
1872  } else {
 // Regular case: the full name is "<dialect>.<op>".
1873  opName->opName.emplace((opName->dialect->name + "." + opName->name).str(),
1874  getContext());
1875  }
1876  }
1877  return *opName->opName;
1878 }
1879 
1880 //===----------------------------------------------------------------------===//
1881 // Resource Section
1882 
1883 LogicalResult BytecodeReader::Impl::parseResourceSection(
1884  EncodingReader &reader, std::optional<ArrayRef<uint8_t>> resourceData,
1885  std::optional<ArrayRef<uint8_t>> resourceOffsetData) {
1886  // Ensure both sections are either present or not.
1887  if (resourceData.has_value() != resourceOffsetData.has_value()) {
1888  if (resourceOffsetData)
1889  return emitError(fileLoc, "unexpected resource offset section when "
1890  "resource section is not present");
1891  return emitError(
1892  fileLoc,
1893  "expected resource offset section when resource section is present");
1894  }
1895 
1896  // If the resource sections are absent, there is nothing to do.
1897  if (!resourceData)
1898  return success();
1899 
1900  // Initialize the resource reader with the resource sections.
1901  DialectReader dialectReader(attrTypeReader, stringReader, resourceReader,
1902  dialectsMap, reader, version);
1903  return resourceReader.initialize(fileLoc, config, dialects, stringReader,
1904  *resourceData, *resourceOffsetData,
1905  dialectReader, bufferOwnerRef);
1906 }
1907 
1908 //===----------------------------------------------------------------------===//
1909 // UseListOrder Helpers
1910 
/// Parses the custom use-list orders recorded for a range of `numResults`
/// values and returns them keyed by the index of the value within the range.
///
/// Layout as read below: when `numResults > 1`, a varint count of entries;
/// then, per entry, the value index (only when `numResults > 1`), a
/// varint-with-flag giving the number of indices and whether they are encoded
/// as index pairs, and finally that many varint indices.
// NOTE(review): the lines carrying the return type and the declaration of
// `map` are absent from this listing.
1912 BytecodeReader::Impl::parseUseListOrderForRange(EncodingReader &reader,
1913  uint64_t numResults) {
 // With a single value there is exactly one entry and no count is encoded.
1915  uint64_t numValuesToRead = 1;
1916  if (numResults > 1 && failed(reader.parseVarInt(numValuesToRead)))
1917  return failure();
1918 
1919  for (size_t valueIdx = 0; valueIdx < numValuesToRead; valueIdx++) {
 // Likewise, the value index is implicit (0) for a single value.
1920  uint64_t resultIdx = 0;
1921  if (numResults > 1 && failed(reader.parseVarInt(resultIdx)))
1922  return failure();
1923 
1924  uint64_t numValues;
1925  bool indexPairEncoding;
1926  if (failed(reader.parseVarIntWithFlag(numValues, indexPairEncoding)))
1927  return failure();
1928 
 // Each index is read as a 64-bit varint and stored as `unsigned`.
1929  SmallVector<unsigned, 4> useListOrders;
1930  for (size_t idx = 0; idx < numValues; idx++) {
1931  uint64_t index;
1932  if (failed(reader.parseVarInt(index)))
1933  return failure();
1934  useListOrders.push_back(index);
1935  }
1936 
1937  // Store the parsed order in the map, keyed by the result index.
 // try_emplace keeps the first entry if the same index appears twice.
1938  map.try_emplace(resultIdx, UseListOrderStorage(indexPairEncoding,
1939  std::move(useListOrders)));
1940  }
1941 
1942  return map;
1943 }
1944 
1945 /// Sorts each use according to the order specified in the use-list parsed. If
1946 /// the custom use-list is not found, this means that the order needs to be
1947 /// consistent with the reverse pre-order walk of the IR. If multiple uses lie
1948 /// on the same operation, the order will follow the reverse operand number
1949 /// ordering.
1950 LogicalResult BytecodeReader::Impl::sortUseListOrder(Value value) {
1951  // Early return for trivial use-lists.
1952  if (value.use_empty() || value.hasOneUse())
1953  return success();
1954 
1955  bool hasIncomingOrder =
1956  valueToUseListMap.contains(value.getAsOpaquePointer());
1957 
1958  // Compute the current order of the use-list with respect to the global
1959  // ordering. Detect if the order is already sorted while doing so.
1960  bool alreadySorted = true;
1961  auto &firstUse = *value.use_begin();
1962  uint64_t prevID =
1963  bytecode::getUseID(firstUse, operationIDs.at(firstUse.getOwner()));
1964  llvm::SmallVector<std::pair<unsigned, uint64_t>> currentOrder = {{0, prevID}};
1965  for (auto item : llvm::drop_begin(llvm::enumerate(value.getUses()))) {
1966  uint64_t currentID = bytecode::getUseID(
1967  item.value(), operationIDs.at(item.value().getOwner()));
1968  alreadySorted &= prevID > currentID;
1969  currentOrder.push_back({item.index(), currentID});
1970  prevID = currentID;
1971  }
1972 
1973  // If the order is already sorted, and there wasn't a custom order to apply
1974  // from the bytecode file, we are done.
1975  if (alreadySorted && !hasIncomingOrder)
1976  return success();
1977 
1978  // If not already sorted, sort the indices of the current order by descending
1979  // useIDs.
1980  if (!alreadySorted)
1981  std::sort(
1982  currentOrder.begin(), currentOrder.end(),
1983  [](auto elem1, auto elem2) { return elem1.second > elem2.second; });
1984 
1985  if (!hasIncomingOrder) {
1986  // If the bytecode file did not contain any custom use-list order, it means
1987  // that the order was descending useID. Hence, shuffle by the first index
1988  // of the `currentOrder` pair.
1990  llvm::map_range(currentOrder, [&](auto item) { return item.first; }));
1991  value.shuffleUseList(shuffle);
1992  return success();
1993  }
1994 
1995  // Pull the custom order info from the map.
1996  UseListOrderStorage customOrder =
1997  valueToUseListMap.at(value.getAsOpaquePointer());
1998  SmallVector<unsigned, 4> shuffle = std::move(customOrder.indices);
1999  uint64_t numUses =
2000  std::distance(value.getUses().begin(), value.getUses().end());
2001 
2002  // If the encoding was a pair of indices `(src, dst)` for every permutation,
2003  // reconstruct the shuffle vector for every use. Initialize the shuffle vector
2004  // as identity, and then apply the mapping encoded in the indices.
2005  if (customOrder.isIndexPairEncoding) {
2006  // Return failure if the number of indices was not representing pairs.
2007  if (shuffle.size() & 1)
2008  return failure();
2009 
2010  SmallVector<unsigned, 4> newShuffle(numUses);
2011  size_t idx = 0;
2012  std::iota(newShuffle.begin(), newShuffle.end(), idx);
2013  for (idx = 0; idx < shuffle.size(); idx += 2)
2014  newShuffle[shuffle[idx]] = shuffle[idx + 1];
2015 
2016  shuffle = std::move(newShuffle);
2017  }
2018 
2019  // Make sure that the indices represent a valid mapping. That is, the sum of
2020  // all the values needs to be equal to (numUses - 1) * numUses / 2, and no
2021  // duplicates are allowed in the list.
2022  DenseSet<unsigned> set;
2023  uint64_t accumulator = 0;
2024  for (const auto &elem : shuffle) {
2025  if (set.contains(elem))
2026  return failure();
2027  accumulator += elem;
2028  set.insert(elem);
2029  }
2030  if (numUses != shuffle.size() ||
2031  accumulator != (((numUses - 1) * numUses) >> 1))
2032  return failure();
2033 
2034  // Apply the current ordering map onto the shuffle vector to get the final
2035  // use-list sorting indices before shuffling.
2036  shuffle = SmallVector<unsigned, 4>(llvm::map_range(
2037  currentOrder, [&](auto item) { return shuffle[item.first]; }));
2038  value.shuffleUseList(shuffle);
2039  return success();
2040 }
2041 
2042 LogicalResult BytecodeReader::Impl::processUseLists(Operation *topLevelOp) {
2043  // Precompute operation IDs according to the pre-order walk of the IR. We
2044  // can't do this while parsing since parseRegions ordering is not strictly
2045  // equal to the pre-order walk.
2046  unsigned operationID = 0;
2047  topLevelOp->walk<mlir::WalkOrder::PreOrder>(
2048  [&](Operation *op) { operationIDs.try_emplace(op, operationID++); });
2049 
2050  auto blockWalk = topLevelOp->walk([this](Block *block) {
2051  for (auto arg : block->getArguments())
2052  if (failed(sortUseListOrder(arg)))
2053  return WalkResult::interrupt();
2054  return WalkResult::advance();
2055  });
2056 
2057  auto resultWalk = topLevelOp->walk([this](Operation *op) {
2058  for (auto result : op->getResults())
2059  if (failed(sortUseListOrder(result)))
2060  return WalkResult::interrupt();
2061  return WalkResult::advance();
2062  });
2063 
2064  return failure(blockWalk.wasInterrupted() || resultWalk.wasInterrupted());
2065 }
2066 
2067 //===----------------------------------------------------------------------===//
2068 // IR Section
2069 
/// Parses the IR section held in `sectionData` and moves the resulting
/// operations to the end of `block`.
///
/// The IR is first materialized inside a temporary module. Use-list orders
/// are then applied, each dialect with a loaded version gets a chance to
/// upgrade the IR, and the module is optionally verified before its
/// operations are spliced into `block`.
// NOTE(review): the line carrying this definition's return type is absent from
// this listing; every exit path returns failure(), a diagnostic, or
// success().
2071 BytecodeReader::Impl::parseIRSection(ArrayRef<uint8_t> sectionData,
2072  Block *block) {
2073  EncodingReader reader(sectionData, fileLoc);
2074 
2075  // A stack of operation regions currently being read from the bytecode.
2076  std::vector<RegionReadState> regionStack;
2077 
2078  // Parse the top-level block using a temporary module operation.
2079  OwningOpRef<ModuleOp> moduleOp = ModuleOp::create(fileLoc);
2080  regionStack.emplace_back(*moduleOp, &reader, /*isIsolatedFromAbove=*/true);
2081  regionStack.back().curBlocks.push_back(moduleOp->getBody());
2082  regionStack.back().curBlock = regionStack.back().curRegion->begin();
2083  if (failed(parseBlockHeader(reader, regionStack.back())))
2084  return failure();
 // Open the value scope for the top-level region.
2085  valueScopes.emplace_back();
2086  valueScopes.back().push(regionStack.back());
2087 
2088  // Iteratively parse regions until everything has been resolved.
2089  while (!regionStack.empty())
2090  if (failed(parseRegions(regionStack, regionStack.back())))
2091  return failure();
 // Any operation still in `forwardRefOps` stands for an operand that was
 // referenced but never defined.
2092  if (!forwardRefOps.empty()) {
2093  return reader.emitError(
2094  "not all forward unresolved forward operand references");
2095  }
2096 
2097  // Sort use-lists according to what is specified in the bytecode.
2098  if (failed(processUseLists(*moduleOp)))
2099  return reader.emitError(
2100  "parsed use-list orders were invalid and could not be applied");
2101 
2102  // Resolve dialect version.
2103  for (const std::unique_ptr<BytecodeDialect> &byteCodeDialect : dialects) {
2104  // Parsing is complete, give an opportunity to each dialect to visit the
2105  // IR and perform upgrades.
 // Dialects without a loaded version are skipped.
2106  if (!byteCodeDialect->loadedVersion)
2107  continue;
2108  if (byteCodeDialect->interface &&
2109  failed(byteCodeDialect->interface->upgradeFromVersion(
2110  *moduleOp, *byteCodeDialect->loadedVersion)))
2111  return failure();
2112  }
2113 
2114  // Verify that the parsed operations are valid.
2115  if (config.shouldVerifyAfterParse() && failed(verify(*moduleOp)))
2116  return failure();
2117 
2118  // Splice the parsed operations over to the provided top-level block.
2119  auto &parsedOps = moduleOp->getBody()->getOperations();
2120  auto &destOps = block->getOperations();
2121  destOps.splice(destOps.end(), parsedOps, parsedOps.begin(), parsedOps.end());
2122  return success();
2123 }
2124 
/// Advances parsing of the regions described by `readState`, which is the top
/// entry of `regionStack`.
///
/// The function is resumable. When an operation with regions is encountered,
/// a child state is pushed onto `regionStack` and the function returns; the
/// caller invokes it again on the new top of the stack, and processing of
/// `readState` resumes once the child has been popped. When all regions of
/// `readState` are done, its entry is popped from `regionStack`.
// NOTE(review): the line carrying this definition's return type is absent from
// this listing; every exit path returns failure(), a diagnostic, or
// success().
2126 BytecodeReader::Impl::parseRegions(std::vector<RegionReadState> &regionStack,
2127  RegionReadState &readState) {
2128  // Process regions, blocks, and operations until the end or if a nested
2129  // region is encountered. In this case we push a new state in regionStack and
2130  // return, the processing of the current region will resume afterward.
2131  for (; readState.curRegion != readState.endRegion; ++readState.curRegion) {
2132  // If the current block hasn't been setup yet, parse the header for this
2133  // region. The current block is already setup when this function was
2134  // interrupted to recurse down in a nested region and we resume the current
2135  // block after processing the nested region.
2136  if (readState.curBlock == Region::iterator()) {
2137  if (failed(parseRegion(readState)))
2138  return failure();
2139 
2140  // If the region is empty, there is nothing more to do.
2141  if (readState.curRegion->empty())
2142  continue;
2143  }
2144 
2145  // Parse the blocks within the region.
2146  EncodingReader &reader = *readState.reader;
2147  do {
 // `numOpsRemaining` is set by parseBlockHeader for the current block.
2148  while (readState.numOpsRemaining--) {
2149  // Read in the next operation. We don't read its regions directly, we
2150  // handle those afterwards as necessary.
2151  bool isIsolatedFromAbove = false;
 // NOTE(review): the line declaring `op` is absent from this listing; it
 // receives the result of the call below.
2153  parseOpWithoutRegions(reader, readState, isIsolatedFromAbove);
2154  if (failed(op))
2155  return failure();
2156 
2157  // If the op has regions, add it to the stack for processing and return:
2158  // we stop the processing of the current region and resume it after the
2159  // inner one is completed. Unless LazyLoading is activated in which case
2160  // nested region parsing is delayed.
2161  if ((*op)->getNumRegions()) {
2162  RegionReadState childState(*op, &reader, isIsolatedFromAbove);
2163 
2164  // Isolated regions are encoded as a section in version 2 and above.
2165  if (version >= bytecode::kLazyLoading && isIsolatedFromAbove) {
2166  bytecode::Section::ID sectionID;
2167  ArrayRef<uint8_t> sectionData;
2168  if (failed(reader.parseSection(sectionID, sectionData)))
2169  return failure();
2170  if (sectionID != bytecode::Section::kIR)
2171  return emitError(fileLoc, "expected IR section for region");
 // The child reads from its own reader over the nested section's bytes.
2172  childState.owningReader =
2173  std::make_unique<EncodingReader>(sectionData, fileLoc);
2174  childState.reader = childState.owningReader.get();
2175 
2176  // If the user has a callback set, they have the opportunity to
2177  // control lazyloading as we go.
 // Deferred ops are recorded with their read state and skipped for now;
 // `continue` moves on to the next operation of the current block.
2178  if (lazyLoading && (!lazyOpsCallback || !lazyOpsCallback(*op))) {
2179  lazyLoadableOps.emplace_back(*op, std::move(childState));
2180  lazyLoadableOpsMap.try_emplace(*op,
2181  std::prev(lazyLoadableOps.end()));
2182  continue;
2183  }
2184  }
2185  regionStack.push_back(std::move(childState));
2186 
2187  // If the op is isolated from above, push a new value scope.
2188  if (isIsolatedFromAbove)
2189  valueScopes.emplace_back();
2190  return success();
2191  }
2192  }
2193 
2194  // Move to the next block of the region.
2195  if (++readState.curBlock == readState.curRegion->end())
2196  break;
2197  if (failed(parseBlockHeader(reader, readState)))
2198  return failure();
2199  } while (true);
2200 
2201  // Reset the current block and any values reserved for this region.
2202  readState.curBlock = {};
2203  valueScopes.back().pop(readState);
2204  }
2205 
2206  // When the regions have been fully parsed, pop them off of the read stack. If
2207  // the regions were isolated from above, we also pop the last value scope.
2208  if (readState.isIsolatedFromAbove) {
2209  assert(!valueScopes.empty() && "Expect a valueScope after reading region");
2210  valueScopes.pop_back();
2211  }
2212  assert(!regionStack.empty() && "Expect a regionStack after reading region");
 // NOTE(review): `readState` is passed as `regionStack.back()` by the caller
 // visible in this file, so it must not be used after this pop.
2213  regionStack.pop_back();
2214  return success();
2215 }
2216 
/// Parses a single operation from `reader`, creates it at the end of the
/// current block of `readState`, and returns it. The contents of the
/// operation's regions are not parsed here; only empty regions are attached.
///
/// `isIsolatedFromAbove` is set from the flag encoded with the region count.
/// The components are read in this order: name, mask byte, location,
/// attributes, properties, result types, operands, successors, use-list
/// orders, region count.
// NOTE(review): several lines are absent from this listing: the return type
// and the statements opening the scopes for properties, results, operands,
// successors, use-list orders and regions. Each gap is marked below.
2218 BytecodeReader::Impl::parseOpWithoutRegions(EncodingReader &reader,
2219  RegionReadState &readState,
2220  bool &isIsolatedFromAbove) {
2221  // Parse the name of the operation.
2222  std::optional<bool> wasRegistered;
2223  FailureOr<OperationName> opName = parseOpName(reader, wasRegistered);
2224  if (failed(opName))
2225  return failure();
2226 
2227  // Parse the operation mask, which indicates which components of the operation
2228  // are present.
2229  uint8_t opMask;
2230  if (failed(reader.parseByte(opMask)))
2231  return failure();
2232 
2233  /// Parse the location.
2234  LocationAttr opLoc;
2235  if (failed(parseAttribute(reader, opLoc)))
2236  return failure();
2237 
2238  // With the location and name resolved, we can start building the operation
2239  // state.
2240  OperationState opState(opLoc, *opName);
2241 
2242  // Parse the attributes of the operation.
2243  if (opMask & bytecode::OpEncodingMask::kHasAttrs) {
2244  DictionaryAttr dictAttr;
2245  if (failed(parseAttribute(reader, dictAttr)))
2246  return failure();
2247  opState.attributes = dictAttr;
2248  }
2249 
 // NOTE(review): the line opening this scope is absent from this listing; per
 // the comment below it concerns the kHasProperties component.
2251  // kHasProperties wasn't emitted in older bytecode, we should never get
2252  // there without also having the `wasRegistered` flag available.
2253  if (!wasRegistered)
2254  return emitError(fileLoc,
2255  "Unexpected missing `wasRegistered` opname flag at "
2256  "bytecode version ")
2257  << version << " with properties.";
2258  // When an operation is emitted without being registered, the properties are
2259  // stored as an attribute. Otherwise the op must implement the bytecode
2260  // interface and control the serialization.
 // NOTE(review): `wasRegistered` is a std::optional<bool>, so this condition
 // tests whether a value is present, not the contained flag. After the guard
 // above a value is always present, so the else branch is never reached.
 // Confirm this is intended before changing it.
2261  if (wasRegistered) {
2262  DialectReader dialectReader(attrTypeReader, stringReader, resourceReader,
2263  dialectsMap, reader, version);
2264  if (failed(
2265  propertiesReader.read(fileLoc, dialectReader, &*opName, opState)))
2266  return failure();
2267  } else {
2268  // If the operation wasn't registered when it was emitted, the properties
2269  // were serialized as an attribute.
2270  if (failed(parseAttribute(reader, opState.propertiesAttr)))
2271  return failure();
2272  }
2273  }
2274 
2275  /// Parse the results of the operation.
 // NOTE(review): the line opening this scope is absent from this listing.
2277  uint64_t numResults;
2278  if (failed(reader.parseVarInt(numResults)))
2279  return failure();
2280  opState.types.resize(numResults);
 // The 64-bit count is narrowed to `int` for the loop bound.
2281  for (int i = 0, e = numResults; i < e; ++i)
2282  if (failed(parseType(reader, opState.types[i])))
2283  return failure();
2284  }
2285 
2286  /// Parse the operands of the operation.
 // NOTE(review): the line opening this scope is absent from this listing.
2288  uint64_t numOperands;
2289  if (failed(reader.parseVarInt(numOperands)))
2290  return failure();
2291  opState.operands.resize(numOperands);
 // parseOperand returns a null Value on failure.
2292  for (int i = 0, e = numOperands; i < e; ++i)
2293  if (!(opState.operands[i] = parseOperand(reader)))
2294  return failure();
2295  }
2296 
2297  /// Parse the successors of the operation.
 // NOTE(review): the line opening this scope is absent from this listing.
2299  uint64_t numSuccs;
2300  if (failed(reader.parseVarInt(numSuccs)))
2301  return failure();
2302  opState.successors.resize(numSuccs);
 // Successors are encoded as indices into the blocks of the current region.
2303  for (int i = 0, e = numSuccs; i < e; ++i) {
2304  if (failed(parseEntry(reader, readState.curBlocks, opState.successors[i],
2305  "successor")))
2306  return failure();
2307  }
2308  }
2309 
2310  /// Parse the use-list orders for the results of the operation. Use-list
2311  /// orders are available since version 3 of the bytecode.
2312  std::optional<UseListMapT> resultIdxToUseListMap = std::nullopt;
 // NOTE(review): the second line of this condition is absent from this
 // listing.
2313  if (version >= bytecode::kUseListOrdering &&
2315  size_t numResults = opState.types.size();
2316  auto parseResult = parseUseListOrderForRange(reader, numResults);
2317  if (failed(parseResult))
2318  return failure();
2319  resultIdxToUseListMap = std::move(*parseResult);
2320  }
2321 
2322  /// Parse the regions of the operation.
 // NOTE(review): the line opening this scope is absent from this listing.
2324  uint64_t numRegions;
2325  if (failed(reader.parseVarIntWithFlag(numRegions, isIsolatedFromAbove)))
2326  return failure();
2327 
 // Only empty regions are attached here.
2328  opState.regions.reserve(numRegions);
2329  for (int i = 0, e = numRegions; i < e; ++i)
2330  opState.regions.push_back(std::make_unique<Region>());
2331  }
2332 
2333  // Create the operation at the back of the current block.
2334  Operation *op = Operation::create(opState);
2335  readState.curBlock->push_back(op);
2336 
2337  // If the operation had results, update the value references.
2338  if (op->getNumResults() && failed(defineValues(reader, op->getResults())))
2339  return failure();
2340 
2341  /// Store a map for every value that received a custom use-list order from the
2342  /// bytecode file.
2343  if (resultIdxToUseListMap.has_value()) {
2344  for (size_t idx = 0; idx < op->getNumResults(); idx++) {
2345  if (resultIdxToUseListMap->contains(idx)) {
2346  valueToUseListMap.try_emplace(op->getResult(idx).getAsOpaquePointer(),
2347  resultIdxToUseListMap->at(idx));
2348  }
2349  }
2350  }
2351  return op;
2352 }
2353 
2354 LogicalResult BytecodeReader::Impl::parseRegion(RegionReadState &readState) {
2355  EncodingReader &reader = *readState.reader;
2356 
2357  // Parse the number of blocks in the region.
2358  uint64_t numBlocks;
2359  if (failed(reader.parseVarInt(numBlocks)))
2360  return failure();
2361 
2362  // If the region is empty, there is nothing else to do.
2363  if (numBlocks == 0)
2364  return success();
2365 
2366  // Parse the number of values defined in this region.
2367  uint64_t numValues;
2368  if (failed(reader.parseVarInt(numValues)))
2369  return failure();
2370  readState.numValues = numValues;
2371 
2372  // Create the blocks within this region. We do this before processing so that
2373  // we can rely on the blocks existing when creating operations.
2374  readState.curBlocks.clear();
2375  readState.curBlocks.reserve(numBlocks);
2376  for (uint64_t i = 0; i < numBlocks; ++i) {
2377  readState.curBlocks.push_back(new Block());
2378  readState.curRegion->push_back(readState.curBlocks.back());
2379  }
2380 
2381  // Prepare the current value scope for this region.
2382  valueScopes.back().push(readState);
2383 
2384  // Parse the entry block of the region.
2385  readState.curBlock = readState.curRegion->begin();
2386  return parseBlockHeader(reader, readState);
2387 }
2388 
/// Parses the header of the block `readState.curBlock`: the number of
/// operations it contains (stored in `readState.numOpsRemaining`), its
/// arguments when present, and, from bytecode version kUseListOrdering
/// onwards, any custom use-list orders for those arguments. The operations
/// themselves are not parsed here.
// NOTE(review): the line carrying this definition's return type is absent from
// this listing; every exit path returns failure() or success().
2390 BytecodeReader::Impl::parseBlockHeader(EncodingReader &reader,
2391  RegionReadState &readState) {
2392  bool hasArgs;
2393  if (failed(reader.parseVarIntWithFlag(readState.numOpsRemaining, hasArgs)))
2394  return failure();
2395 
2396  // Parse the arguments of the block.
2397  if (hasArgs && failed(parseBlockArguments(reader, &*readState.curBlock)))
2398  return failure();
2399 
2400  // Uselist orders are available since version 3 of the bytecode.
2401  if (version < bytecode::kUseListOrdering)
2402  return success();
2403 
 // The marker byte is only read when the block has arguments.
2404  uint8_t hasUseListOrders = 0;
2405  if (hasArgs && failed(reader.parseByte(hasUseListOrders)))
2406  return failure();
2407 
2408  if (!hasUseListOrders)
2409  return success();
2410 
2411  Block &blk = *readState.curBlock;
2412  auto argIdxToUseListMap =
2413  parseUseListOrderForRange(reader, blk.getNumArguments());
 // An empty map is treated as a failure; no diagnostic is emitted here for
 // that case.
2414  if (failed(argIdxToUseListMap) || argIdxToUseListMap->empty())
2415  return failure();
2416 
 // Record the parsed order for each argument that has one.
2417  for (size_t idx = 0; idx < blk.getNumArguments(); idx++)
2418  if (argIdxToUseListMap->contains(idx))
2419  valueToUseListMap.try_emplace(blk.getArgument(idx).getAsOpaquePointer(),
2420  argIdxToUseListMap->at(idx));
2421 
2422  // We don't parse the operations of the block here, that's done elsewhere.
2423  return success();
2424 }
2425 
2426 LogicalResult BytecodeReader::Impl::parseBlockArguments(EncodingReader &reader,
2427  Block *block) {
2428  // Parse the value ID for the first argument, and the number of arguments.
2429  uint64_t numArgs;
2430  if (failed(reader.parseVarInt(numArgs)))
2431  return failure();
2432 
2433  SmallVector<Type> argTypes;
2434  SmallVector<Location> argLocs;
2435  argTypes.reserve(numArgs);
2436  argLocs.reserve(numArgs);
2437 
2438  Location unknownLoc = UnknownLoc::get(config.getContext());
2439  while (numArgs--) {
2440  Type argType;
2441  LocationAttr argLoc = unknownLoc;
2442  if (version >= bytecode::kElideUnknownBlockArgLocation) {
2443  // Parse the type with hasLoc flag to determine if it has type.
2444  uint64_t typeIdx;
2445  bool hasLoc;
2446  if (failed(reader.parseVarIntWithFlag(typeIdx, hasLoc)) ||
2447  !(argType = attrTypeReader.resolveType(typeIdx)))
2448  return failure();
2449  if (hasLoc && failed(parseAttribute(reader, argLoc)))
2450  return failure();
2451  } else {
2452  // All args has type and location.
2453  if (failed(parseType(reader, argType)) ||
2454  failed(parseAttribute(reader, argLoc)))
2455  return failure();
2456  }
2457  argTypes.push_back(argType);
2458  argLocs.push_back(argLoc);
2459  }
2460  block->addArguments(argTypes, argLocs);
2461  return defineValues(reader, block->getArguments());
2462 }
2463 
2464 //===----------------------------------------------------------------------===//
2465 // Value Processing
2466 
2467 Value BytecodeReader::Impl::parseOperand(EncodingReader &reader) {
2468  std::vector<Value> &values = valueScopes.back().values;
2469  Value *value = nullptr;
2470  if (failed(parseEntry(reader, values, value, "value")))
2471  return Value();
2472 
2473  // Create a new forward reference if necessary.
2474  if (!*value)
2475  *value = createForwardRef();
2476  return *value;
2477 }
2478 
2479 LogicalResult BytecodeReader::Impl::defineValues(EncodingReader &reader,
2480  ValueRange newValues) {
2481  ValueScope &valueScope = valueScopes.back();
2482  std::vector<Value> &values = valueScope.values;
2483 
2484  unsigned &valueID = valueScope.nextValueIDs.back();
2485  unsigned valueIDEnd = valueID + newValues.size();
2486  if (valueIDEnd > values.size()) {
2487  return reader.emitError(
2488  "value index range was outside of the expected range for "
2489  "the parent region, got [",
2490  valueID, ", ", valueIDEnd, "), but the maximum index was ",
2491  values.size() - 1);
2492  }
2493 
2494  // Assign the values and update any forward references.
2495  for (unsigned i = 0, e = newValues.size(); i != e; ++i, ++valueID) {
2496  Value newValue = newValues[i];
2497 
2498  // Check to see if a definition for this value already exists.
2499  if (Value oldValue = std::exchange(values[valueID], newValue)) {
2500  Operation *forwardRefOp = oldValue.getDefiningOp();
2501 
2502  // Assert that this is a forward reference operation. Given how we compute
2503  // definition ids (incrementally as we parse), it shouldn't be possible
2504  // for the value to be defined any other way.
2505  assert(forwardRefOp && forwardRefOp->getBlock() == &forwardRefOps &&
2506  "value index was already defined?");
2507 
2508  oldValue.replaceAllUsesWith(newValue);
2509  forwardRefOp->moveBefore(&openForwardRefOps, openForwardRefOps.end());
2510  }
2511  }
2512  return success();
2513 }
2514 
2515 Value BytecodeReader::Impl::createForwardRef() {
2516  // Check for an avaliable existing operation to use. Otherwise, create a new
2517  // fake operation to use for the reference.
2518  if (!openForwardRefOps.empty()) {
2519  Operation *op = &openForwardRefOps.back();
2520  op->moveBefore(&forwardRefOps, forwardRefOps.end());
2521  } else {
2522  forwardRefOps.push_back(Operation::create(forwardRefOpState));
2523  }
2524  return forwardRefOps.back().getResult(0);
2525 }
2526 
2527 //===----------------------------------------------------------------------===//
2528 // Entry Points
2529 //===----------------------------------------------------------------------===//
2530 
2532 
// Public entry points. Apart from the constructor, each one forwards to the
// member of the same name on `impl`, except the first wrapper, which forwards
// to `impl->read`.
// NOTE(review): the first line of several of these definitions (return type
// and/or function name) is absent from this listing.

// Constructor body: builds a file location for `buffer` (line 0, column 0)
// and creates the implementation object from it.
2534  llvm::MemoryBufferRef buffer, const ParserConfig &config, bool lazyLoading,
2535  const std::shared_ptr<llvm::SourceMgr> &bufferOwnerRef) {
2536  Location sourceFileLoc =
2537  FileLineColLoc::get(config.getContext(), buffer.getBufferIdentifier(),
2538  /*line=*/0, /*column=*/0);
2539  impl = std::make_unique<Impl>(sourceFileLoc, config, lazyLoading, buffer,
2540  bufferOwnerRef);
2541 }
2542 
// Forwards to Impl::read with the destination block and the lazy-loading
// callback.
2544  Block *block, llvm::function_ref<bool(Operation *)> lazyOpsCallback) {
2545  return impl->read(block, lazyOpsCallback);
2546 }
2547 
// Forwards to Impl::getNumOpsToMaterialize.
2549  return impl->getNumOpsToMaterialize();
2550 }
2551 
// Forwards to Impl::isMaterializable for `op`.
2553  return impl->isMaterializable(op);
2554 }
2555 
// Forwards to Impl::materialize for `op`.
2557  Operation *op, llvm::function_ref<bool(Operation *)> lazyOpsCallback) {
2558  return impl->materialize(op, lazyOpsCallback);
2559 }
2560 
// Forwards to Impl::finalize with the `shouldMaterialize` predicate.
2562 BytecodeReader::finalize(function_ref<bool(Operation *)> shouldMaterialize) {
2563  return impl->finalize(shouldMaterialize);
2564 }
2565 
2566 bool mlir::isBytecode(llvm::MemoryBufferRef buffer) {
2567  return buffer.getBuffer().startswith("ML\xefR");
2568 }
2569 
2570 /// Read the bytecode from the provided memory buffer reference.
2571 /// `bufferOwnerRef` if provided is the owning source manager for the buffer,
2572 /// and may be used to extend the lifetime of the buffer.
2573 static LogicalResult
2574 readBytecodeFileImpl(llvm::MemoryBufferRef buffer, Block *block,
2575  const ParserConfig &config,
2576  const std::shared_ptr<llvm::SourceMgr> &bufferOwnerRef) {
2577  Location sourceFileLoc =
2578  FileLineColLoc::get(config.getContext(), buffer.getBufferIdentifier(),
2579  /*line=*/0, /*column=*/0);
2580  if (!isBytecode(buffer)) {
2581  return emitError(sourceFileLoc,
2582  "input buffer is not an MLIR bytecode file");
2583  }
2584 
2585  BytecodeReader::Impl reader(sourceFileLoc, config, /*lazyLoading=*/false,
2586  buffer, bufferOwnerRef);
2587  return reader.read(block, /*lazyOpsCallback=*/nullptr);
2588 }
2589 
2590 LogicalResult mlir::readBytecodeFile(llvm::MemoryBufferRef buffer, Block *block,
2591  const ParserConfig &config) {
2592  return readBytecodeFileImpl(buffer, block, config, /*bufferOwnerRef=*/{});
2593 }
/// Reads the bytecode held in the main file buffer of `sourceMgr` into
/// `block`. The source manager is also passed on as the buffer's owner.
// NOTE(review): the line carrying this definition's return type is absent from
// this listing.
2595 mlir::readBytecodeFile(const std::shared_ptr<llvm::SourceMgr> &sourceMgr,
2596  Block *block, const ParserConfig &config) {
2597  return readBytecodeFileImpl(
2598  *sourceMgr->getMemoryBuffer(sourceMgr->getMainFileID()), block, config,
2599  sourceMgr);
2600 }
static LogicalResult readBytecodeFileImpl(llvm::MemoryBufferRef buffer, Block *block, const ParserConfig &config, const std::shared_ptr< llvm::SourceMgr > &bufferOwnerRef)
Read the bytecode from the provided memory buffer reference.
static bool isSectionOptional(bytecode::Section::ID sectionID, int version)
Returns true if the given top-level section ID is optional.
static LogicalResult parseResourceGroup(Location fileLoc, bool allowEmpty, EncodingReader &offsetReader, EncodingReader &resourceReader, StringSectionReader &stringReader, T *handler, const std::shared_ptr< llvm::SourceMgr > &bufferOwnerRef, function_ref< StringRef(StringRef)> remapKey={}, function_ref< LogicalResult(StringRef)> processKeyFn={})
static LogicalResult parseDialectGrouping(EncodingReader &reader, MutableArrayRef< std::unique_ptr< BytecodeDialect >> dialects, function_ref< LogicalResult(BytecodeDialect *)> entryCallback)
Parse a single dialect group encoded in the byte stream.
static LogicalResult resolveEntry(EncodingReader &reader, RangeT &entries, uint64_t index, T &entry, StringRef entryStr)
Resolve an index into the given entry list.
static LogicalResult parseEntry(EncodingReader &reader, RangeT &entries, T &entry, StringRef entryStr)
Parse and resolve an index into the given entry list.
static MLIRContext * getContext(OpFoldResult val)
static std::string diag(const llvm::Value &value)
static ParseResult parseRegions(OpAsmParser &parser, OperationState &state, unsigned nRegions=1)
Definition: OpenACC.cpp:324
This class represents an opaque handle to a dialect resource entry.
This class represents a single parsed resource entry.
Definition: AsmState.h:283
The following classes enable support for parsing and printing resources within MLIR assembly formats.
Definition: AsmState.h:88
MutableArrayRef< char > getMutableData()
Return a mutable reference to the raw underlying data of this blob.
Definition: AsmState.h:154
ArrayRef< char > getData() const
Return the raw underlying data of this blob.
Definition: AsmState.h:142
bool isMutable() const
Return if the data of this blob is mutable.
Definition: AsmState.h:161
This class represents an instance of a resource parser.
Definition: AsmState.h:330
Attributes are known-constant values of operations.
Definition: Attributes.h:25
MLIRContext * getContext() const
Return the context this attribute belongs to.
Definition: Attributes.cpp:37
Block represents an ordered list of Operations.
Definition: Block.h:30
BlockArgument getArgument(unsigned i)
Definition: Block.h:122
unsigned getNumArguments()
Definition: Block.h:121
iterator_range< args_iterator > addArguments(TypeRange types, ArrayRef< Location > locs)
Add one argument to the argument list for each type specified in the list.
Definition: Block.cpp:154
OpListType & getOperations()
Definition: Block.h:130
BlockArgListType getArguments()
Definition: Block.h:80
This class is used to read a bytecode buffer and translate it into MLIR.
LogicalResult materializeAll()
Materialize all operations.
LogicalResult read(Block *block, llvm::function_ref< bool(Operation *)> lazyOps)
Read the bytecode defined within buffer into the given block.
bool isMaterializable(Operation *op)
Impl(Location fileLoc, const ParserConfig &config, bool lazyLoading, llvm::MemoryBufferRef buffer, const std::shared_ptr< llvm::SourceMgr > &bufferOwnerRef)
LogicalResult finalize(function_ref< bool(Operation *)> shouldMaterialize)
Finalize the lazy-loading by calling back with every op that hasn't been materialized to let the clie...
LogicalResult materialize(Operation *op, llvm::function_ref< bool(Operation *)> lazyOpsCallback)
Materialize the provided operation, invoke the lazyOpsCallback on every newly found lazy operation.
int64_t getNumOpsToMaterialize() const
Return the number of ops that haven't been materialized yet.
LogicalResult materialize(Operation *op, llvm::function_ref< bool(Operation *)> lazyOpsCallback=[](Operation *) { return false;})
Materialize the provided operation.
LogicalResult finalize(function_ref< bool(Operation *)> shouldMaterialize=[](Operation *) { return true;})
Finalize the lazy-loading by calling back with every op that hasn't been materialized to let the clie...
BytecodeReader(llvm::MemoryBufferRef buffer, const ParserConfig &config, bool lazyLoad, const std::shared_ptr< llvm::SourceMgr > &bufferOwnerRef={})
Create a bytecode reader for the given buffer.
int64_t getNumOpsToMaterialize() const
Return the number of ops that haven't been materialized yet.
bool isMaterializable(Operation *op)
Return true if the provided op is materializable.
LogicalResult readTopLevel(Block *block, llvm::function_ref< bool(Operation *)> lazyOps=[](Operation *) { return false;})
Read the operations defined within the given memory buffer, containing MLIR bytecode,...
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
Definition: Diagnostics.h:156
This class defines a virtual interface for reading a bytecode stream, providing hooks into the byteco...
Dialects are groups of MLIR operations, types and attributes, as well as behavior associated with the...
Definition: Dialect.h:41
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
This class represents a diagnostic that is inflight and set to be reported.
Definition: Diagnostics.h:308
InFlightDiagnostic & append(Args &&...args) &
Append arguments to the diagnostic.
Definition: Diagnostics.h:334
Location objects represent source locations information in MLIR.
Definition: Location.h:31
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
MLIRContext * getContext() const
Return the context this location is uniqued in.
Definition: Location.h:73
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
bool allowsUnregisteredDialects()
Return true if we allow to create operation for unregistered dialects.
T * getOrLoadDialect()
Get (or create) a dialect for the given derived dialect type.
Definition: MLIRContext.h:97
StringRef getStringRef() const
Return the name of this operation. This always succeeds.
T::Concept * getInterface() const
Returns an instance of the concept object for the given interface if it was registered to this operat...
bool isRegistered() const
Return if this operation is registered.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
void dropAllReferences()
This drops all operand uses from this operation, which is an essential step in breaking cyclic depend...
Definition: Operation.cpp:583
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:402
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:776
static Operation * create(Location location, OperationName name, TypeRange resultTypes, ValueRange operands, NamedAttrList &&attributes, OpaqueProperties properties, BlockRange successors, unsigned numRegions)
Create a new Operation with the specific fields.
Definition: Operation.cpp:66
Block * getBlock()
Returns the operation block that contains this operation.
Definition: Operation.h:213
void moveBefore(Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
Definition: Operation.cpp:554
result_range getResults()
Definition: Operation.h:410
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:538
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:399
This class represents a configuration for the MLIR assembly parser.
Definition: AsmState.h:460
bool shouldVerifyAfterParse() const
Returns if the parser should verify the IR after parsing.
Definition: AsmState.h:477
MLIRContext * getContext() const
Return the MLIRContext to be used when parsing.
Definition: AsmState.h:474
AsmResourceParser * getResourceParser(StringRef name) const
Return the resource parser registered to the given name, or nullptr if no parser with name is registe...
Definition: AsmState.h:486
BlockListType::iterator iterator
Definition: Region.h:52
This diagnostic handler is a simple RAII class that registers and erases a diagnostic handler on a gi...
Definition: Diagnostics.h:516
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
static AsmResourceBlob allocateWithAlign(ArrayRef< char > data, size_t align, AsmResourceBlob::DeleterFn deleter={}, bool dataIsMutable=false)
Create a new unmanaged resource directly referencing the provided data.
Definition: AsmState.h:220
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:378
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
bool use_empty() const
Returns true if this value has no uses.
Definition: Value.h:214
void shuffleUseList(ArrayRef< unsigned > indices)
Shuffle the use list order according to the provided indices.
Definition: Value.cpp:96
use_range getUses() const
Returns a range of all uses, which is useful for iterating over all uses.
Definition: Value.h:208
void * getAsOpaquePointer() const
Methods for supporting PointerLikeTypeTraits.
Definition: Value.h:239
bool hasOneUse() const
Returns true if this value has exactly one use.
Definition: Value.h:211
use_iterator use_begin() const
Definition: Value.h:204
static WalkResult advance()
Definition: Visitors.h:52
static WalkResult interrupt()
Definition: Visitors.h:51
@ kAttrType
This section contains the attributes and types referenced within an IR module.
Definition: Encoding.h:73
@ kAttrTypeOffset
This section contains the offsets for the attribute and types within the AttrType section.
Definition: Encoding.h:77
@ kIR
This section contains the list of operations serialized into the bytecode, and their nested regions/o...
Definition: Encoding.h:81
@ kResource
This section contains the resources of the bytecode.
Definition: Encoding.h:84
@ kResourceOffset
This section contains the offsets of resources within the Resource section.
Definition: Encoding.h:88
@ kDialect
This section contains the dialects referenced within an IR module.
Definition: Encoding.h:69
@ kString
This section contains strings referenced within the bytecode.
Definition: Encoding.h:66
@ kDialectVersions
This section contains the versions of each dialect.
Definition: Encoding.h:91
@ kProperties
This section contains the properties for the operations.
Definition: Encoding.h:94
@ kNumSections
The total number of section types.
Definition: Encoding.h:97
static uint64_t getUseID(OperandT &val, unsigned ownerID)
Get the unique ID of a value use.
Definition: Encoding.h:127
@ kUseListOrdering
Use-list ordering started to be encoded in version 3.
Definition: Encoding.h:38
@ kAlignmentByte
An arbitrary value used to fill alignment padding.
Definition: Encoding.h:56
@ kVersion
The current bytecode version.
Definition: Encoding.h:53
@ kLazyLoading
Support for lazy-loading of isolated region was added in version 2.
Definition: Encoding.h:35
@ kDialectVersioning
Dialects versioning was added in version 1.
Definition: Encoding.h:32
@ kElideUnknownBlockArgLocation
Avoid recording unknown locations on block arguments (compression) started in version 4.
Definition: Encoding.h:42
@ kNativePropertiesEncoding
Support for encoding properties natively in bytecode instead of merged with the discardable attribute...
Definition: Encoding.h:46
@ kMinSupportedVersion
The minimum supported version of the bytecode.
Definition: Encoding.h:29
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:285
Include the generated interface declarations.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
InFlightDiagnostic emitWarning(Location loc)
Utility method to emit a warning message using this location.
StringRef toString(AsmResourceEntryKind kind)
static LogicalResult readResourceHandle(DialectBytecodeReader &reader, FailureOr< T > &value, Ts &&...params)
Helper for resource handle reading that returns LogicalResult.
bool isBytecode(llvm::MemoryBufferRef buffer)
Returns true if the given buffer starts with the magic bytes that signal MLIR bytecode.
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
Attribute parseAttribute(llvm::StringRef attrStr, MLIRContext *context, Type type={}, size_t *numRead=nullptr, bool isKnownNullTerminated=false)
This parses a single MLIR attribute to an MLIR context if it was valid.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Type parseType(llvm::StringRef typeStr, MLIRContext *context, size_t *numRead=nullptr, bool isKnownNullTerminated=false)
This parses a single MLIR type to an MLIR context if it was valid.
AsmResourceEntryKind
This enum represents the different kinds of resource values.
Definition: AsmState.h:272
@ String
A string value.
@ Bool
A boolean value.
@ Blob
A blob of data with an accompanying alignment.
LogicalResult readBytecodeFile(llvm::MemoryBufferRef buffer, Block *block, const ParserConfig &config)
Read the operations defined within the given memory buffer, containing MLIR bytecode,...
LogicalResult verify(Operation *op, bool verifyRecursively=true)
Perform (potentially expensive) checks of invariants, used to detect compiler bugs,...
Definition: Verifier.cpp:421
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
This represents an operation in an abstracted form, suitable for use with the builder APIs.
SmallVector< Block *, 1 > successors
Successors of this operation and their respective operands.
SmallVector< Value, 4 > operands
SmallVector< std::unique_ptr< Region >, 1 > regions
Regions that the op will hold.
NamedAttrList attributes
SmallVector< Type, 4 > types
Types of the results of this operation.