MLIR  19.0.0git
BytecodeWriter.cpp
Go to the documentation of this file.
1 //===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "IRNumbering.h"
13 #include "mlir/Bytecode/Encoding.h"
14 #include "mlir/IR/Attributes.h"
15 #include "mlir/IR/Diagnostics.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/CachedHashString.h"
20 #include "llvm/ADT/MapVector.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <optional>
25 
26 #define DEBUG_TYPE "mlir-bytecode-writer"
27 
28 using namespace mlir;
29 using namespace mlir::bytecode::detail;
30 
31 //===----------------------------------------------------------------------===//
32 // BytecodeWriterConfig
33 //===----------------------------------------------------------------------===//
34 
36  Impl(StringRef producer) : producer(producer) {}
37 
38  /// Version to use when writing.
39  /// Note: This only differs from kVersion if a specific version is set.
40  int64_t bytecodeVersion = bytecode::kVersion;
41 
42  /// A flag specifying whether to elide emission of resources into the bytecode
43  /// file.
44  bool shouldElideResourceData = false;
45 
46  /// A map containing dialect version information for each dialect to emit.
47  llvm::StringMap<std::unique_ptr<DialectVersion>> dialectVersionMap;
48 
49  /// The producer of the bytecode.
50  StringRef producer;
51 
52  /// Printer callbacks used to emit custom type and attribute encodings.
57 
58  /// A collection of non-dialect resource printers.
60 };
61 
63  : impl(std::make_unique<Impl>(producer)) {}
65  StringRef producer)
66  : BytecodeWriterConfig(producer) {
68 }
70 
73  return impl->attributeWriterCallbacks;
74 }
75 
78  return impl->typeWriterCallbacks;
79 }
80 
82  std::unique_ptr<AttrTypeBytecodeWriter<Attribute>> callback) {
83  impl->attributeWriterCallbacks.emplace_back(std::move(callback));
84 }
85 
87  std::unique_ptr<AttrTypeBytecodeWriter<Type>> callback) {
88  impl->typeWriterCallbacks.emplace_back(std::move(callback));
89 }
90 
92  std::unique_ptr<AsmResourcePrinter> printer) {
93  impl->externalResourcePrinters.emplace_back(std::move(printer));
94 }
95 
97  bool shouldElideResourceData) {
98  impl->shouldElideResourceData = shouldElideResourceData;
99 }
100 
101 void BytecodeWriterConfig::setDesiredBytecodeVersion(int64_t bytecodeVersion) {
102  impl->bytecodeVersion = bytecodeVersion;
103 }
104 
106  return impl->bytecodeVersion;
107 }
108 
109 llvm::StringMap<std::unique_ptr<DialectVersion>> &
111  return impl->dialectVersionMap;
112 }
113 
115  llvm::StringRef dialectName,
116  std::unique_ptr<DialectVersion> dialectVersion) const {
117  assert(!impl->dialectVersionMap.contains(dialectName) &&
118  "cannot override a previously set dialect version");
119  impl->dialectVersionMap.insert({dialectName, std::move(dialectVersion)});
120 }
121 
122 //===----------------------------------------------------------------------===//
123 // EncodingEmitter
124 //===----------------------------------------------------------------------===//
125 
126 namespace {
127 /// This class functions as the underlying encoding emitter for the bytecode
128 /// writer. This class is a bit different compared to other types of encoders;
129 /// it does not use a single buffer, but instead may contain several buffers
130 /// (some owned by the writer, and some not) that get concatted during the final
131 /// emission.
132 class EncodingEmitter {
133 public:
134  EncodingEmitter() = default;
135  EncodingEmitter(const EncodingEmitter &) = delete;
136  EncodingEmitter &operator=(const EncodingEmitter &) = delete;
137 
138  /// Write the current contents to the provided stream.
139  void writeTo(raw_ostream &os) const;
140 
141  /// Return the current size of the encoded buffer.
142  size_t size() const { return prevResultSize + currentResult.size(); }
143 
144  //===--------------------------------------------------------------------===//
145  // Emission
146  //===--------------------------------------------------------------------===//
147 
148  /// Backpatch a byte in the result buffer at the given offset.
149  void patchByte(uint64_t offset, uint8_t value) {
150  assert(offset < size() && offset >= prevResultSize &&
151  "cannot patch previously emitted data");
152  currentResult[offset - prevResultSize] = value;
153  }
154 
155  /// Emit the provided blob of data, which is owned by the caller and is
156  /// guaranteed to not die before the end of the bytecode process.
157  void emitOwnedBlob(ArrayRef<uint8_t> data) {
158  // Push the current buffer before adding the provided data.
159  appendResult(std::move(currentResult));
160  appendOwnedResult(data);
161  }
162 
163  /// Emit the provided blob of data that has the given alignment, which is
164  /// owned by the caller and is guaranteed to not die before the end of the
165  /// bytecode process. The alignment value is also encoded, making it available
166  /// on load.
167  void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment) {
168  emitVarInt(alignment);
169  emitVarInt(data.size());
170 
171  alignTo(alignment);
172  emitOwnedBlob(data);
173  }
174  void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment) {
175  ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
176  data.size());
177  emitOwnedBlobAndAlignment(castedData, alignment);
178  }
179 
180  /// Align the emitter to the given alignment.
181  void alignTo(unsigned alignment) {
182  if (alignment < 2)
183  return;
184  assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment");
185 
186  // Check to see if we need to emit any padding bytes to meet the desired
187  // alignment.
188  size_t curOffset = size();
189  size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset;
190  while (paddingSize--)
191  emitByte(bytecode::kAlignmentByte);
192 
193  // Keep track of the maximum required alignment.
194  requiredAlignment = std::max(requiredAlignment, alignment);
195  }
196 
197  //===--------------------------------------------------------------------===//
198  // Integer Emission
199 
200  /// Emit a single byte.
201  template <typename T>
202  void emitByte(T byte) {
203  currentResult.push_back(static_cast<uint8_t>(byte));
204  }
205 
206  /// Emit a range of bytes.
207  void emitBytes(ArrayRef<uint8_t> bytes) {
208  llvm::append_range(currentResult, bytes);
209  }
210 
211  /// Emit a variable length integer. The first encoded byte contains a prefix
212  /// in the low bits indicating the encoded length of the value. This length
213  /// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits
214  /// indicate the number of _additional_ bytes (not including the prefix byte).
215  /// All remaining bits in the first byte, along with all of the bits in
216  /// additional bytes, provide the value of the integer encoded in
217  /// little-endian order.
218  void emitVarInt(uint64_t value) {
219  // In the most common case, the value can be represented in a single byte.
220  // Given how hot this case is, explicitly handle that here.
221  if ((value >> 7) == 0)
222  return emitByte((value << 1) | 0x1);
223  emitMultiByteVarInt(value);
224  }
225 
226  /// Emit a signed variable length integer. Signed varints are encoded using
227  /// a varint with zigzag encoding, meaning that we use the low bit of the
228  /// value to indicate the sign of the value. This allows for more efficient
229  /// encoding of negative values by limiting the number of active bits
230  void emitSignedVarInt(uint64_t value) {
231  emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63));
232  }
233 
234  /// Emit a variable length integer whose low bit is used to encode the
235  /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
236  void emitVarIntWithFlag(uint64_t value, bool flag) {
237  emitVarInt((value << 1) | (flag ? 1 : 0));
238  }
239 
240  //===--------------------------------------------------------------------===//
241  // String Emission
242 
243  /// Emit the given string as a nul terminated string.
244  void emitNulTerminatedString(StringRef str) {
245  emitString(str);
246  emitByte(0);
247  }
248 
249  /// Emit the given string without a nul terminator.
250  void emitString(StringRef str) {
251  emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()});
252  }
253 
254  //===--------------------------------------------------------------------===//
255  // Section Emission
256 
257  /// Emit a nested section of the given code, whose contents are encoded in the
258  /// provided emitter.
259  void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) {
260  // Emit the section code and length. The high bit of the code is used to
261  // indicate whether the section alignment is present, so save an offset to
262  // it.
263  uint64_t codeOffset = currentResult.size();
264  emitByte(code);
265  emitVarInt(emitter.size());
266 
267  // Integrate the alignment of the section into this emitter if necessary.
268  unsigned emitterAlign = emitter.requiredAlignment;
269  if (emitterAlign > 1) {
270  if (size() & (emitterAlign - 1)) {
271  emitVarInt(emitterAlign);
272  alignTo(emitterAlign);
273 
274  // Indicate that we needed to align the section, the high bit of the
275  // code field is used for this.
276  currentResult[codeOffset] |= 0b10000000;
277  } else {
278  // Otherwise, if we happen to be at a compatible offset, we just
279  // remember that we need this alignment.
280  requiredAlignment = std::max(requiredAlignment, emitterAlign);
281  }
282  }
283 
284  // Push our current buffer and then merge the provided section body into
285  // ours.
286  appendResult(std::move(currentResult));
287  for (std::vector<uint8_t> &result : emitter.prevResultStorage)
288  prevResultStorage.push_back(std::move(result));
289  llvm::append_range(prevResultList, emitter.prevResultList);
290  prevResultSize += emitter.prevResultSize;
291  appendResult(std::move(emitter.currentResult));
292  }
293 
294 private:
295  /// Emit the given value using a variable width encoding. This method is a
296  /// fallback when the number of bytes needed to encode the value is greater
297  /// than 1. We mark it noinline here so that the single byte hot path isn't
298  /// pessimized.
299  LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value);
300 
301  /// Append a new result buffer to the current contents.
302  void appendResult(std::vector<uint8_t> &&result) {
303  if (result.empty())
304  return;
305  prevResultStorage.emplace_back(std::move(result));
306  appendOwnedResult(prevResultStorage.back());
307  }
308  void appendOwnedResult(ArrayRef<uint8_t> result) {
309  if (result.empty())
310  return;
311  prevResultSize += result.size();
312  prevResultList.emplace_back(result);
313  }
314 
315  /// The result of the emitter currently being built. We refrain from building
316  /// a single buffer to simplify emitting sections, large data, and more. The
317  /// result is thus represented using multiple distinct buffers, some of which
318  /// we own (via prevResultStorage), and some of which are just pointers into
319  /// externally owned buffers.
320  std::vector<uint8_t> currentResult;
321  std::vector<ArrayRef<uint8_t>> prevResultList;
322  std::vector<std::vector<uint8_t>> prevResultStorage;
323 
324  /// An up-to-date total size of all of the buffers within `prevResultList`.
325  /// This enables O(1) size checks of the current encoding.
326  size_t prevResultSize = 0;
327 
328  /// The highest required alignment for the start of this section.
329  unsigned requiredAlignment = 1;
330 };
331 
332 //===----------------------------------------------------------------------===//
333 // StringSectionBuilder
334 //===----------------------------------------------------------------------===//
335 
336 namespace {
337 /// This class is used to simplify the process of emitting the string section.
338 class StringSectionBuilder {
339 public:
340  /// Add the given string to the string section, and return the index of the
341  /// string within the section.
342  size_t insert(StringRef str) {
343  auto it = strings.insert({llvm::CachedHashStringRef(str), strings.size()});
344  return it.first->second;
345  }
346 
347  /// Write the current set of strings to the given emitter.
348  void write(EncodingEmitter &emitter) {
349  emitter.emitVarInt(strings.size());
350 
351  // Emit the sizes in reverse order, so that we don't need to backpatch an
352  // offset to the string data or have a separate section.
353  for (const auto &it : llvm::reverse(strings))
354  emitter.emitVarInt(it.first.size() + 1);
355  // Emit the string data itself.
356  for (const auto &it : strings)
357  emitter.emitNulTerminatedString(it.first.val());
358  }
359 
360 private:
361  /// A set of strings referenced within the bytecode. The value of the map is
362  /// unused.
363  llvm::MapVector<llvm::CachedHashStringRef, size_t> strings;
364 };
365 } // namespace
366 
/// DialectBytecodeWriter implementation used by this file. Every write is
/// forwarded to an EncodingEmitter; attributes, types and resource handles are
/// translated to their numbers through the IRNumberingState, and strings are
/// interned in the StringSectionBuilder.
367 class DialectWriter : public DialectBytecodeWriter {
368  using DialectVersionMapT = llvm::StringMap<std::unique_ptr<DialectVersion>>;
369 
370 public:
 /// All reference arguments are stored by reference in the writer.
371  DialectWriter(int64_t bytecodeVersion, EncodingEmitter &emitter,
372  IRNumberingState &numberingState,
373  StringSectionBuilder &stringSection,
374  const DialectVersionMapT &dialectVersionMap)
375  : bytecodeVersion(bytecodeVersion), emitter(emitter),
376  numberingState(numberingState), stringSection(stringSection),
377  dialectVersionMap(dialectVersionMap) {}
378 
379  //===--------------------------------------------------------------------===//
380  // IR
381  //===--------------------------------------------------------------------===//
382 
 /// Emit the number assigned to `attr` as a varint.
383  void writeAttribute(Attribute attr) override {
384  emitter.emitVarInt(numberingState.getNumber(attr));
385  }
 /// Emit an attribute that may be null. A null attribute is encoded as the
 /// varint 0; otherwise the attribute number is emitted with the low flag bit
 /// set.
386  void writeOptionalAttribute(Attribute attr) override {
387  if (!attr) {
388  emitter.emitVarInt(0);
389  return;
390  }
391  emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true);
392  }
393 
 /// Emit the number assigned to `type` as a varint.
394  void writeType(Type type) override {
395  emitter.emitVarInt(numberingState.getNumber(type));
396  }
397 
 /// Emit the number assigned to `resource` as a varint.
398  void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
399  emitter.emitVarInt(numberingState.getNumber(resource));
400  }
401 
402  //===--------------------------------------------------------------------===//
403  // Primitives
404  //===--------------------------------------------------------------------===//
405 
 /// Emit `value` as a varint.
406  void writeVarInt(uint64_t value) override { emitter.emitVarInt(value); }
407 
 /// Emit `value` as a signed (zigzag encoded) varint.
408  void writeSignedVarInt(int64_t value) override {
409  emitter.emitSignedVarInt(value);
410  }
411 
 /// Emit `value` using an encoding chosen from its bit width. The width
 /// itself is not emitted here.
412  void writeAPIntWithKnownWidth(const APInt &value) override {
413  size_t bitWidth = value.getBitWidth();
414 
415  // If the value is a single byte, just emit it directly without going
416  // through a varint.
417  if (bitWidth <= 8)
418  return emitter.emitByte(value.getLimitedValue());
419 
420  // If the value fits within a single varint, emit it directly.
421  if (bitWidth <= 64)
422  return emitter.emitSignedVarInt(value.getLimitedValue());
423 
424  // Otherwise, we need to encode a variable number of active words. We use
425  // active words instead of the number of total words under the observation
426  // that smaller values will be more common.
427  unsigned numActiveWords = value.getActiveWords();
428  emitter.emitVarInt(numActiveWords);
429 
430  const uint64_t *rawValueData = value.getRawData();
431  for (unsigned i = 0; i < numActiveWords; ++i)
432  emitter.emitSignedVarInt(rawValueData[i]);
433  }
434 
 /// Emit `value` by bitcasting it to an APInt and writing that integer with
 /// writeAPIntWithKnownWidth.
435  void writeAPFloatWithKnownSemantics(const APFloat &value) override {
436  writeAPIntWithKnownWidth(value.bitcastToAPInt());
437  }
438 
 /// Intern `str` in the string section and emit its index as a varint.
439  void writeOwnedString(StringRef str) override {
440  emitter.emitVarInt(stringSection.insert(str));
441  }
442 
 /// Emit the size of `blob` as a varint followed by the blob itself. The
 /// blob is handed to the emitter as a caller-owned buffer, i.e. it is
 /// referenced rather than copied.
443  void writeOwnedBlob(ArrayRef<char> blob) override {
444  emitter.emitVarInt(blob.size());
445  emitter.emitOwnedBlob(ArrayRef<uint8_t>(
446  reinterpret_cast<const uint8_t *>(blob.data()), blob.size()));
447  }
448 
 /// Emit `value` as a single byte.
449  void writeOwnedBool(bool value) override { emitter.emitByte(value); }
450 
 /// Return the bytecode version this writer was constructed with.
451  int64_t getBytecodeVersion() const override { return bytecodeVersion; }
452 
 /// Look up the version registered for `dialectName` in the dialect version
 /// map; yields failure() when the map has no entry for that dialect.
 /// NOTE(review): the return type line of this method is missing from this
 /// listing - confirm it against the original source.
454  getDialectVersion(StringRef dialectName) const override {
455  auto dialectEntry = dialectVersionMap.find(dialectName);
456  if (dialectEntry == dialectVersionMap.end())
457  return failure();
458  return dialectEntry->getValue().get();
459  }
460 
461 private:
462  int64_t bytecodeVersion;
463  EncodingEmitter &emitter;
464  IRNumberingState &numberingState;
465  StringSectionBuilder &stringSection;
466  const DialectVersionMapT &dialectVersionMap;
467 };
468 
469 namespace {
470 class PropertiesSectionBuilder {
471 public:
472  PropertiesSectionBuilder(IRNumberingState &numberingState,
473  StringSectionBuilder &stringSection,
474  const BytecodeWriterConfig::Impl &config)
475  : numberingState(numberingState), stringSection(stringSection),
476  config(config) {}
477 
478  /// Emit the op properties in the properties section and return the index of
479  /// the properties within the section. Return -1 if no properties was emitted.
480  std::optional<ssize_t> emit(Operation *op) {
481  EncodingEmitter propertiesEmitter;
482  if (!op->getPropertiesStorageSize())
483  return std::nullopt;
484  if (!op->isRegistered()) {
485  // Unregistered op are storing properties as an optional attribute.
486  Attribute prop = *op->getPropertiesStorage().as<Attribute *>();
487  if (!prop)
488  return std::nullopt;
489  EncodingEmitter sizeEmitter;
490  sizeEmitter.emitVarInt(numberingState.getNumber(prop));
491  scratch.clear();
492  llvm::raw_svector_ostream os(scratch);
493  sizeEmitter.writeTo(os);
494  return emit(scratch);
495  }
496 
497  EncodingEmitter emitter;
498  DialectWriter propertiesWriter(config.bytecodeVersion, emitter,
499  numberingState, stringSection,
500  config.dialectVersionMap);
501  auto iface = cast<BytecodeOpInterface>(op);
502  iface.writeProperties(propertiesWriter);
503  scratch.clear();
504  llvm::raw_svector_ostream os(scratch);
505  emitter.writeTo(os);
506  return emit(scratch);
507  }
508 
509  /// Write the current set of properties to the given emitter.
510  void write(EncodingEmitter &emitter) {
511  emitter.emitVarInt(propertiesStorage.size());
512  if (propertiesStorage.empty())
513  return;
514  for (const auto &storage : propertiesStorage) {
515  if (storage.empty()) {
516  emitter.emitBytes(ArrayRef<uint8_t>());
517  continue;
518  }
519  emitter.emitBytes(ArrayRef(reinterpret_cast<const uint8_t *>(&storage[0]),
520  storage.size()));
521  }
522  }
523 
524  /// Returns true if the section is empty.
525  bool empty() { return propertiesStorage.empty(); }
526 
527 private:
528  /// Emit raw data and returns the offset in the internal buffer.
529  /// Data are deduplicated and will be copied in the internal buffer only if
530  /// they don't exist there already.
531  ssize_t emit(ArrayRef<char> rawProperties) {
532  // Populate a scratch buffer with the properties size.
533  SmallVector<char> sizeScratch;
534  {
535  EncodingEmitter sizeEmitter;
536  sizeEmitter.emitVarInt(rawProperties.size());
537  llvm::raw_svector_ostream os(sizeScratch);
538  sizeEmitter.writeTo(os);
539  }
540  // Append a new storage to the table now.
541  size_t index = propertiesStorage.size();
542  propertiesStorage.emplace_back();
543  std::vector<char> &newStorage = propertiesStorage.back();
544  size_t propertiesSize = sizeScratch.size() + rawProperties.size();
545  newStorage.reserve(propertiesSize);
546  newStorage.insert(newStorage.end(), sizeScratch.begin(), sizeScratch.end());
547  newStorage.insert(newStorage.end(), rawProperties.begin(),
548  rawProperties.end());
549 
550  // Try to de-duplicate the new serialized properties.
551  // If the properties is a duplicate, pop it back from the storage.
552  auto inserted = propertiesUniquing.insert(
553  std::make_pair(ArrayRef<char>(newStorage), index));
554  if (!inserted.second)
555  propertiesStorage.pop_back();
556  return inserted.first->getSecond();
557  }
558 
559  /// Storage for properties.
560  std::vector<std::vector<char>> propertiesStorage;
561  SmallVector<char> scratch;
562  DenseMap<ArrayRef<char>, int64_t> propertiesUniquing;
563  IRNumberingState &numberingState;
564  StringSectionBuilder &stringSection;
565  const BytecodeWriterConfig::Impl &config;
566 };
567 } // namespace
568 
569 /// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need
570 /// to go through an intermediate buffer when interacting with code that wants a
571 /// raw_ostream.
572 class RawEmitterOstream : public raw_ostream {
573 public:
574  explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) {
575  SetUnbuffered();
576  }
577 
578 private:
579  void write_impl(const char *ptr, size_t size) override {
580  emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size});
581  }
582  uint64_t current_pos() const override { return emitter.size(); }
583 
584  /// The section being emitted to.
585  EncodingEmitter &emitter;
586 };
587 } // namespace
588 
589 void EncodingEmitter::writeTo(raw_ostream &os) const {
590  for (auto &prevResult : prevResultList)
591  os.write((const char *)prevResult.data(), prevResult.size());
592  os.write((const char *)currentResult.data(), currentResult.size());
593 }
594 
595 void EncodingEmitter::emitMultiByteVarInt(uint64_t value) {
596  // Compute the number of bytes needed to encode the value. Each byte can hold
597  // up to 7-bits of data. We only check up to the number of bits we can encode
598  // in the first byte (8).
599  uint64_t it = value >> 7;
600  for (size_t numBytes = 2; numBytes < 9; ++numBytes) {
601  if (LLVM_LIKELY(it >>= 7) == 0) {
602  uint64_t encodedValue = (value << 1) | 0x1;
603  encodedValue <<= (numBytes - 1);
604  llvm::support::ulittle64_t encodedValueLE(encodedValue);
605  emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes});
606  return;
607  }
608  }
609 
610  // If the value is too large to encode in a single byte, emit a special all
611  // zero marker byte and splat the value directly.
612  emitByte(0);
613  llvm::support::ulittle64_t valueLE(value);
614  emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)});
615 }
616 
617 //===----------------------------------------------------------------------===//
618 // Bytecode Writer
619 //===----------------------------------------------------------------------===//
620 
621 namespace {
622 class BytecodeWriter {
623 public:
624  BytecodeWriter(Operation *op, const BytecodeWriterConfig &config)
625  : numberingState(op, config), config(config.getImpl()),
626  propertiesSection(numberingState, stringSection, config.getImpl()) {}
627 
628  /// Write the bytecode for the given root operation.
629  LogicalResult write(Operation *rootOp, raw_ostream &os);
630 
631 private:
632  //===--------------------------------------------------------------------===//
633  // Dialects
634 
635  void writeDialectSection(EncodingEmitter &emitter);
636 
637  //===--------------------------------------------------------------------===//
638  // Attributes and Types
639 
640  void writeAttrTypeSection(EncodingEmitter &emitter);
641 
642  //===--------------------------------------------------------------------===//
643  // Operations
644 
645  LogicalResult writeBlock(EncodingEmitter &emitter, Block *block);
646  LogicalResult writeOp(EncodingEmitter &emitter, Operation *op);
647  LogicalResult writeRegion(EncodingEmitter &emitter, Region *region);
648  LogicalResult writeIRSection(EncodingEmitter &emitter, Operation *op);
649 
650  LogicalResult writeRegions(EncodingEmitter &emitter,
651  MutableArrayRef<Region> regions) {
652  return success(llvm::all_of(regions, [&](Region &region) {
653  return succeeded(writeRegion(emitter, &region));
654  }));
655  }
656 
657  //===--------------------------------------------------------------------===//
658  // Resources
659 
660  void writeResourceSection(Operation *op, EncodingEmitter &emitter);
661 
662  //===--------------------------------------------------------------------===//
663  // Strings
664 
665  void writeStringSection(EncodingEmitter &emitter);
666 
667  //===--------------------------------------------------------------------===//
668  // Properties
669 
670  void writePropertiesSection(EncodingEmitter &emitter);
671 
672  //===--------------------------------------------------------------------===//
673  // Helpers
674 
675  void writeUseListOrders(EncodingEmitter &emitter, uint8_t &opEncodingMask,
676  ValueRange range);
677 
678  //===--------------------------------------------------------------------===//
679  // Fields
680 
681  /// The builder used for the string section.
682  StringSectionBuilder stringSection;
683 
684  /// The IR numbering state generated for the root operation.
685  IRNumberingState numberingState;
686 
687  /// Configuration dictating bytecode emission.
688  const BytecodeWriterConfig::Impl &config;
689 
690  /// Storage for the properties section
691  PropertiesSectionBuilder propertiesSection;
692 };
693 } // namespace
694 
695 LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
696  EncodingEmitter emitter;
697 
698  // Emit the bytecode file header. This is how we identify the output as a
699  // bytecode file.
700  emitter.emitString("ML\xefR");
701 
702  // Emit the bytecode version.
703  if (config.bytecodeVersion < bytecode::kMinSupportedVersion ||
704  config.bytecodeVersion > bytecode::kVersion)
705  return rootOp->emitError()
706  << "unsupported version requested " << config.bytecodeVersion
707  << ", must be in range ["
708  << static_cast<int64_t>(bytecode::kMinSupportedVersion) << ", "
709  << static_cast<int64_t>(bytecode::kVersion) << ']';
710  emitter.emitVarInt(config.bytecodeVersion);
711 
712  // Emit the producer.
713  emitter.emitNulTerminatedString(config.producer);
714 
715  // Emit the dialect section.
716  writeDialectSection(emitter);
717 
718  // Emit the attributes and types section.
719  writeAttrTypeSection(emitter);
720 
721  // Emit the IR section.
722  if (failed(writeIRSection(emitter, rootOp)))
723  return failure();
724 
725  // Emit the resources section.
726  writeResourceSection(rootOp, emitter);
727 
728  // Emit the string section.
729  writeStringSection(emitter);
730 
731  // Emit the properties section.
732  if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding)
733  writePropertiesSection(emitter);
734  else if (!propertiesSection.empty())
735  return rootOp->emitError(
736  "unexpected properties emitted incompatible with bytecode <5");
737 
738  // Write the generated bytecode to the provided output stream.
739  emitter.writeTo(os);
740 
741  return success();
742 }
743 
744 //===----------------------------------------------------------------------===//
745 // Dialects
746 
747 /// Write the given entries in contiguous groups with the same parent dialect.
748 /// Each dialect sub-group is encoded with the parent dialect and number of
749 /// elements, followed by the encoding for the entries. The given callback is
750 /// invoked to encode each individual entry.
751 template <typename EntriesT, typename EntryCallbackT>
752 static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
753  EntryCallbackT &&callback) {
754  for (auto it = entries.begin(), e = entries.end(); it != e;) {
755  auto groupStart = it++;
756 
757  // Find the end of the group that shares the same parent dialect.
758  DialectNumbering *currentDialect = groupStart->dialect;
759  it = std::find_if(it, e, [&](const auto &entry) {
760  return entry.dialect != currentDialect;
761  });
762 
763  // Emit the dialect and number of elements.
764  emitter.emitVarInt(currentDialect->number);
765  emitter.emitVarInt(std::distance(groupStart, it));
766 
767  // Emit the entries within the group.
768  for (auto &entry : llvm::make_range(groupStart, it))
769  callback(entry);
770  }
771 }
772 
773 void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
774  EncodingEmitter dialectEmitter;
775 
776  // Emit the referenced dialects.
777  auto dialects = numberingState.getDialects();
778  dialectEmitter.emitVarInt(llvm::size(dialects));
779  for (DialectNumbering &dialect : dialects) {
780  // Write the string section and get the ID.
781  size_t nameID = stringSection.insert(dialect.name);
782 
783  if (config.bytecodeVersion < bytecode::kDialectVersioning) {
784  dialectEmitter.emitVarInt(nameID);
785  continue;
786  }
787 
788  // Try writing the version to the versionEmitter.
789  EncodingEmitter versionEmitter;
790  if (dialect.interface) {
791  // The writer used when emitting using a custom bytecode encoding.
792  DialectWriter versionWriter(config.bytecodeVersion, versionEmitter,
793  numberingState, stringSection,
794  config.dialectVersionMap);
795  dialect.interface->writeVersion(versionWriter);
796  }
797 
798  // If the version emitter is empty, version is not available. We can encode
799  // this in the dialect ID, so if there is no version, we don't write the
800  // section.
801  size_t versionAvailable = versionEmitter.size() > 0;
802  dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable);
803  if (versionAvailable)
804  dialectEmitter.emitSection(bytecode::Section::kDialectVersions,
805  std::move(versionEmitter));
806  }
807 
808  if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation)
809  dialectEmitter.emitVarInt(size(numberingState.getOpNames()));
810 
811  // Emit the referenced operation names grouped by dialect.
812  auto emitOpName = [&](OpNameNumbering &name) {
813  size_t stringId = stringSection.insert(name.name.stripDialect());
814  if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding)
815  dialectEmitter.emitVarInt(stringId);
816  else
817  dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered());
818  };
819  writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName);
820 
821  emitter.emitSection(bytecode::Section::kDialect, std::move(dialectEmitter));
822 }
823 
824 //===----------------------------------------------------------------------===//
825 // Attributes and Types
826 
/// Emit the attribute and type data into `emitter` as two sections. The
/// kAttrTypeOffset section holds the number of attributes and types followed
/// by, per dialect group, the size of every entry together with a flag telling
/// whether the entry uses a custom encoding. The kAttrType section holds the
/// encoded entries themselves.
827 void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
828  EncodingEmitter attrTypeEmitter;
829  EncodingEmitter offsetEmitter;
830  offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()));
831  offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()));
832 
833  // A functor used to emit an attribute or type entry.
 // `prevOffset` is the size of `attrTypeEmitter` after the previous entry, so
 // that the size of each entry can be computed as a difference.
834  uint64_t prevOffset = 0;
835  auto emitAttrOrType = [&](auto &entry) {
836  auto entryValue = entry.getValue();
837 
 // Fallback encoding: print the entry through a raw_ostream and terminate
 // it with a nul byte.
838  auto emitAttrOrTypeRawImpl = [&]() -> void {
839  RawEmitterOstream(attrTypeEmitter) << entryValue;
840  attrTypeEmitter.emitByte(0);
841  };
 // Emits the entry and returns true if a custom encoding was used, false if
 // the fallback encoding was used.
842  auto emitAttrOrTypeImpl = [&]() -> bool {
843  // TODO: We don't currently support custom encoded mutable types and
844  // attributes.
845  if (entryValue.template hasTrait<TypeTrait::IsMutable>() ||
846  entryValue.template hasTrait<AttributeTrait::IsMutable>()) {
847  emitAttrOrTypeRawImpl();
848  return false;
849  }
850 
851  DialectWriter dialectWriter(config.bytecodeVersion, attrTypeEmitter,
852  numberingState, stringSection,
853  config.dialectVersionMap);
 // The writer callbacks from the configuration are tried first, in order;
 // the bytecode interface of the owning dialect is tried after them.
854  if constexpr (std::is_same_v<std::decay_t<decltype(entryValue)>, Type>) {
855  for (const auto &callback : config.typeWriterCallbacks) {
856  if (succeeded(callback->write(entryValue, dialectWriter)))
857  return true;
858  }
859  if (const BytecodeDialectInterface *interface =
860  entry.dialect->interface) {
861  if (succeeded(interface->writeType(entryValue, dialectWriter)))
862  return true;
863  }
864  } else {
865  for (const auto &callback : config.attributeWriterCallbacks) {
866  if (succeeded(callback->write(entryValue, dialectWriter)))
867  return true;
868  }
869  if (const BytecodeDialectInterface *interface =
870  entry.dialect->interface) {
871  if (succeeded(interface->writeAttribute(entryValue, dialectWriter)))
872  return true;
873  }
874  }
875 
876  // If the entry was not emitted using a callback or a dialect interface,
877  // emit it using the textual format.
878  emitAttrOrTypeRawImpl();
879  return false;
880  };
881 
882  bool hasCustomEncoding = emitAttrOrTypeImpl();
883 
884  // Record the offset of this entry.
 // What is stored is the distance from the end of the previous entry, with
 // the custom encoding flag in the low bit.
885  uint64_t curOffset = attrTypeEmitter.size();
886  offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding);
887  prevOffset = curOffset;
888  };
889 
890  // Emit the attribute and type entries for each dialect.
 // The group headers go to `offsetEmitter`, while `emitAttrOrType` writes the
 // entry payloads to `attrTypeEmitter`.
891  writeDialectGrouping(offsetEmitter, numberingState.getAttributes(),
892  emitAttrOrType);
893  writeDialectGrouping(offsetEmitter, numberingState.getTypes(),
894  emitAttrOrType);
895 
896  // Emit the sections to the stream.
897  emitter.emitSection(bytecode::Section::kAttrTypeOffset,
898  std::move(offsetEmitter));
899  emitter.emitSection(bytecode::Section::kAttrType, std::move(attrTypeEmitter));
900 }
901 
902 //===----------------------------------------------------------------------===//
903 // Operations
904 
905 LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter,
906  Block *block) {
907  ArrayRef<BlockArgument> args = block->getArguments();
908  bool hasArgs = !args.empty();
909 
910  // Emit the number of operations in this block, and if it has arguments. We
911  // use the low bit of the operation count to indicate if the block has
912  // arguments.
913  unsigned numOps = numberingState.getOperationCount(block);
914  emitter.emitVarIntWithFlag(numOps, hasArgs);
915 
916  // Emit the arguments of the block.
917  if (hasArgs) {
918  emitter.emitVarInt(args.size());
919  for (BlockArgument arg : args) {
920  Location argLoc = arg.getLoc();
921  if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) {
922  emitter.emitVarIntWithFlag(numberingState.getNumber(arg.getType()),
923  !isa<UnknownLoc>(argLoc));
924  if (!isa<UnknownLoc>(argLoc))
925  emitter.emitVarInt(numberingState.getNumber(argLoc));
926  } else {
927  emitter.emitVarInt(numberingState.getNumber(arg.getType()));
928  emitter.emitVarInt(numberingState.getNumber(argLoc));
929  }
930  }
931  if (config.bytecodeVersion >= bytecode::kUseListOrdering) {
932  uint64_t maskOffset = emitter.size();
933  uint8_t encodingMask = 0;
934  emitter.emitByte(0);
935  writeUseListOrders(emitter, encodingMask, args);
936  if (encodingMask)
937  emitter.patchByte(maskOffset, encodingMask);
938  }
939  }
940 
941  // Emit the operations within the block.
942  for (Operation &op : *block)
943  if (failed(writeOp(emitter, &op)))
944  return failure();
945  return success();
946 }
947 
948 LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
949  emitter.emitVarInt(numberingState.getNumber(op->getName()));
950 
951  // Emit a mask for the operation components. We need to fill this in later
952  // (when we actually know what needs to be emitted), so emit a placeholder for
953  // now.
954  uint64_t maskOffset = emitter.size();
955  uint8_t opEncodingMask = 0;
956  emitter.emitByte(0);
957 
958  // Emit the location for this operation.
959  emitter.emitVarInt(numberingState.getNumber(op->getLoc()));
960 
961  // Emit the attributes of this operation.
962  DictionaryAttr attrs = op->getDiscardableAttrDictionary();
963  // Allow deployment to version <kNativePropertiesEncoding by merging inherent
964  // attribute with the discardable ones. We should fail if there are any
965  // conflicts. When properties are not used by the op, also store everything as
966  // attributes.
967  if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding ||
968  !op->getPropertiesStorage()) {
969  attrs = op->getAttrDictionary();
970  }
971  if (!attrs.empty()) {
972  opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
973  emitter.emitVarInt(numberingState.getNumber(attrs));
974  }
975 
976  // Emit the properties of this operation, for now we still support deployment
977  // to version <kNativePropertiesEncoding.
978  if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding) {
979  std::optional<ssize_t> propertiesId = propertiesSection.emit(op);
980  if (propertiesId.has_value()) {
982  emitter.emitVarInt(*propertiesId);
983  }
984  }
985 
986  // Emit the result types of the operation.
987  if (unsigned numResults = op->getNumResults()) {
988  opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
989  emitter.emitVarInt(numResults);
990  for (Type type : op->getResultTypes())
991  emitter.emitVarInt(numberingState.getNumber(type));
992  }
993 
994  // Emit the operands of the operation.
995  if (unsigned numOperands = op->getNumOperands()) {
996  opEncodingMask |= bytecode::OpEncodingMask::kHasOperands;
997  emitter.emitVarInt(numOperands);
998  for (Value operand : op->getOperands())
999  emitter.emitVarInt(numberingState.getNumber(operand));
1000  }
1001 
1002  // Emit the successors of the operation.
1003  if (unsigned numSuccessors = op->getNumSuccessors()) {
1004  opEncodingMask |= bytecode::OpEncodingMask::kHasSuccessors;
1005  emitter.emitVarInt(numSuccessors);
1006  for (Block *successor : op->getSuccessors())
1007  emitter.emitVarInt(numberingState.getNumber(successor));
1008  }
1009 
1010  // Emit the use-list orders to bytecode, so we can reconstruct the same order
1011  // at parsing.
1012  if (config.bytecodeVersion >= bytecode::kUseListOrdering)
1013  writeUseListOrders(emitter, opEncodingMask, ValueRange(op->getResults()));
1014 
1015  // Check for regions.
1016  unsigned numRegions = op->getNumRegions();
1017  if (numRegions)
1019 
1020  // Update the mask for the operation.
1021  emitter.patchByte(maskOffset, opEncodingMask);
1022 
1023  // With the mask emitted, we can now emit the regions of the operation. We do
1024  // this after mask emission to avoid offset complications that may arise by
1025  // emitting the regions first (e.g. if the regions are huge, backpatching the
1026  // op encoding mask is more annoying).
1027  if (numRegions) {
1028  bool isIsolatedFromAbove = numberingState.isIsolatedFromAbove(op);
1029  emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove);
1030 
1031  // If the region is not isolated from above, or we are emitting bytecode
1032  // targeting version <kLazyLoading, we don't use a section.
1033  if (isIsolatedFromAbove &&
1034  config.bytecodeVersion >= bytecode::kLazyLoading) {
1035  EncodingEmitter regionEmitter;
1036  if (failed(writeRegions(regionEmitter, op->getRegions())))
1037  return failure();
1038  emitter.emitSection(bytecode::Section::kIR, std::move(regionEmitter));
1039 
1040  } else if (failed(writeRegions(emitter, op->getRegions()))) {
1041  return failure();
1042  }
1043  }
1044  return success();
1045 }
1046 
1047 void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter,
1048  uint8_t &opEncodingMask,
1049  ValueRange range) {
1050  // Loop over the results and store the use-list order per result index.
1052  for (auto item : llvm::enumerate(range)) {
1053  auto value = item.value();
1054  // No need to store a custom use-list order if the result does not have
1055  // multiple uses.
1056  if (value.use_empty() || value.hasOneUse())
1057  continue;
1058 
1059  // For each result, assemble the list of pairs (use-list-index,
1060  // global-value-index). While doing so, detect if the global-value-index is
1061  // already ordered with respect to the use-list-index.
1062  bool alreadyOrdered = true;
1063  auto &firstUse = *value.use_begin();
1064  uint64_t prevID = bytecode::getUseID(
1065  firstUse, numberingState.getNumber(firstUse.getOwner()));
1067  {{0, prevID}});
1068 
1069  for (auto use : llvm::drop_begin(llvm::enumerate(value.getUses()))) {
1070  uint64_t currentID = bytecode::getUseID(
1071  use.value(), numberingState.getNumber(use.value().getOwner()));
1072  // The use-list order achieved when building the IR at parsing always
1073  // pushes new uses on front. Hence, if the order by unique ID is
1074  // monotonically decreasing, a roundtrip to bytecode preserves such order.
1075  alreadyOrdered &= (prevID > currentID);
1076  useListPairs.push_back({use.index(), currentID});
1077  prevID = currentID;
1078  }
1079 
1080  // Do not emit if the order is already sorted.
1081  if (alreadyOrdered)
1082  continue;
1083 
1084  // Sort the use indices by the unique ID indices in descending order.
1085  std::sort(
1086  useListPairs.begin(), useListPairs.end(),
1087  [](auto elem1, auto elem2) { return elem1.second > elem2.second; });
1088 
1089  map.try_emplace(item.index(), llvm::map_range(useListPairs, [](auto elem) {
1090  return elem.first;
1091  }));
1092  }
1093 
1094  if (map.empty())
1095  return;
1096 
1098  // Emit the number of results that have a custom use-list order if the number
1099  // of results is greater than one.
1100  if (range.size() != 1)
1101  emitter.emitVarInt(map.size());
1102 
1103  for (const auto &item : map) {
1104  auto resultIdx = item.getFirst();
1105  auto useListOrder = item.getSecond();
1106 
1107  // Compute the number of uses that are actually shuffled. If those are less
1108  // than half of the total uses, encoding the index pair `(src, dst)` is more
1109  // space efficient.
1110  size_t shuffledElements =
1111  llvm::count_if(llvm::enumerate(useListOrder),
1112  [](auto item) { return item.index() != item.value(); });
1113  bool indexPairEncoding = shuffledElements < (useListOrder.size() / 2);
1114 
1115  // For single result, we don't need to store the result index.
1116  if (range.size() != 1)
1117  emitter.emitVarInt(resultIdx);
1118 
1119  if (indexPairEncoding) {
1120  emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding);
1121  for (auto pair : llvm::enumerate(useListOrder)) {
1122  if (pair.index() != pair.value()) {
1123  emitter.emitVarInt(pair.value());
1124  emitter.emitVarInt(pair.index());
1125  }
1126  }
1127  } else {
1128  emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding);
1129  for (const auto &index : useListOrder)
1130  emitter.emitVarInt(index);
1131  }
1132  }
1133 }
1134 
1135 LogicalResult BytecodeWriter::writeRegion(EncodingEmitter &emitter,
1136  Region *region) {
1137  // If the region is empty, we only need to emit the number of blocks (which is
1138  // zero).
1139  if (region->empty()) {
1140  emitter.emitVarInt(/*numBlocks*/ 0);
1141  return success();
1142  }
1143 
1144  // Emit the number of blocks and values within the region.
1145  unsigned numBlocks, numValues;
1146  std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region);
1147  emitter.emitVarInt(numBlocks);
1148  emitter.emitVarInt(numValues);
1149 
1150  // Emit the blocks within the region.
1151  for (Block &block : *region)
1152  if (failed(writeBlock(emitter, &block)))
1153  return failure();
1154  return success();
1155 }
1156 
1157 LogicalResult BytecodeWriter::writeIRSection(EncodingEmitter &emitter,
1158  Operation *op) {
1159  EncodingEmitter irEmitter;
1160 
1161  // Write the IR section the same way as a block with no arguments. Note that
1162  // the low-bit of the operation count for a block is used to indicate if the
1163  // block has arguments, which in this case is always false.
1164  irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false);
1165 
1166  // Emit the operations.
1167  if (failed(writeOp(irEmitter, op)))
1168  return failure();
1169 
1170  emitter.emitSection(bytecode::Section::kIR, std::move(irEmitter));
1171  return success();
1172 }
1173 
1174 //===----------------------------------------------------------------------===//
1175 // Resources
1176 
1177 namespace {
1178 /// This class represents a resource builder implementation for the MLIR
1179 /// bytecode format.
1180 class ResourceBuilder : public AsmResourceBuilder {
1181 public:
1182  using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>;
1183 
1184  ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection,
1185  PostProcessFn postProcessFn, bool shouldElideData)
1186  : emitter(emitter), stringSection(stringSection),
1187  postProcessFn(postProcessFn), shouldElideData(shouldElideData) {}
1188  ~ResourceBuilder() override = default;
1189 
1190  void buildBlob(StringRef key, ArrayRef<char> data,
1191  uint32_t dataAlignment) final {
1192  if (!shouldElideData)
1193  emitter.emitOwnedBlobAndAlignment(data, dataAlignment);
1194  postProcessFn(key, AsmResourceEntryKind::Blob);
1195  }
1196  void buildBool(StringRef key, bool data) final {
1197  if (!shouldElideData)
1198  emitter.emitByte(data);
1199  postProcessFn(key, AsmResourceEntryKind::Bool);
1200  }
1201  void buildString(StringRef key, StringRef data) final {
1202  if (!shouldElideData)
1203  emitter.emitVarInt(stringSection.insert(data));
1204  postProcessFn(key, AsmResourceEntryKind::String);
1205  }
1206 
1207 private:
1208  EncodingEmitter &emitter;
1209  StringSectionBuilder &stringSection;
1210  PostProcessFn postProcessFn;
1211  bool shouldElideData = false;
1212 };
1213 } // namespace
1214 
1215 void BytecodeWriter::writeResourceSection(Operation *op,
1216  EncodingEmitter &emitter) {
1217  EncodingEmitter resourceEmitter;
1218  EncodingEmitter resourceOffsetEmitter;
1219  uint64_t prevOffset = 0;
1221  curResourceEntries;
1222 
1223  // Functor used to process the offset for a resource of `kind` defined by
1224  // 'key'.
1225  auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) {
1226  uint64_t curOffset = resourceEmitter.size();
1227  curResourceEntries.emplace_back(key, kind, curOffset - prevOffset);
1228  prevOffset = curOffset;
1229  };
1230 
1231  // Functor used to emit a resource group defined by 'key'.
1232  auto emitResourceGroup = [&](uint64_t key) {
1233  resourceOffsetEmitter.emitVarInt(key);
1234  resourceOffsetEmitter.emitVarInt(curResourceEntries.size());
1235  for (auto [key, kind, size] : curResourceEntries) {
1236  resourceOffsetEmitter.emitVarInt(stringSection.insert(key));
1237  resourceOffsetEmitter.emitVarInt(size);
1238  resourceOffsetEmitter.emitByte(kind);
1239  }
1240  };
1241 
1242  // Builder used to emit resources.
1243  ResourceBuilder entryBuilder(resourceEmitter, stringSection,
1244  appendResourceOffset,
1245  config.shouldElideResourceData);
1246 
1247  // Emit the external resource entries.
1248  resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size());
1249  for (const auto &printer : config.externalResourcePrinters) {
1250  curResourceEntries.clear();
1251  printer->buildResources(op, entryBuilder);
1252  emitResourceGroup(stringSection.insert(printer->getName()));
1253  }
1254 
1255  // Emit the dialect resource entries.
1256  for (DialectNumbering &dialect : numberingState.getDialects()) {
1257  if (!dialect.asmInterface)
1258  continue;
1259  curResourceEntries.clear();
1260  dialect.asmInterface->buildResources(op, dialect.resources, entryBuilder);
1261 
1262  // Emit the declaration resources for this dialect, these didn't get emitted
1263  // by the interface. These resources don't have data attached, so just use a
1264  // "blob" kind as a placeholder.
1265  for (const auto &resource : dialect.resourceMap)
1266  if (resource.second->isDeclaration)
1267  appendResourceOffset(resource.first, AsmResourceEntryKind::Blob);
1268 
1269  // Emit the resource group for this dialect.
1270  if (!curResourceEntries.empty())
1271  emitResourceGroup(dialect.number);
1272  }
1273 
1274  // If we didn't emit any resource groups, elide the resource sections.
1275  if (resourceOffsetEmitter.size() == 0)
1276  return;
1277 
1278  emitter.emitSection(bytecode::Section::kResourceOffset,
1279  std::move(resourceOffsetEmitter));
1280  emitter.emitSection(bytecode::Section::kResource, std::move(resourceEmitter));
1281 }
1282 
1283 //===----------------------------------------------------------------------===//
1284 // Strings
1285 
1286 void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) {
1287  EncodingEmitter stringEmitter;
1288  stringSection.write(stringEmitter);
1289  emitter.emitSection(bytecode::Section::kString, std::move(stringEmitter));
1290 }
1291 
1292 //===----------------------------------------------------------------------===//
1293 // Properties
1294 
1295 void BytecodeWriter::writePropertiesSection(EncodingEmitter &emitter) {
1296  EncodingEmitter propertiesEmitter;
1297  propertiesSection.write(propertiesEmitter);
1298  emitter.emitSection(bytecode::Section::kProperties,
1299  std::move(propertiesEmitter));
1300 }
1301 
1302 //===----------------------------------------------------------------------===//
1303 // Entry Points
1304 //===----------------------------------------------------------------------===//
1305 
1307  const BytecodeWriterConfig &config) {
1308  BytecodeWriter writer(op, config);
1309  return writer.write(op, os);
1310 }
static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries, EntryCallbackT &&callback)
Write the given entries in contiguous groups with the same parent dialect.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This class represents an opaque handle to a dialect resource entry.
This class is used to build resource entries for use by the printer.
Definition: AsmState.h:239
A class to interact with the attributes and types printer when emitting MLIR bytecode.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:315
Block represents an ordered list of Operations.
Definition: Block.h:30
BlockArgListType getArguments()
Definition: Block.h:84
This class contains the configuration used for the bytecode writer.
void attachTypeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Type >> callback)
llvm::StringMap< std::unique_ptr< DialectVersion > > & getDialectVersionMap() const
A map containing the dialect versions to emit.
void setElideResourceDataFlag(bool shouldElideResourceData=true)
Set a boolean flag to skip emission of resources into the bytecode file.
BytecodeWriterConfig(StringRef producer="MLIR" LLVM_VERSION_STRING)
producer is an optional string that can be used to identify the producer of the bytecode when reading...
void attachFallbackResourcePrinter(FallbackAsmResourceMap &map)
Attach resource printers to the AsmState for the fallback resources in the given map.
int64_t getDesiredBytecodeVersion() const
Get the set desired bytecode version to emit.
void setDialectVersion(std::unique_ptr< DialectVersion > dialectVersion) const
Set a given dialect version to emit on the map.
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > getTypeWriterCallbacks() const
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > getAttributeWriterCallbacks() const
Retrieve the callbacks.
void setDesiredBytecodeVersion(int64_t bytecodeVersion)
Set the desired bytecode version to emit.
void attachResourcePrinter(std::unique_ptr< AsmResourcePrinter > printer)
Attach the given resource printer to the writer configuration.
void attachAttributeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Attribute >> callback)
Attach a custom bytecode printer callback to the configuration for the emission of custom type/attrib...
This class defines a virtual interface for writing to a bytecode stream, providing hooks into the byt...
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
A fallback map containing external resources not explicitly handled by another parser/printer.
Definition: AsmState.h:412
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
DictionaryAttr getAttrDictionary()
Return all of the attributes on this operation as a DictionaryAttr.
Definition: Operation.cpp:296
unsigned getNumSuccessors()
Definition: Operation.h:702
bool isRegistered()
Returns true if this operation has a registered operation description, otherwise false.
Definition: Operation.h:129
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:669
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:341
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:672
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
DictionaryAttr getDiscardableAttrDictionary()
Return all of the discardable attributes on this operation as a DictionaryAttr.
Definition: Operation.h:496
result_type_range getResultTypes()
Definition: Operation.h:423
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
SuccessorRange getSuccessors()
Definition: Operation.h:699
result_range getResults()
Definition: Operation.h:410
int getPropertiesStorageSize() const
Returns the properties storage size.
Definition: Operation.h:892
OpaqueProperties getPropertiesStorage()
Returns the properties storage.
Definition: Operation.h:896
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:399
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
bool empty()
Definition: Region.h:60
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
This class manages numbering IR entities in preparation of bytecode emission.
Definition: IRNumbering.h:151
@ kAttrType
This section contains the attributes and types referenced within an IR module.
Definition: Encoding.h:73
@ kAttrTypeOffset
This section contains the offsets for the attribute and types within the AttrType section.
Definition: Encoding.h:77
@ kIR
This section contains the list of operations serialized into the bytecode, and their nested regions/o...
Definition: Encoding.h:81
@ kResource
This section contains the resources of the bytecode.
Definition: Encoding.h:84
@ kResourceOffset
This section contains the offsets of resources within the Resource section.
Definition: Encoding.h:88
@ kDialect
This section contains the dialects referenced within an IR module.
Definition: Encoding.h:69
@ kString
This section contains strings referenced within the bytecode.
Definition: Encoding.h:66
@ kDialectVersions
This section contains the versions of each dialect.
Definition: Encoding.h:91
@ kProperties
This section contains the properties for the operations.
Definition: Encoding.h:94
static uint64_t getUseID(OperandT &val, unsigned ownerID)
Get the unique ID of a value use.
Definition: Encoding.h:127
@ kUseListOrdering
Use-list ordering started to be encoded in version 3.
Definition: Encoding.h:38
@ kAlignmentByte
An arbitrary value used to fill alignment padding.
Definition: Encoding.h:56
@ kVersion
The current bytecode version.
Definition: Encoding.h:53
@ kLazyLoading
Support for lazy-loading of isolated region was added in version 2.
Definition: Encoding.h:35
@ kDialectVersioning
Dialects versioning was added in version 1.
Definition: Encoding.h:32
@ kElideUnknownBlockArgLocation
Avoid recording unknown locations on block arguments (compression) started in version 4.
Definition: Encoding.h:42
@ kNativePropertiesEncoding
Support for encoding properties natively in bytecode instead of merged with the discardable attribute...
Definition: Encoding.h:46
@ kMinSupportedVersion
The minimum supported version of the bytecode.
Definition: Encoding.h:29
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:285
Include the generated interface declarations.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:68
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
AsmResourceEntryKind
This enum represents the different kinds of resource values.
Definition: AsmState.h:272
@ String
A string value.
@ Bool
A boolean value.
@ Blob
A blob of data with an accompanying alignment.
LogicalResult writeBytecodeToFile(Operation *op, raw_ostream &os, const BytecodeWriterConfig &config={})
Write the bytecode for the given operation to the provided output stream.
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
Definition: LogicalResult.h:72
StringRef producer
The producer of the bytecode.
llvm::StringMap< std::unique_ptr< DialectVersion > > dialectVersionMap
A map containing dialect version information for each dialect to emit.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > attributeWriterCallbacks
Printer callbacks used to emit custom type and attribute encodings.
SmallVector< std::unique_ptr< AsmResourcePrinter > > externalResourcePrinters
A collection of non-dialect resource printers.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > typeWriterCallbacks
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
This class represents a numbering entry for a Dialect.
Definition: IRNumbering.h:106
unsigned number
The number assigned to the dialect.
Definition: IRNumbering.h:114
This class represents the numbering entry of an operation name.
Definition: IRNumbering.h:65