MLIR  22.0.0git
BytecodeWriter.cpp
Go to the documentation of this file.
1 //===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "IRNumbering.h"
13 #include "mlir/Bytecode/Encoding.h"
14 #include "mlir/IR/Attributes.h"
15 #include "mlir/IR/Diagnostics.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/CachedHashString.h"
19 #include "llvm/ADT/MapVector.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/DebugLog.h"
23 #include "llvm/Support/Endian.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <optional>
26 
27 #define DEBUG_TYPE "mlir-bytecode-writer"
28 
29 using namespace mlir;
30 using namespace mlir::bytecode::detail;
31 
32 //===----------------------------------------------------------------------===//
33 // BytecodeWriterConfig
34 //===----------------------------------------------------------------------===//
35 
37  Impl(StringRef producer) : producer(producer) {}
38 
39  /// Version to use when writing.
40  /// Note: This only differs from kVersion if a specific version is set.
41  int64_t bytecodeVersion = bytecode::kVersion;
42 
43  /// A flag specifying whether to elide emission of resources into the bytecode
44  /// file.
45  bool shouldElideResourceData = false;
46 
47  /// A map containing dialect version information for each dialect to emit.
48  llvm::StringMap<std::unique_ptr<DialectVersion>> dialectVersionMap;
49 
50  /// The producer of the bytecode.
51  StringRef producer;
52 
53  /// Printer callbacks used to emit custom type and attribute encodings.
58 
59  /// A collection of non-dialect resource printers.
61 };
62 
64  : impl(std::make_unique<Impl>(producer)) {}
66  StringRef producer)
67  : BytecodeWriterConfig(producer) {
69 }
71  : impl(std::move(config.impl)) {}
72 
74 
77  return impl->attributeWriterCallbacks;
78 }
79 
82  return impl->typeWriterCallbacks;
83 }
84 
86  std::unique_ptr<AttrTypeBytecodeWriter<Attribute>> callback) {
87  impl->attributeWriterCallbacks.emplace_back(std::move(callback));
88 }
89 
91  std::unique_ptr<AttrTypeBytecodeWriter<Type>> callback) {
92  impl->typeWriterCallbacks.emplace_back(std::move(callback));
93 }
94 
96  std::unique_ptr<AsmResourcePrinter> printer) {
97  impl->externalResourcePrinters.emplace_back(std::move(printer));
98 }
99 
101  bool shouldElideResourceData) {
102  impl->shouldElideResourceData = shouldElideResourceData;
103 }
104 
105 void BytecodeWriterConfig::setDesiredBytecodeVersion(int64_t bytecodeVersion) {
106  impl->bytecodeVersion = bytecodeVersion;
107 }
108 
110  return impl->bytecodeVersion;
111 }
112 
113 llvm::StringMap<std::unique_ptr<DialectVersion>> &
115  return impl->dialectVersionMap;
116 }
117 
119  llvm::StringRef dialectName,
120  std::unique_ptr<DialectVersion> dialectVersion) const {
121  assert(!impl->dialectVersionMap.contains(dialectName) &&
122  "cannot override a previously set dialect version");
123  impl->dialectVersionMap.insert({dialectName, std::move(dialectVersion)});
124 }
125 
126 //===----------------------------------------------------------------------===//
127 // EncodingEmitter
128 //===----------------------------------------------------------------------===//
129 
130 namespace {
131 /// This class functions as the underlying encoding emitter for the bytecode
132 /// writer. This class is a bit different compared to other types of encoders;
133 /// it does not use a single buffer, but instead may contain several buffers
134 /// (some owned by the writer, and some not) that get concatted during the final
135 /// emission.
136 class EncodingEmitter {
137 public:
138  EncodingEmitter() = default;
139  EncodingEmitter(const EncodingEmitter &) = delete;
140  EncodingEmitter &operator=(const EncodingEmitter &) = delete;
141 
142  /// Write the current contents to the provided stream.
143  void writeTo(raw_ostream &os) const;
144 
145  /// Return the current size of the encoded buffer.
146  size_t size() const { return prevResultSize + currentResult.size(); }
147 
148  //===--------------------------------------------------------------------===//
149  // Emission
150  //===--------------------------------------------------------------------===//
151 
152  /// Backpatch a byte in the result buffer at the given offset.
153  void patchByte(uint64_t offset, uint8_t value, StringLiteral desc) {
154  LDBG() << "patchByte(" << offset << ',' << uint64_t(value) << ")\t" << desc;
155  assert(offset < size() && offset >= prevResultSize &&
156  "cannot patch previously emitted data");
157  currentResult[offset - prevResultSize] = value;
158  }
159 
160  /// Emit the provided blob of data, which is owned by the caller and is
161  /// guaranteed to not die before the end of the bytecode process.
162  void emitOwnedBlob(ArrayRef<uint8_t> data, StringLiteral desc) {
163  LDBG() << "emitOwnedBlob(" << data.size() << "b)\t" << desc;
164  // Push the current buffer before adding the provided data.
165  appendResult(std::move(currentResult));
166  appendOwnedResult(data);
167  }
168 
169  /// Emit the provided blob of data that has the given alignment, which is
170  /// owned by the caller and is guaranteed to not die before the end of the
171  /// bytecode process. The alignment value is also encoded, making it available
172  /// on load.
173  void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment,
174  StringLiteral desc) {
175  emitVarInt(alignment, desc);
176  emitVarInt(data.size(), desc);
177 
178  alignTo(alignment);
179  emitOwnedBlob(data, desc);
180  }
181  void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment,
182  StringLiteral desc) {
183  ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
184  data.size());
185  emitOwnedBlobAndAlignment(castedData, alignment, desc);
186  }
187 
188  /// Align the emitter to the given alignment.
189  void alignTo(unsigned alignment) {
190  if (alignment < 2)
191  return;
192  assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment");
193 
194  // Check to see if we need to emit any padding bytes to meet the desired
195  // alignment.
196  size_t curOffset = size();
197  size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset;
198  while (paddingSize--)
199  emitByte(bytecode::kAlignmentByte, "alignment byte");
200 
201  // Keep track of the maximum required alignment.
202  requiredAlignment = std::max(requiredAlignment, alignment);
203  }
204 
205  //===--------------------------------------------------------------------===//
206  // Integer Emission
207 
208  /// Emit a single byte.
209  template <typename T>
210  void emitByte(T byte, StringLiteral desc) {
211  LDBG() << "emitByte(" << uint64_t(byte) << ")\t" << desc;
212  currentResult.push_back(static_cast<uint8_t>(byte));
213  }
214 
215  /// Emit a range of bytes.
216  void emitBytes(ArrayRef<uint8_t> bytes, StringLiteral desc) {
217  LDBG() << "emitBytes(" << bytes.size() << "b)\t" << desc;
218  llvm::append_range(currentResult, bytes);
219  }
220 
221  /// Emit a variable length integer. The first encoded byte contains a prefix
222  /// in the low bits indicating the encoded length of the value. This length
223  /// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits
224  /// indicate the number of _additional_ bytes (not including the prefix byte).
225  /// All remaining bits in the first byte, along with all of the bits in
226  /// additional bytes, provide the value of the integer encoded in
227  /// little-endian order.
228  void emitVarInt(uint64_t value, StringLiteral desc) {
229  LDBG() << "emitVarInt(" << value << ")\t" << desc;
230 
231  // In the most common case, the value can be represented in a single byte.
232  // Given how hot this case is, explicitly handle that here.
233  if ((value >> 7) == 0)
234  return emitByte((value << 1) | 0x1, desc);
235  emitMultiByteVarInt(value, desc);
236  }
237 
238  /// Emit a signed variable length integer. Signed varints are encoded using
239  /// a varint with zigzag encoding, meaning that we use the low bit of the
240  /// value to indicate the sign of the value. This allows for more efficient
241  /// encoding of negative values by limiting the number of active bits
242  void emitSignedVarInt(uint64_t value, StringLiteral desc) {
243  emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63), desc);
244  }
245 
246  /// Emit a variable length integer whose low bit is used to encode the
247  /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
248  void emitVarIntWithFlag(uint64_t value, bool flag, StringLiteral desc) {
249  emitVarInt((value << 1) | (flag ? 1 : 0), desc);
250  }
251 
252  //===--------------------------------------------------------------------===//
253  // String Emission
254 
255  /// Emit the given string as a nul terminated string.
256  void emitNulTerminatedString(StringRef str, StringLiteral desc) {
257  emitString(str, desc);
258  emitByte(0, "null terminator");
259  }
260 
261  /// Emit the given string without a nul terminator.
262  void emitString(StringRef str, StringLiteral desc) {
263  emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()},
264  desc);
265  }
266 
267  //===--------------------------------------------------------------------===//
268  // Section Emission
269 
270  /// Emit a nested section of the given code, whose contents are encoded in the
271  /// provided emitter.
272  void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) {
273  // Emit the section code and length. The high bit of the code is used to
274  // indicate whether the section alignment is present, so save an offset to
275  // it.
276  uint64_t codeOffset = currentResult.size();
277  emitByte(code, "section code");
278  emitVarInt(emitter.size(), "section size");
279 
280  // Integrate the alignment of the section into this emitter if necessary.
281  unsigned emitterAlign = emitter.requiredAlignment;
282  if (emitterAlign > 1) {
283  if (size() & (emitterAlign - 1)) {
284  emitVarInt(emitterAlign, "section alignment");
285  alignTo(emitterAlign);
286 
287  // Indicate that we needed to align the section, the high bit of the
288  // code field is used for this.
289  currentResult[codeOffset] |= 0b10000000;
290  } else {
291  // Otherwise, if we happen to be at a compatible offset, we just
292  // remember that we need this alignment.
293  requiredAlignment = std::max(requiredAlignment, emitterAlign);
294  }
295  }
296 
297  // Push our current buffer and then merge the provided section body into
298  // ours.
299  appendResult(std::move(currentResult));
300  for (std::vector<uint8_t> &result : emitter.prevResultStorage)
301  prevResultStorage.push_back(std::move(result));
302  llvm::append_range(prevResultList, emitter.prevResultList);
303  prevResultSize += emitter.prevResultSize;
304  appendResult(std::move(emitter.currentResult));
305  }
306 
307 private:
308  /// Emit the given value using a variable width encoding. This method is a
309  /// fallback when the number of bytes needed to encode the value is greater
310  /// than 1. We mark it noinline here so that the single byte hot path isn't
311  /// pessimized.
312  LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value,
313  StringLiteral desc);
314 
315  /// Append a new result buffer to the current contents.
316  void appendResult(std::vector<uint8_t> &&result) {
317  if (result.empty())
318  return;
319  prevResultStorage.emplace_back(std::move(result));
320  appendOwnedResult(prevResultStorage.back());
321  }
322  void appendOwnedResult(ArrayRef<uint8_t> result) {
323  if (result.empty())
324  return;
325  prevResultSize += result.size();
326  prevResultList.emplace_back(result);
327  }
328 
329  /// The result of the emitter currently being built. We refrain from building
330  /// a single buffer to simplify emitting sections, large data, and more. The
331  /// result is thus represented using multiple distinct buffers, some of which
332  /// we own (via prevResultStorage), and some of which are just pointers into
333  /// externally owned buffers.
334  std::vector<uint8_t> currentResult;
335  std::vector<ArrayRef<uint8_t>> prevResultList;
336  std::vector<std::vector<uint8_t>> prevResultStorage;
337 
338  /// An up-to-date total size of all of the buffers within `prevResultList`.
339  /// This enables O(1) size checks of the current encoding.
340  size_t prevResultSize = 0;
341 
342  /// The highest required alignment for the start of this section.
343  unsigned requiredAlignment = 1;
344 };
345 
346 //===----------------------------------------------------------------------===//
347 // StringSectionBuilder
348 //===----------------------------------------------------------------------===//
349 
350 namespace {
351 /// This class is used to simplify the process of emitting the string section.
352 class StringSectionBuilder {
353 public:
354  /// Add the given string to the string section, and return the index of the
355  /// string within the section.
356  size_t insert(StringRef str) {
357  auto it = strings.insert({llvm::CachedHashStringRef(str), strings.size()});
358  return it.first->second;
359  }
360 
361  /// Write the current set of strings to the given emitter.
362  void write(EncodingEmitter &emitter) {
363  emitter.emitVarInt(strings.size(), "string section size");
364 
365  // Emit the sizes in reverse order, so that we don't need to backpatch an
366  // offset to the string data or have a separate section.
367  for (const auto &it : llvm::reverse(strings))
368  emitter.emitVarInt(it.first.size() + 1, "string size");
369  // Emit the string data itself.
370  for (const auto &it : strings)
371  emitter.emitNulTerminatedString(it.first.val(), "string");
372  }
373 
374 private:
375  /// A set of strings referenced within the bytecode. The value of the map is
376  /// unused.
377  llvm::MapVector<llvm::CachedHashStringRef, size_t> strings;
378 };
379 } // namespace
380 
381 class DialectWriter : public DialectBytecodeWriter {
382  using DialectVersionMapT = llvm::StringMap<std::unique_ptr<DialectVersion>>;
383 
384 public:
385  DialectWriter(int64_t bytecodeVersion, EncodingEmitter &emitter,
386  IRNumberingState &numberingState,
387  StringSectionBuilder &stringSection,
388  const DialectVersionMapT &dialectVersionMap)
389  : bytecodeVersion(bytecodeVersion), emitter(emitter),
390  numberingState(numberingState), stringSection(stringSection),
391  dialectVersionMap(dialectVersionMap) {}
392 
393  //===--------------------------------------------------------------------===//
394  // IR
395  //===--------------------------------------------------------------------===//
396 
397  void writeAttribute(Attribute attr) override {
398  emitter.emitVarInt(numberingState.getNumber(attr), "dialect attr");
399  }
400  void writeOptionalAttribute(Attribute attr) override {
401  if (!attr) {
402  emitter.emitVarInt(0, "dialect optional attr none");
403  return;
404  }
405  emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true,
406  "dialect optional attr");
407  }
408 
409  void writeType(Type type) override {
410  emitter.emitVarInt(numberingState.getNumber(type), "dialect type");
411  }
412 
413  void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
414  emitter.emitVarInt(numberingState.getNumber(resource), "dialect resource");
415  }
416 
417  //===--------------------------------------------------------------------===//
418  // Primitives
419  //===--------------------------------------------------------------------===//
420 
421  void writeVarInt(uint64_t value) override {
422  emitter.emitVarInt(value, "dialect writer");
423  }
424 
425  void writeSignedVarInt(int64_t value) override {
426  emitter.emitSignedVarInt(value, "dialect writer");
427  }
428 
429  void writeAPIntWithKnownWidth(const APInt &value) override {
430  size_t bitWidth = value.getBitWidth();
431 
432  // If the value is a single byte, just emit it directly without going
433  // through a varint.
434  if (bitWidth <= 8)
435  return emitter.emitByte(value.getLimitedValue(), "dialect APInt");
436 
437  // If the value fits within a single varint, emit it directly.
438  if (bitWidth <= 64)
439  return emitter.emitSignedVarInt(value.getLimitedValue(), "dialect APInt");
440 
441  // Otherwise, we need to encode a variable number of active words. We use
442  // active words instead of the number of total words under the observation
443  // that smaller values will be more common.
444  unsigned numActiveWords = value.getActiveWords();
445  emitter.emitVarInt(numActiveWords, "dialect APInt word count");
446 
447  const uint64_t *rawValueData = value.getRawData();
448  for (unsigned i = 0; i < numActiveWords; ++i)
449  emitter.emitSignedVarInt(rawValueData[i], "dialect APInt word");
450  }
451 
452  void writeAPFloatWithKnownSemantics(const APFloat &value) override {
453  writeAPIntWithKnownWidth(value.bitcastToAPInt());
454  }
455 
456  void writeOwnedString(StringRef str) override {
457  emitter.emitVarInt(stringSection.insert(str), "dialect string");
458  }
459 
460  void writeOwnedBlob(ArrayRef<char> blob) override {
461  emitter.emitVarInt(blob.size(), "dialect blob");
462  emitter.emitOwnedBlob(
463  ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(blob.data()),
464  blob.size()),
465  "dialect blob");
466  }
467 
468  void writeOwnedBool(bool value) override {
469  emitter.emitByte(value, "dialect bool");
470  }
471 
472  int64_t getBytecodeVersion() const override { return bytecodeVersion; }
473 
474  FailureOr<const DialectVersion *>
475  getDialectVersion(StringRef dialectName) const override {
476  auto dialectEntry = dialectVersionMap.find(dialectName);
477  if (dialectEntry == dialectVersionMap.end())
478  return failure();
479  return dialectEntry->getValue().get();
480  }
481 
482 private:
483  int64_t bytecodeVersion;
484  EncodingEmitter &emitter;
485  IRNumberingState &numberingState;
486  StringSectionBuilder &stringSection;
487  const DialectVersionMapT &dialectVersionMap;
488 };
489 
490 namespace {
491 class PropertiesSectionBuilder {
492 public:
493  PropertiesSectionBuilder(IRNumberingState &numberingState,
494  StringSectionBuilder &stringSection,
496  : numberingState(numberingState), stringSection(stringSection),
497  config(config) {}
498 
499  /// Emit the op properties in the properties section and return the index of
500  /// the properties within the section. Return -1 if no properties was emitted.
501  std::optional<ssize_t> emit(Operation *op) {
502  EncodingEmitter propertiesEmitter;
503  if (!op->getPropertiesStorageSize())
504  return std::nullopt;
505  if (!op->isRegistered()) {
506  // Unregistered op are storing properties as an optional attribute.
507  Attribute prop = *op->getPropertiesStorage().as<Attribute *>();
508  if (!prop)
509  return std::nullopt;
510  EncodingEmitter sizeEmitter;
511  sizeEmitter.emitVarInt(numberingState.getNumber(prop), "properties size");
512  scratch.clear();
513  llvm::raw_svector_ostream os(scratch);
514  sizeEmitter.writeTo(os);
515  return emit(scratch);
516  }
517 
518  EncodingEmitter emitter;
519  DialectWriter propertiesWriter(config.bytecodeVersion, emitter,
520  numberingState, stringSection,
521  config.dialectVersionMap);
522  auto iface = cast<BytecodeOpInterface>(op);
523  iface.writeProperties(propertiesWriter);
524  scratch.clear();
525  llvm::raw_svector_ostream os(scratch);
526  emitter.writeTo(os);
527  return emit(scratch);
528  }
529 
530  /// Write the current set of properties to the given emitter.
531  void write(EncodingEmitter &emitter) {
532  emitter.emitVarInt(propertiesStorage.size(), "properties size");
533  if (propertiesStorage.empty())
534  return;
535  for (const auto &storage : propertiesStorage) {
536  if (storage.empty()) {
537  emitter.emitBytes(ArrayRef<uint8_t>(), "empty properties");
538  continue;
539  }
540  emitter.emitBytes(ArrayRef(reinterpret_cast<const uint8_t *>(&storage[0]),
541  storage.size()),
542  "property");
543  }
544  }
545 
546  /// Returns true if the section is empty.
547  bool empty() { return propertiesStorage.empty(); }
548 
549 private:
550  /// Emit raw data and returns the offset in the internal buffer.
551  /// Data are deduplicated and will be copied in the internal buffer only if
552  /// they don't exist there already.
553  ssize_t emit(ArrayRef<char> rawProperties) {
554  // Populate a scratch buffer with the properties size.
555  SmallVector<char> sizeScratch;
556  {
557  EncodingEmitter sizeEmitter;
558  sizeEmitter.emitVarInt(rawProperties.size(), "properties");
559  llvm::raw_svector_ostream os(sizeScratch);
560  sizeEmitter.writeTo(os);
561  }
562  // Append a new storage to the table now.
563  size_t index = propertiesStorage.size();
564  propertiesStorage.emplace_back();
565  std::vector<char> &newStorage = propertiesStorage.back();
566  size_t propertiesSize = sizeScratch.size() + rawProperties.size();
567  newStorage.reserve(propertiesSize);
568  llvm::append_range(newStorage, sizeScratch);
569  llvm::append_range(newStorage, rawProperties);
570 
571  // Try to de-duplicate the new serialized properties.
572  // If the properties is a duplicate, pop it back from the storage.
573  auto inserted = propertiesUniquing.insert(
574  std::make_pair(ArrayRef<char>(newStorage), index));
575  if (!inserted.second)
576  propertiesStorage.pop_back();
577  return inserted.first->getSecond();
578  }
579 
580  /// Storage for properties.
581  std::vector<std::vector<char>> propertiesStorage;
582  SmallVector<char> scratch;
583  DenseMap<ArrayRef<char>, int64_t> propertiesUniquing;
584  IRNumberingState &numberingState;
585  StringSectionBuilder &stringSection;
587 };
588 } // namespace
589 
590 /// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need
591 /// to go through an intermediate buffer when interacting with code that wants a
592 /// raw_ostream.
593 class RawEmitterOstream : public raw_ostream {
594 public:
595  explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) {
596  SetUnbuffered();
597  }
598 
599 private:
600  void write_impl(const char *ptr, size_t size) override {
601  emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size},
602  "raw emitter");
603  }
604  uint64_t current_pos() const override { return emitter.size(); }
605 
606  /// The section being emitted to.
607  EncodingEmitter &emitter;
608 };
609 } // namespace
610 
611 void EncodingEmitter::writeTo(raw_ostream &os) const {
612  // Reserve space in the ostream for the encoded contents.
613  os.reserveExtraSpace(size());
614 
615  for (auto &prevResult : prevResultList)
616  os.write((const char *)prevResult.data(), prevResult.size());
617  os.write((const char *)currentResult.data(), currentResult.size());
618 }
619 
620 void EncodingEmitter::emitMultiByteVarInt(uint64_t value, StringLiteral desc) {
621  // Compute the number of bytes needed to encode the value. Each byte can hold
622  // up to 7-bits of data. We only check up to the number of bits we can encode
623  // in the first byte (8).
624  uint64_t it = value >> 7;
625  for (size_t numBytes = 2; numBytes < 9; ++numBytes) {
626  if (LLVM_LIKELY(it >>= 7) == 0) {
627  uint64_t encodedValue = (value << 1) | 0x1;
628  encodedValue <<= (numBytes - 1);
629  llvm::support::ulittle64_t encodedValueLE(encodedValue);
630  emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes}, desc);
631  return;
632  }
633  }
634 
635  // If the value is too large to encode in a single byte, emit a special all
636  // zero marker byte and splat the value directly.
637  emitByte(0, desc);
638  llvm::support::ulittle64_t valueLE(value);
639  emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)}, desc);
640 }
641 
642 //===----------------------------------------------------------------------===//
643 // Bytecode Writer
644 //===----------------------------------------------------------------------===//
645 
646 namespace {
647 class BytecodeWriter {
648 public:
649  BytecodeWriter(Operation *op, const BytecodeWriterConfig &config)
650  : numberingState(op, config), config(config.getImpl()),
651  propertiesSection(numberingState, stringSection, config.getImpl()) {}
652 
653  /// Write the bytecode for the given root operation.
654  LogicalResult write(Operation *rootOp, raw_ostream &os);
655 
656 private:
657  //===--------------------------------------------------------------------===//
658  // Dialects
659 
660  void writeDialectSection(EncodingEmitter &emitter);
661 
662  //===--------------------------------------------------------------------===//
663  // Attributes and Types
664 
665  void writeAttrTypeSection(EncodingEmitter &emitter);
666 
667  //===--------------------------------------------------------------------===//
668  // Operations
669 
670  LogicalResult writeBlock(EncodingEmitter &emitter, Block *block);
671  LogicalResult writeOp(EncodingEmitter &emitter, Operation *op);
672  LogicalResult writeRegion(EncodingEmitter &emitter, Region *region);
673  LogicalResult writeIRSection(EncodingEmitter &emitter, Operation *op);
674 
675  LogicalResult writeRegions(EncodingEmitter &emitter,
676  MutableArrayRef<Region> regions) {
677  return success(llvm::all_of(regions, [&](Region &region) {
678  return succeeded(writeRegion(emitter, &region));
679  }));
680  }
681 
682  //===--------------------------------------------------------------------===//
683  // Resources
684 
685  void writeResourceSection(Operation *op, EncodingEmitter &emitter);
686 
687  //===--------------------------------------------------------------------===//
688  // Strings
689 
690  void writeStringSection(EncodingEmitter &emitter);
691 
692  //===--------------------------------------------------------------------===//
693  // Properties
694 
695  void writePropertiesSection(EncodingEmitter &emitter);
696 
697  //===--------------------------------------------------------------------===//
698  // Helpers
699 
700  void writeUseListOrders(EncodingEmitter &emitter, uint8_t &opEncodingMask,
701  ValueRange range);
702 
703  //===--------------------------------------------------------------------===//
704  // Fields
705 
706  /// The builder used for the string section.
707  StringSectionBuilder stringSection;
708 
709  /// The IR numbering state generated for the root operation.
710  IRNumberingState numberingState;
711 
712  /// Configuration dictating bytecode emission.
714 
715  /// Storage for the properties section
716  PropertiesSectionBuilder propertiesSection;
717 };
718 } // namespace
719 
720 LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
721  EncodingEmitter emitter;
722 
723  // Emit the bytecode file header. This is how we identify the output as a
724  // bytecode file.
725  emitter.emitString("ML\xefR", "bytecode header");
726 
727  // Emit the bytecode version.
728  if (config.bytecodeVersion < bytecode::kMinSupportedVersion ||
729  config.bytecodeVersion > bytecode::kVersion)
730  return rootOp->emitError()
731  << "unsupported version requested " << config.bytecodeVersion
732  << ", must be in range ["
733  << static_cast<int64_t>(bytecode::kMinSupportedVersion) << ", "
734  << static_cast<int64_t>(bytecode::kVersion) << ']';
735  emitter.emitVarInt(config.bytecodeVersion, "bytecode version");
736 
737  // Emit the producer.
738  emitter.emitNulTerminatedString(config.producer, "bytecode producer");
739 
740  // Emit the dialect section.
741  writeDialectSection(emitter);
742 
743  // Emit the attributes and types section.
744  writeAttrTypeSection(emitter);
745 
746  // Emit the IR section.
747  if (failed(writeIRSection(emitter, rootOp)))
748  return failure();
749 
750  // Emit the resources section.
751  writeResourceSection(rootOp, emitter);
752 
753  // Emit the string section.
754  writeStringSection(emitter);
755 
756  // Emit the properties section.
757  if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding)
758  writePropertiesSection(emitter);
759  else if (!propertiesSection.empty())
760  return rootOp->emitError(
761  "unexpected properties emitted incompatible with bytecode <5");
762 
763  // Write the generated bytecode to the provided output stream.
764  emitter.writeTo(os);
765 
766  return success();
767 }
768 
769 //===----------------------------------------------------------------------===//
770 // Dialects
771 //===----------------------------------------------------------------------===//
772 
773 /// Write the given entries in contiguous groups with the same parent dialect.
774 /// Each dialect sub-group is encoded with the parent dialect and number of
775 /// elements, followed by the encoding for the entries. The given callback is
776 /// invoked to encode each individual entry.
777 template <typename EntriesT, typename EntryCallbackT>
778 static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
779  EntryCallbackT &&callback) {
780  for (auto it = entries.begin(), e = entries.end(); it != e;) {
781  auto groupStart = it++;
782 
783  // Find the end of the group that shares the same parent dialect.
784  DialectNumbering *currentDialect = groupStart->dialect;
785  it = std::find_if(it, e, [&](const auto &entry) {
786  return entry.dialect != currentDialect;
787  });
788 
789  // Emit the dialect and number of elements.
790  emitter.emitVarInt(currentDialect->number, "dialect number");
791  emitter.emitVarInt(std::distance(groupStart, it), "dialect offset");
792 
793  // Emit the entries within the group.
794  for (auto &entry : llvm::make_range(groupStart, it))
795  callback(entry);
796  }
797 }
798 
799 void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
800  EncodingEmitter dialectEmitter;
801 
802  // Emit the referenced dialects.
803  auto dialects = numberingState.getDialects();
804  dialectEmitter.emitVarInt(llvm::size(dialects), "dialects count");
805  for (DialectNumbering &dialect : dialects) {
806  // Write the string section and get the ID.
807  size_t nameID = stringSection.insert(dialect.name);
808 
809  if (config.bytecodeVersion < bytecode::kDialectVersioning) {
810  dialectEmitter.emitVarInt(nameID, "dialect name ID");
811  continue;
812  }
813 
814  // Try writing the version to the versionEmitter.
815  EncodingEmitter versionEmitter;
816  if (dialect.interface) {
817  // The writer used when emitting using a custom bytecode encoding.
818  DialectWriter versionWriter(config.bytecodeVersion, versionEmitter,
819  numberingState, stringSection,
820  config.dialectVersionMap);
821  dialect.interface->writeVersion(versionWriter);
822  }
823 
824  // If the version emitter is empty, version is not available. We can encode
825  // this in the dialect ID, so if there is no version, we don't write the
826  // section.
827  size_t versionAvailable = versionEmitter.size() > 0;
828  dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable,
829  "dialect version");
830  if (versionAvailable)
831  dialectEmitter.emitSection(bytecode::Section::kDialectVersions,
832  std::move(versionEmitter));
833  }
834 
835  if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation)
836  dialectEmitter.emitVarInt(size(numberingState.getOpNames()),
837  "op names count");
838 
839  // Emit the referenced operation names grouped by dialect.
840  auto emitOpName = [&](OpNameNumbering &name) {
841  size_t stringId = stringSection.insert(name.name.stripDialect());
842  if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding)
843  dialectEmitter.emitVarInt(stringId, "dialect op name");
844  else
845  dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered(),
846  "dialect op name");
847  };
848  writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName);
849 
850  emitter.emitSection(bytecode::Section::kDialect, std::move(dialectEmitter));
851 }
852 
853 //===----------------------------------------------------------------------===//
854 // Attributes and Types
855 //===----------------------------------------------------------------------===//
856 
857 void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
858  EncodingEmitter attrTypeEmitter;
859  EncodingEmitter offsetEmitter;
860  offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()),
861  "attributes count");
862  offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()),
863  "types count");
864 
865  // A functor used to emit an attribute or type entry.
866  uint64_t prevOffset = 0;
867  auto emitAttrOrType = [&](auto &entry) {
868  auto entryValue = entry.getValue();
869 
870  auto emitAttrOrTypeRawImpl = [&]() -> void {
871  RawEmitterOstream(attrTypeEmitter) << entryValue;
872  attrTypeEmitter.emitByte(0, "attr/type separator");
873  };
874  auto emitAttrOrTypeImpl = [&]() -> bool {
875  // TODO: We don't currently support custom encoded mutable types and
876  // attributes.
877  if (entryValue.template hasTrait<TypeTrait::IsMutable>() ||
878  entryValue.template hasTrait<AttributeTrait::IsMutable>()) {
879  emitAttrOrTypeRawImpl();
880  return false;
881  }
882 
883  DialectWriter dialectWriter(config.bytecodeVersion, attrTypeEmitter,
884  numberingState, stringSection,
885  config.dialectVersionMap);
886  if constexpr (std::is_same_v<std::decay_t<decltype(entryValue)>, Type>) {
887  for (const auto &callback : config.typeWriterCallbacks) {
888  if (succeeded(callback->write(entryValue, dialectWriter)))
889  return true;
890  }
891  if (const BytecodeDialectInterface *interface =
892  entry.dialect->interface) {
893  if (succeeded(interface->writeType(entryValue, dialectWriter)))
894  return true;
895  }
896  } else {
897  for (const auto &callback : config.attributeWriterCallbacks) {
898  if (succeeded(callback->write(entryValue, dialectWriter)))
899  return true;
900  }
901  if (const BytecodeDialectInterface *interface =
902  entry.dialect->interface) {
903  if (succeeded(interface->writeAttribute(entryValue, dialectWriter)))
904  return true;
905  }
906  }
907 
908  // If the entry was not emitted using a callback or a dialect interface,
909  // emit it using the textual format.
910  emitAttrOrTypeRawImpl();
911  return false;
912  };
913 
914  bool hasCustomEncoding = emitAttrOrTypeImpl();
915 
916  // Record the offset of this entry.
917  uint64_t curOffset = attrTypeEmitter.size();
918  offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding,
919  "attr/type offset");
920  prevOffset = curOffset;
921  };
922 
923  // Emit the attribute and type entries for each dialect.
924  writeDialectGrouping(offsetEmitter, numberingState.getAttributes(),
925  emitAttrOrType);
926  writeDialectGrouping(offsetEmitter, numberingState.getTypes(),
927  emitAttrOrType);
928 
929  // Emit the sections to the stream.
930  emitter.emitSection(bytecode::Section::kAttrTypeOffset,
931  std::move(offsetEmitter));
932  emitter.emitSection(bytecode::Section::kAttrType, std::move(attrTypeEmitter));
933 }
934 
935 //===----------------------------------------------------------------------===//
936 // Operations
937 //===----------------------------------------------------------------------===//
938 
939 LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter,
940  Block *block) {
941  ArrayRef<BlockArgument> args = block->getArguments();
942  bool hasArgs = !args.empty();
943 
944  // Emit the number of operations in this block, and if it has arguments. We
945  // use the low bit of the operation count to indicate if the block has
946  // arguments.
947  unsigned numOps = numberingState.getOperationCount(block);
948  emitter.emitVarIntWithFlag(numOps, hasArgs, "block num ops");
949 
950  // Emit the arguments of the block.
951  if (hasArgs) {
952  emitter.emitVarInt(args.size(), "block args count");
953  for (BlockArgument arg : args) {
954  Location argLoc = arg.getLoc();
955  if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) {
956  emitter.emitVarIntWithFlag(numberingState.getNumber(arg.getType()),
957  !isa<UnknownLoc>(argLoc), "block arg type");
958  if (!isa<UnknownLoc>(argLoc))
959  emitter.emitVarInt(numberingState.getNumber(argLoc),
960  "block arg location");
961  } else {
962  emitter.emitVarInt(numberingState.getNumber(arg.getType()),
963  "block arg type");
964  emitter.emitVarInt(numberingState.getNumber(argLoc),
965  "block arg location");
966  }
967  }
968  if (config.bytecodeVersion >= bytecode::kUseListOrdering) {
969  uint64_t maskOffset = emitter.size();
970  uint8_t encodingMask = 0;
971  emitter.emitByte(0, "use-list separator");
972  writeUseListOrders(emitter, encodingMask, args);
973  if (encodingMask)
974  emitter.patchByte(maskOffset, encodingMask, "block patch encoding");
975  }
976  }
977 
978  // Emit the operations within the block.
979  for (Operation &op : *block)
980  if (failed(writeOp(emitter, &op)))
981  return failure();
982  return success();
983 }
984 
985 LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
986  emitter.emitVarInt(numberingState.getNumber(op->getName()), "op name ID");
987 
988  // Emit a mask for the operation components. We need to fill this in later
989  // (when we actually know what needs to be emitted), so emit a placeholder for
990  // now.
991  uint64_t maskOffset = emitter.size();
992  uint8_t opEncodingMask = 0;
993  emitter.emitByte(0, "op separator");
994 
995  // Emit the location for this operation.
996  emitter.emitVarInt(numberingState.getNumber(op->getLoc()), "op location");
997 
998  // Emit the attributes of this operation.
999  DictionaryAttr attrs = op->getDiscardableAttrDictionary();
1000  // Allow deployment to version <kNativePropertiesEncoding by merging inherent
1001  // attribute with the discardable ones. We should fail if there are any
1002  // conflicts. When properties are not used by the op, also store everything as
1003  // attributes.
1004  if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding ||
1005  !op->getPropertiesStorage()) {
1006  attrs = op->getAttrDictionary();
1007  }
1008  if (!attrs.empty()) {
1009  opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
1010  emitter.emitVarInt(numberingState.getNumber(attrs), "op attrs count");
1011  }
1012 
1013  // Emit the properties of this operation, for now we still support deployment
1014  // to version <kNativePropertiesEncoding.
1015  if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding) {
1016  std::optional<ssize_t> propertiesId = propertiesSection.emit(op);
1017  if (propertiesId.has_value()) {
1018  opEncodingMask |= bytecode::OpEncodingMask::kHasProperties;
1019  emitter.emitVarInt(*propertiesId, "op properties ID");
1020  }
1021  }
1022 
1023  // Emit the result types of the operation.
1024  if (unsigned numResults = op->getNumResults()) {
1025  opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
1026  emitter.emitVarInt(numResults, "op results count");
1027  for (Type type : op->getResultTypes())
1028  emitter.emitVarInt(numberingState.getNumber(type), "op result type");
1029  }
1030 
1031  // Emit the operands of the operation.
1032  if (unsigned numOperands = op->getNumOperands()) {
1033  opEncodingMask |= bytecode::OpEncodingMask::kHasOperands;
1034  emitter.emitVarInt(numOperands, "op operands count");
1035  for (Value operand : op->getOperands())
1036  emitter.emitVarInt(numberingState.getNumber(operand), "op operand types");
1037  }
1038 
1039  // Emit the successors of the operation.
1040  if (unsigned numSuccessors = op->getNumSuccessors()) {
1041  opEncodingMask |= bytecode::OpEncodingMask::kHasSuccessors;
1042  emitter.emitVarInt(numSuccessors, "op successors count");
1043  for (Block *successor : op->getSuccessors())
1044  emitter.emitVarInt(numberingState.getNumber(successor), "op successor");
1045  }
1046 
1047  // Emit the use-list orders to bytecode, so we can reconstruct the same order
1048  // at parsing.
1049  if (config.bytecodeVersion >= bytecode::kUseListOrdering)
1050  writeUseListOrders(emitter, opEncodingMask, ValueRange(op->getResults()));
1051 
1052  // Check for regions.
1053  unsigned numRegions = op->getNumRegions();
1054  if (numRegions)
1056 
1057  // Update the mask for the operation.
1058  emitter.patchByte(maskOffset, opEncodingMask, "op encoding mask");
1059 
1060  // With the mask emitted, we can now emit the regions of the operation. We do
1061  // this after mask emission to avoid offset complications that may arise by
1062  // emitting the regions first (e.g. if the regions are huge, backpatching the
1063  // op encoding mask is more annoying).
1064  if (numRegions) {
1065  bool isIsolatedFromAbove = numberingState.isIsolatedFromAbove(op);
1066  emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove,
1067  "op regions count");
1068 
1069  // If the region is not isolated from above, or we are emitting bytecode
1070  // targeting version <kLazyLoading, we don't use a section.
1071  if (isIsolatedFromAbove &&
1072  config.bytecodeVersion >= bytecode::kLazyLoading) {
1073  EncodingEmitter regionEmitter;
1074  if (failed(writeRegions(regionEmitter, op->getRegions())))
1075  return failure();
1076  emitter.emitSection(bytecode::Section::kIR, std::move(regionEmitter));
1077 
1078  } else if (failed(writeRegions(emitter, op->getRegions()))) {
1079  return failure();
1080  }
1081  }
1082  return success();
1083 }
1084 
1085 void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter,
1086  uint8_t &opEncodingMask,
1087  ValueRange range) {
1088  // Loop over the results and store the use-list order per result index.
1090  for (auto item : llvm::enumerate(range)) {
1091  auto value = item.value();
1092  // No need to store a custom use-list order if the result does not have
1093  // multiple uses.
1094  if (value.use_empty() || value.hasOneUse())
1095  continue;
1096 
1097  // For each result, assemble the list of pairs (use-list-index,
1098  // global-value-index). While doing so, detect if the global-value-index is
1099  // already ordered with respect to the use-list-index.
1100  bool alreadyOrdered = true;
1101  auto &firstUse = *value.use_begin();
1102  uint64_t prevID = bytecode::getUseID(
1103  firstUse, numberingState.getNumber(firstUse.getOwner()));
1105  {{0, prevID}});
1106 
1107  for (auto use : llvm::drop_begin(llvm::enumerate(value.getUses()))) {
1108  uint64_t currentID = bytecode::getUseID(
1109  use.value(), numberingState.getNumber(use.value().getOwner()));
1110  // The use-list order achieved when building the IR at parsing always
1111  // pushes new uses on front. Hence, if the order by unique ID is
1112  // monotonically decreasing, a roundtrip to bytecode preserves such order.
1113  alreadyOrdered &= (prevID > currentID);
1114  useListPairs.push_back({use.index(), currentID});
1115  prevID = currentID;
1116  }
1117 
1118  // Do not emit if the order is already sorted.
1119  if (alreadyOrdered)
1120  continue;
1121 
1122  // Sort the use indices by the unique ID indices in descending order.
1123  std::sort(
1124  useListPairs.begin(), useListPairs.end(),
1125  [](auto elem1, auto elem2) { return elem1.second > elem2.second; });
1126 
1127  map.try_emplace(item.index(), llvm::map_range(useListPairs, [](auto elem) {
1128  return elem.first;
1129  }));
1130  }
1131 
1132  if (map.empty())
1133  return;
1134 
1136  // Emit the number of results that have a custom use-list order if the number
1137  // of results is greater than one.
1138  if (range.size() != 1) {
1139  emitter.emitVarInt(map.size(), "custom use-list size");
1140  }
1141 
1142  for (const auto &item : map) {
1143  auto resultIdx = item.getFirst();
1144  auto useListOrder = item.getSecond();
1145 
1146  // Compute the number of uses that are actually shuffled. If those are less
1147  // than half of the total uses, encoding the index pair `(src, dst)` is more
1148  // space efficient.
1149  size_t shuffledElements =
1150  llvm::count_if(llvm::enumerate(useListOrder),
1151  [](auto item) { return item.index() != item.value(); });
1152  bool indexPairEncoding = shuffledElements < (useListOrder.size() / 2);
1153 
1154  // For single result, we don't need to store the result index.
1155  if (range.size() != 1)
1156  emitter.emitVarInt(resultIdx, "use-list result index");
1157 
1158  if (indexPairEncoding) {
1159  emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding,
1160  "use-list index pair size");
1161  for (auto pair : llvm::enumerate(useListOrder)) {
1162  if (pair.index() != pair.value()) {
1163  emitter.emitVarInt(pair.value(), "use-list index pair first");
1164  emitter.emitVarInt(pair.index(), "use-list index pair second");
1165  }
1166  }
1167  } else {
1168  emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding,
1169  "use-list size");
1170  for (const auto &index : useListOrder)
1171  emitter.emitVarInt(index, "use-list order");
1172  }
1173  }
1174 }
1175 
1176 LogicalResult BytecodeWriter::writeRegion(EncodingEmitter &emitter,
1177  Region *region) {
1178  // If the region is empty, we only need to emit the number of blocks (which is
1179  // zero).
1180  if (region->empty()) {
1181  emitter.emitVarInt(/*numBlocks*/ 0, "region block count empty");
1182  return success();
1183  }
1184 
1185  // Emit the number of blocks and values within the region.
1186  unsigned numBlocks, numValues;
1187  std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region);
1188  emitter.emitVarInt(numBlocks, "region block count");
1189  emitter.emitVarInt(numValues, "region value count");
1190 
1191  // Emit the blocks within the region.
1192  for (Block &block : *region)
1193  if (failed(writeBlock(emitter, &block)))
1194  return failure();
1195  return success();
1196 }
1197 
1198 LogicalResult BytecodeWriter::writeIRSection(EncodingEmitter &emitter,
1199  Operation *op) {
1200  EncodingEmitter irEmitter;
1201 
1202  // Write the IR section the same way as a block with no arguments. Note that
1203  // the low-bit of the operation count for a block is used to indicate if the
1204  // block has arguments, which in this case is always false.
1205  irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false, "ir section");
1206 
1207  // Emit the operations.
1208  if (failed(writeOp(irEmitter, op)))
1209  return failure();
1210 
1211  emitter.emitSection(bytecode::Section::kIR, std::move(irEmitter));
1212  return success();
1213 }
1214 
1215 //===----------------------------------------------------------------------===//
1216 // Resources
1217 //===----------------------------------------------------------------------===//
1218 
1219 namespace {
1220 /// This class represents a resource builder implementation for the MLIR
1221 /// bytecode format.
1222 class ResourceBuilder : public AsmResourceBuilder {
1223 public:
1224  using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>;
1225 
1226  ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection,
1227  PostProcessFn postProcessFn, bool shouldElideData)
1228  : emitter(emitter), stringSection(stringSection),
1229  postProcessFn(postProcessFn), shouldElideData(shouldElideData) {}
1230  ~ResourceBuilder() override = default;
1231 
1232  void buildBlob(StringRef key, ArrayRef<char> data,
1233  uint32_t dataAlignment) final {
1234  if (!shouldElideData)
1235  emitter.emitOwnedBlobAndAlignment(data, dataAlignment, "resource blob");
1236  postProcessFn(key, AsmResourceEntryKind::Blob);
1237  }
1238  void buildBool(StringRef key, bool data) final {
1239  if (!shouldElideData)
1240  emitter.emitByte(data, "resource bool");
1241  postProcessFn(key, AsmResourceEntryKind::Bool);
1242  }
1243  void buildString(StringRef key, StringRef data) final {
1244  if (!shouldElideData)
1245  emitter.emitVarInt(stringSection.insert(data), "resource string");
1246  postProcessFn(key, AsmResourceEntryKind::String);
1247  }
1248 
1249 private:
1250  EncodingEmitter &emitter;
1251  StringSectionBuilder &stringSection;
1252  PostProcessFn postProcessFn;
1253  bool shouldElideData = false;
1254 };
1255 } // namespace
1256 
1257 void BytecodeWriter::writeResourceSection(Operation *op,
1258  EncodingEmitter &emitter) {
1259  EncodingEmitter resourceEmitter;
1260  EncodingEmitter resourceOffsetEmitter;
1261  uint64_t prevOffset = 0;
1263  curResourceEntries;
1264 
1265  // Functor used to process the offset for a resource of `kind` defined by
1266  // 'key'.
1267  auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) {
1268  uint64_t curOffset = resourceEmitter.size();
1269  curResourceEntries.emplace_back(key, kind, curOffset - prevOffset);
1270  prevOffset = curOffset;
1271  };
1272 
1273  // Functor used to emit a resource group defined by 'key'.
1274  auto emitResourceGroup = [&](uint64_t key) {
1275  resourceOffsetEmitter.emitVarInt(key, "resource group key");
1276  resourceOffsetEmitter.emitVarInt(curResourceEntries.size(),
1277  "resource group size");
1278  for (auto [key, kind, size] : curResourceEntries) {
1279  resourceOffsetEmitter.emitVarInt(stringSection.insert(key),
1280  "resource key");
1281  resourceOffsetEmitter.emitVarInt(size, "resource size");
1282  resourceOffsetEmitter.emitByte(kind, "resource kind");
1283  }
1284  };
1285 
1286  // Builder used to emit resources.
1287  ResourceBuilder entryBuilder(resourceEmitter, stringSection,
1288  appendResourceOffset,
1289  config.shouldElideResourceData);
1290 
1291  // Emit the external resource entries.
1292  resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size(),
1293  "external resource printer count");
1294  for (const auto &printer : config.externalResourcePrinters) {
1295  curResourceEntries.clear();
1296  printer->buildResources(op, entryBuilder);
1297  emitResourceGroup(stringSection.insert(printer->getName()));
1298  }
1299 
1300  // Emit the dialect resource entries.
1301  for (DialectNumbering &dialect : numberingState.getDialects()) {
1302  if (!dialect.asmInterface)
1303  continue;
1304  curResourceEntries.clear();
1305  dialect.asmInterface->buildResources(op, dialect.resources, entryBuilder);
1306 
1307  // Emit the declaration resources for this dialect, these didn't get emitted
1308  // by the interface. These resources don't have data attached, so just use a
1309  // "blob" kind as a placeholder.
1310  for (const auto &resource : dialect.resourceMap)
1311  if (resource.second->isDeclaration)
1312  appendResourceOffset(resource.first, AsmResourceEntryKind::Blob);
1313 
1314  // Emit the resource group for this dialect.
1315  if (!curResourceEntries.empty())
1316  emitResourceGroup(dialect.number);
1317  }
1318 
1319  // If we didn't emit any resource groups, elide the resource sections.
1320  if (resourceOffsetEmitter.size() == 0)
1321  return;
1322 
1323  emitter.emitSection(bytecode::Section::kResourceOffset,
1324  std::move(resourceOffsetEmitter));
1325  emitter.emitSection(bytecode::Section::kResource, std::move(resourceEmitter));
1326 }
1327 
1328 //===----------------------------------------------------------------------===//
1329 // Strings
1330 //===----------------------------------------------------------------------===//
1331 
1332 void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) {
1333  EncodingEmitter stringEmitter;
1334  stringSection.write(stringEmitter);
1335  emitter.emitSection(bytecode::Section::kString, std::move(stringEmitter));
1336 }
1337 
1338 //===----------------------------------------------------------------------===//
1339 // Properties
1340 //===----------------------------------------------------------------------===//
1341 
1342 void BytecodeWriter::writePropertiesSection(EncodingEmitter &emitter) {
1343  EncodingEmitter propertiesEmitter;
1344  propertiesSection.write(propertiesEmitter);
1345  emitter.emitSection(bytecode::Section::kProperties,
1346  std::move(propertiesEmitter));
1347 }
1348 
1349 //===----------------------------------------------------------------------===//
1350 // Entry Points
1351 //===----------------------------------------------------------------------===//
1352 
1353 LogicalResult mlir::writeBytecodeToFile(Operation *op, raw_ostream &os,
1354  const BytecodeWriterConfig &config) {
1355  BytecodeWriter writer(op, config);
1356  return writer.write(op, os);
1357 }
static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries, EntryCallbackT &&callback)
Write the given entries in contiguous groups with the same parent dialect.
union mlir::linalg::@1243::ArityGroupAndKind::Kind kind
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static LogicalResult emit(SolverOp solver, const SMTEmissionOptions &options, mlir::raw_indented_ostream &stream)
Emit the SMT operations in the given 'solver' to the 'stream'.
This class represents an opaque handle to a dialect resource entry.
This class is used to build resource entries for use by the printer.
Definition: AsmState.h:247
A class to interact with the attributes and types printer when emitting MLIR bytecode.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:309
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgListType getArguments()
Definition: Block.h:87
This class contains the configuration used for the bytecode writer.
void attachTypeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Type >> callback)
llvm::StringMap< std::unique_ptr< DialectVersion > > & getDialectVersionMap() const
A map containing the dialect versions to emit.
void setElideResourceDataFlag(bool shouldElideResourceData=true)
Set a boolean flag to skip emission of resources into the bytecode file.
BytecodeWriterConfig(StringRef producer="MLIR" LLVM_VERSION_STRING)
producer is an optional string that can be used to identify the producer of the bytecode when reading...
void attachFallbackResourcePrinter(FallbackAsmResourceMap &map)
Attach resource printers to the AsmState for the fallback resources in the given map.
int64_t getDesiredBytecodeVersion() const
Get the set desired bytecode version to emit.
void setDialectVersion(std::unique_ptr< DialectVersion > dialectVersion) const
Set a given dialect version to emit on the map.
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > getTypeWriterCallbacks() const
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > getAttributeWriterCallbacks() const
Retrieve the callbacks.
void setDesiredBytecodeVersion(int64_t bytecodeVersion)
Set the desired bytecode version to emit.
void attachResourcePrinter(std::unique_ptr< AsmResourcePrinter > printer)
Attach the given resource printer to the writer configuration.
void attachAttributeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Attribute >> callback)
Attach a custom bytecode printer callback to the configuration for the emission of custom type/attrib...
This class defines a virtual interface for writing to a bytecode stream, providing hooks into the byt...
A fallback map containing external resources not explicitly handled by another parser/printer.
Definition: AsmState.h:421
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
DictionaryAttr getAttrDictionary()
Return all of the attributes on this operation as a DictionaryAttr.
Definition: Operation.cpp:295
unsigned getNumSuccessors()
Definition: Operation.h:706
bool isRegistered()
Returns true if this operation has a registered operation description, otherwise false.
Definition: Operation.h:129
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:674
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:267
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:677
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
DictionaryAttr getDiscardableAttrDictionary()
Return all of the discardable attributes on this operation as a DictionaryAttr.
Definition: Operation.h:501
result_type_range getResultTypes()
Definition: Operation.h:428
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
SuccessorRange getSuccessors()
Definition: Operation.h:703
result_range getResults()
Definition: Operation.h:415
int getPropertiesStorageSize() const
Returns the properties storage size.
Definition: Operation.h:896
OpaqueProperties getPropertiesStorage()
Returns the properties storage.
Definition: Operation.h:900
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:404
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
bool empty()
Definition: Region.h:60
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
This class manages numbering IR entities in preparation of bytecode emission.
Definition: IRNumbering.h:151
@ kAttrType
This section contains the attributes and types referenced within an IR module.
Definition: Encoding.h:73
@ kAttrTypeOffset
This section contains the offsets for the attribute and types within the AttrType section.
Definition: Encoding.h:77
@ kIR
This section contains the list of operations serialized into the bytecode, and their nested regions/o...
Definition: Encoding.h:81
@ kResource
This section contains the resources of the bytecode.
Definition: Encoding.h:84
@ kResourceOffset
This section contains the offsets of resources within the Resource section.
Definition: Encoding.h:88
@ kDialect
This section contains the dialects referenced within an IR module.
Definition: Encoding.h:69
@ kString
This section contains strings referenced within the bytecode.
Definition: Encoding.h:66
@ kDialectVersions
This section contains the versions of each dialect.
Definition: Encoding.h:91
@ kProperties
This section contains the properties for the operations.
Definition: Encoding.h:94
static uint64_t getUseID(OperandT &val, unsigned ownerID)
Get the unique ID of a value use.
Definition: Encoding.h:127
@ kUseListOrdering
Use-list ordering started to be encoded in version 3.
Definition: Encoding.h:38
@ kAlignmentByte
An arbitrary value used to fill alignment padding.
Definition: Encoding.h:56
@ kVersion
The current bytecode version.
Definition: Encoding.h:53
@ kLazyLoading
Support for lazy-loading of isolated region was added in version 2.
Definition: Encoding.h:35
@ kDialectVersioning
Dialects versioning was added in version 1.
Definition: Encoding.h:32
@ kElideUnknownBlockArgLocation
Avoid recording unknown locations on block arguments (compression) started in version 4.
Definition: Encoding.h:42
@ kNativePropertiesEncoding
Support for encoding properties natively in bytecode instead of merged with the discardable attribute...
Definition: Encoding.h:46
@ kMinSupportedVersion
The minimum supported version of the bytecode.
Definition: Encoding.h:29
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:491
Include the generated interface declarations.
const FrozenRewritePatternSet GreedyRewriteConfig config
AsmResourceEntryKind
This enum represents the different kinds of resource values.
Definition: AsmState.h:280
@ String
A string value.
@ Bool
A boolean value.
@ Blob
A blob of data with an accompanying alignment.
LogicalResult writeBytecodeToFile(Operation *op, raw_ostream &os, const BytecodeWriterConfig &config={})
Write the bytecode for the given operation to the provided output stream.
StringRef producer
The producer of the bytecode.
llvm::StringMap< std::unique_ptr< DialectVersion > > dialectVersionMap
A map containing dialect version information for each dialect to emit.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > attributeWriterCallbacks
Printer callbacks used to emit custom type and attribute encodings.
SmallVector< std::unique_ptr< AsmResourcePrinter > > externalResourcePrinters
A collection of non-dialect resource printers.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > typeWriterCallbacks
This class represents a numbering entry for an Dialect.
Definition: IRNumbering.h:106
unsigned number
The number assigned to the dialect.
Definition: IRNumbering.h:114
This class represents the numbering entry of an operation name.
Definition: IRNumbering.h:65