MLIR  21.0.0git
BytecodeWriter.cpp
Go to the documentation of this file.
1 //===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "IRNumbering.h"
13 #include "mlir/Bytecode/Encoding.h"
14 #include "mlir/IR/Attributes.h"
15 #include "mlir/IR/Diagnostics.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/CachedHashString.h"
19 #include "llvm/ADT/MapVector.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include <optional>
25 
26 #define DEBUG_TYPE "mlir-bytecode-writer"
27 
28 using namespace mlir;
29 using namespace mlir::bytecode::detail;
30 
31 //===----------------------------------------------------------------------===//
32 // BytecodeWriterConfig
33 //===----------------------------------------------------------------------===//
34 
36  Impl(StringRef producer) : producer(producer) {}
37 
38  /// Version to use when writing.
39  /// Note: This only differs from kVersion if a specific version is set.
40  int64_t bytecodeVersion = bytecode::kVersion;
41 
42  /// A flag specifying whether to elide emission of resources into the bytecode
43  /// file.
44  bool shouldElideResourceData = false;
45 
46  /// A map containing dialect version information for each dialect to emit.
47  llvm::StringMap<std::unique_ptr<DialectVersion>> dialectVersionMap;
48 
49  /// The producer of the bytecode.
50  StringRef producer;
51 
52  /// Printer callbacks used to emit custom type and attribute encodings.
57 
58  /// A collection of non-dialect resource printers.
60 };
61 
63  : impl(std::make_unique<Impl>(producer)) {}
65  StringRef producer)
66  : BytecodeWriterConfig(producer) {
68 }
70  : impl(std::move(config.impl)) {}
71 
73 
76  return impl->attributeWriterCallbacks;
77 }
78 
81  return impl->typeWriterCallbacks;
82 }
83 
85  std::unique_ptr<AttrTypeBytecodeWriter<Attribute>> callback) {
86  impl->attributeWriterCallbacks.emplace_back(std::move(callback));
87 }
88 
90  std::unique_ptr<AttrTypeBytecodeWriter<Type>> callback) {
91  impl->typeWriterCallbacks.emplace_back(std::move(callback));
92 }
93 
95  std::unique_ptr<AsmResourcePrinter> printer) {
96  impl->externalResourcePrinters.emplace_back(std::move(printer));
97 }
98 
100  bool shouldElideResourceData) {
101  impl->shouldElideResourceData = shouldElideResourceData;
102 }
103 
104 void BytecodeWriterConfig::setDesiredBytecodeVersion(int64_t bytecodeVersion) {
105  impl->bytecodeVersion = bytecodeVersion;
106 }
107 
109  return impl->bytecodeVersion;
110 }
111 
112 llvm::StringMap<std::unique_ptr<DialectVersion>> &
114  return impl->dialectVersionMap;
115 }
116 
118  llvm::StringRef dialectName,
119  std::unique_ptr<DialectVersion> dialectVersion) const {
120  assert(!impl->dialectVersionMap.contains(dialectName) &&
121  "cannot override a previously set dialect version");
122  impl->dialectVersionMap.insert({dialectName, std::move(dialectVersion)});
123 }
124 
125 //===----------------------------------------------------------------------===//
126 // EncodingEmitter
127 //===----------------------------------------------------------------------===//
128 
129 namespace {
130 /// This class functions as the underlying encoding emitter for the bytecode
131 /// writer. This class is a bit different compared to other types of encoders;
132 /// it does not use a single buffer, but instead may contain several buffers
133 /// (some owned by the writer, and some not) that get concatted during the final
134 /// emission.
135 class EncodingEmitter {
136 public:
137  EncodingEmitter() = default;
138  EncodingEmitter(const EncodingEmitter &) = delete;
139  EncodingEmitter &operator=(const EncodingEmitter &) = delete;
140 
141  /// Write the current contents to the provided stream.
142  void writeTo(raw_ostream &os) const;
143 
144  /// Return the current size of the encoded buffer.
145  size_t size() const { return prevResultSize + currentResult.size(); }
146 
147  //===--------------------------------------------------------------------===//
148  // Emission
149  //===--------------------------------------------------------------------===//
150 
151  /// Backpatch a byte in the result buffer at the given offset.
152  void patchByte(uint64_t offset, uint8_t value, StringLiteral desc) {
153  LLVM_DEBUG(llvm::dbgs() << "patchByte(" << offset << ',' << uint64_t(value)
154  << ")\t" << desc << '\n');
155  assert(offset < size() && offset >= prevResultSize &&
156  "cannot patch previously emitted data");
157  currentResult[offset - prevResultSize] = value;
158  }
159 
160  /// Emit the provided blob of data, which is owned by the caller and is
161  /// guaranteed to not die before the end of the bytecode process.
162  void emitOwnedBlob(ArrayRef<uint8_t> data, StringLiteral desc) {
163  LLVM_DEBUG(llvm::dbgs()
164  << "emitOwnedBlob(" << data.size() << "b)\t" << desc << '\n');
165  // Push the current buffer before adding the provided data.
166  appendResult(std::move(currentResult));
167  appendOwnedResult(data);
168  }
169 
170  /// Emit the provided blob of data that has the given alignment, which is
171  /// owned by the caller and is guaranteed to not die before the end of the
172  /// bytecode process. The alignment value is also encoded, making it available
173  /// on load.
174  void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment,
175  StringLiteral desc) {
176  emitVarInt(alignment, desc);
177  emitVarInt(data.size(), desc);
178 
179  alignTo(alignment);
180  emitOwnedBlob(data, desc);
181  }
182  void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment,
183  StringLiteral desc) {
184  ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
185  data.size());
186  emitOwnedBlobAndAlignment(castedData, alignment, desc);
187  }
188 
189  /// Align the emitter to the given alignment.
190  void alignTo(unsigned alignment) {
191  if (alignment < 2)
192  return;
193  assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment");
194 
195  // Check to see if we need to emit any padding bytes to meet the desired
196  // alignment.
197  size_t curOffset = size();
198  size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset;
199  while (paddingSize--)
200  emitByte(bytecode::kAlignmentByte, "alignment byte");
201 
202  // Keep track of the maximum required alignment.
203  requiredAlignment = std::max(requiredAlignment, alignment);
204  }
205 
206  //===--------------------------------------------------------------------===//
207  // Integer Emission
208 
209  /// Emit a single byte.
210  template <typename T>
211  void emitByte(T byte, StringLiteral desc) {
212  LLVM_DEBUG(llvm::dbgs()
213  << "emitByte(" << uint64_t(byte) << ")\t" << desc << '\n');
214  currentResult.push_back(static_cast<uint8_t>(byte));
215  }
216 
217  /// Emit a range of bytes.
218  void emitBytes(ArrayRef<uint8_t> bytes, StringLiteral desc) {
219  LLVM_DEBUG(llvm::dbgs()
220  << "emitBytes(" << bytes.size() << "b)\t" << desc << '\n');
221  llvm::append_range(currentResult, bytes);
222  }
223 
224  /// Emit a variable length integer. The first encoded byte contains a prefix
225  /// in the low bits indicating the encoded length of the value. This length
226  /// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits
227  /// indicate the number of _additional_ bytes (not including the prefix byte).
228  /// All remaining bits in the first byte, along with all of the bits in
229  /// additional bytes, provide the value of the integer encoded in
230  /// little-endian order.
231  void emitVarInt(uint64_t value, StringLiteral desc) {
232  LLVM_DEBUG(llvm::dbgs() << "emitVarInt(" << value << ")\t" << desc << '\n');
233 
234  // In the most common case, the value can be represented in a single byte.
235  // Given how hot this case is, explicitly handle that here.
236  if ((value >> 7) == 0)
237  return emitByte((value << 1) | 0x1, desc);
238  emitMultiByteVarInt(value, desc);
239  }
240 
241  /// Emit a signed variable length integer. Signed varints are encoded using
242  /// a varint with zigzag encoding, meaning that we use the low bit of the
243  /// value to indicate the sign of the value. This allows for more efficient
244  /// encoding of negative values by limiting the number of active bits
245  void emitSignedVarInt(uint64_t value, StringLiteral desc) {
246  emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63), desc);
247  }
248 
249  /// Emit a variable length integer whose low bit is used to encode the
250  /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
251  void emitVarIntWithFlag(uint64_t value, bool flag, StringLiteral desc) {
252  emitVarInt((value << 1) | (flag ? 1 : 0), desc);
253  }
254 
255  //===--------------------------------------------------------------------===//
256  // String Emission
257 
258  /// Emit the given string as a nul terminated string.
259  void emitNulTerminatedString(StringRef str, StringLiteral desc) {
260  emitString(str, desc);
261  emitByte(0, "null terminator");
262  }
263 
264  /// Emit the given string without a nul terminator.
265  void emitString(StringRef str, StringLiteral desc) {
266  emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()},
267  desc);
268  }
269 
270  //===--------------------------------------------------------------------===//
271  // Section Emission
272 
273  /// Emit a nested section of the given code, whose contents are encoded in the
274  /// provided emitter.
275  void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) {
276  // Emit the section code and length. The high bit of the code is used to
277  // indicate whether the section alignment is present, so save an offset to
278  // it.
279  uint64_t codeOffset = currentResult.size();
280  emitByte(code, "section code");
281  emitVarInt(emitter.size(), "section size");
282 
283  // Integrate the alignment of the section into this emitter if necessary.
284  unsigned emitterAlign = emitter.requiredAlignment;
285  if (emitterAlign > 1) {
286  if (size() & (emitterAlign - 1)) {
287  emitVarInt(emitterAlign, "section alignment");
288  alignTo(emitterAlign);
289 
290  // Indicate that we needed to align the section, the high bit of the
291  // code field is used for this.
292  currentResult[codeOffset] |= 0b10000000;
293  } else {
294  // Otherwise, if we happen to be at a compatible offset, we just
295  // remember that we need this alignment.
296  requiredAlignment = std::max(requiredAlignment, emitterAlign);
297  }
298  }
299 
300  // Push our current buffer and then merge the provided section body into
301  // ours.
302  appendResult(std::move(currentResult));
303  for (std::vector<uint8_t> &result : emitter.prevResultStorage)
304  prevResultStorage.push_back(std::move(result));
305  llvm::append_range(prevResultList, emitter.prevResultList);
306  prevResultSize += emitter.prevResultSize;
307  appendResult(std::move(emitter.currentResult));
308  }
309 
310 private:
311  /// Emit the given value using a variable width encoding. This method is a
312  /// fallback when the number of bytes needed to encode the value is greater
313  /// than 1. We mark it noinline here so that the single byte hot path isn't
314  /// pessimized.
315  LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value,
316  StringLiteral desc);
317 
318  /// Append a new result buffer to the current contents.
319  void appendResult(std::vector<uint8_t> &&result) {
320  if (result.empty())
321  return;
322  prevResultStorage.emplace_back(std::move(result));
323  appendOwnedResult(prevResultStorage.back());
324  }
325  void appendOwnedResult(ArrayRef<uint8_t> result) {
326  if (result.empty())
327  return;
328  prevResultSize += result.size();
329  prevResultList.emplace_back(result);
330  }
331 
332  /// The result of the emitter currently being built. We refrain from building
333  /// a single buffer to simplify emitting sections, large data, and more. The
334  /// result is thus represented using multiple distinct buffers, some of which
335  /// we own (via prevResultStorage), and some of which are just pointers into
336  /// externally owned buffers.
337  std::vector<uint8_t> currentResult;
338  std::vector<ArrayRef<uint8_t>> prevResultList;
339  std::vector<std::vector<uint8_t>> prevResultStorage;
340 
341  /// An up-to-date total size of all of the buffers within `prevResultList`.
342  /// This enables O(1) size checks of the current encoding.
343  size_t prevResultSize = 0;
344 
345  /// The highest required alignment for the start of this section.
346  unsigned requiredAlignment = 1;
347 };
348 
349 //===----------------------------------------------------------------------===//
350 // StringSectionBuilder
351 //===----------------------------------------------------------------------===//
352 
353 namespace {
354 /// This class is used to simplify the process of emitting the string section.
355 class StringSectionBuilder {
356 public:
357  /// Add the given string to the string section, and return the index of the
358  /// string within the section.
359  size_t insert(StringRef str) {
360  auto it = strings.insert({llvm::CachedHashStringRef(str), strings.size()});
361  return it.first->second;
362  }
363 
364  /// Write the current set of strings to the given emitter.
365  void write(EncodingEmitter &emitter) {
366  emitter.emitVarInt(strings.size(), "string section size");
367 
368  // Emit the sizes in reverse order, so that we don't need to backpatch an
369  // offset to the string data or have a separate section.
370  for (const auto &it : llvm::reverse(strings))
371  emitter.emitVarInt(it.first.size() + 1, "string size");
372  // Emit the string data itself.
373  for (const auto &it : strings)
374  emitter.emitNulTerminatedString(it.first.val(), "string");
375  }
376 
377 private:
378  /// A set of strings referenced within the bytecode. The value of the map is
379  /// unused.
380  llvm::MapVector<llvm::CachedHashStringRef, size_t> strings;
381 };
382 } // namespace
383 
384 class DialectWriter : public DialectBytecodeWriter {
385  using DialectVersionMapT = llvm::StringMap<std::unique_ptr<DialectVersion>>;
386 
387 public:
388  DialectWriter(int64_t bytecodeVersion, EncodingEmitter &emitter,
389  IRNumberingState &numberingState,
390  StringSectionBuilder &stringSection,
391  const DialectVersionMapT &dialectVersionMap)
392  : bytecodeVersion(bytecodeVersion), emitter(emitter),
393  numberingState(numberingState), stringSection(stringSection),
394  dialectVersionMap(dialectVersionMap) {}
395 
396  //===--------------------------------------------------------------------===//
397  // IR
398  //===--------------------------------------------------------------------===//
399 
400  void writeAttribute(Attribute attr) override {
401  emitter.emitVarInt(numberingState.getNumber(attr), "dialect attr");
402  }
403  void writeOptionalAttribute(Attribute attr) override {
404  if (!attr) {
405  emitter.emitVarInt(0, "dialect optional attr none");
406  return;
407  }
408  emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true,
409  "dialect optional attr");
410  }
411 
412  void writeType(Type type) override {
413  emitter.emitVarInt(numberingState.getNumber(type), "dialect type");
414  }
415 
416  void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
417  emitter.emitVarInt(numberingState.getNumber(resource), "dialect resource");
418  }
419 
420  //===--------------------------------------------------------------------===//
421  // Primitives
422  //===--------------------------------------------------------------------===//
423 
424  void writeVarInt(uint64_t value) override {
425  emitter.emitVarInt(value, "dialect writer");
426  }
427 
428  void writeSignedVarInt(int64_t value) override {
429  emitter.emitSignedVarInt(value, "dialect writer");
430  }
431 
432  void writeAPIntWithKnownWidth(const APInt &value) override {
433  size_t bitWidth = value.getBitWidth();
434 
435  // If the value is a single byte, just emit it directly without going
436  // through a varint.
437  if (bitWidth <= 8)
438  return emitter.emitByte(value.getLimitedValue(), "dialect APInt");
439 
440  // If the value fits within a single varint, emit it directly.
441  if (bitWidth <= 64)
442  return emitter.emitSignedVarInt(value.getLimitedValue(), "dialect APInt");
443 
444  // Otherwise, we need to encode a variable number of active words. We use
445  // active words instead of the number of total words under the observation
446  // that smaller values will be more common.
447  unsigned numActiveWords = value.getActiveWords();
448  emitter.emitVarInt(numActiveWords, "dialect APInt word count");
449 
450  const uint64_t *rawValueData = value.getRawData();
451  for (unsigned i = 0; i < numActiveWords; ++i)
452  emitter.emitSignedVarInt(rawValueData[i], "dialect APInt word");
453  }
454 
455  void writeAPFloatWithKnownSemantics(const APFloat &value) override {
456  writeAPIntWithKnownWidth(value.bitcastToAPInt());
457  }
458 
459  void writeOwnedString(StringRef str) override {
460  emitter.emitVarInt(stringSection.insert(str), "dialect string");
461  }
462 
463  void writeOwnedBlob(ArrayRef<char> blob) override {
464  emitter.emitVarInt(blob.size(), "dialect blob");
465  emitter.emitOwnedBlob(
466  ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(blob.data()),
467  blob.size()),
468  "dialect blob");
469  }
470 
471  void writeOwnedBool(bool value) override {
472  emitter.emitByte(value, "dialect bool");
473  }
474 
475  int64_t getBytecodeVersion() const override { return bytecodeVersion; }
476 
477  FailureOr<const DialectVersion *>
478  getDialectVersion(StringRef dialectName) const override {
479  auto dialectEntry = dialectVersionMap.find(dialectName);
480  if (dialectEntry == dialectVersionMap.end())
481  return failure();
482  return dialectEntry->getValue().get();
483  }
484 
485 private:
486  int64_t bytecodeVersion;
487  EncodingEmitter &emitter;
488  IRNumberingState &numberingState;
489  StringSectionBuilder &stringSection;
490  const DialectVersionMapT &dialectVersionMap;
491 };
492 
493 namespace {
494 class PropertiesSectionBuilder {
495 public:
496  PropertiesSectionBuilder(IRNumberingState &numberingState,
497  StringSectionBuilder &stringSection,
499  : numberingState(numberingState), stringSection(stringSection),
500  config(config) {}
501 
502  /// Emit the op properties in the properties section and return the index of
503  /// the properties within the section. Return -1 if no properties was emitted.
504  std::optional<ssize_t> emit(Operation *op) {
505  EncodingEmitter propertiesEmitter;
506  if (!op->getPropertiesStorageSize())
507  return std::nullopt;
508  if (!op->isRegistered()) {
509  // Unregistered op are storing properties as an optional attribute.
510  Attribute prop = *op->getPropertiesStorage().as<Attribute *>();
511  if (!prop)
512  return std::nullopt;
513  EncodingEmitter sizeEmitter;
514  sizeEmitter.emitVarInt(numberingState.getNumber(prop), "properties size");
515  scratch.clear();
516  llvm::raw_svector_ostream os(scratch);
517  sizeEmitter.writeTo(os);
518  return emit(scratch);
519  }
520 
521  EncodingEmitter emitter;
522  DialectWriter propertiesWriter(config.bytecodeVersion, emitter,
523  numberingState, stringSection,
524  config.dialectVersionMap);
525  auto iface = cast<BytecodeOpInterface>(op);
526  iface.writeProperties(propertiesWriter);
527  scratch.clear();
528  llvm::raw_svector_ostream os(scratch);
529  emitter.writeTo(os);
530  return emit(scratch);
531  }
532 
533  /// Write the current set of properties to the given emitter.
534  void write(EncodingEmitter &emitter) {
535  emitter.emitVarInt(propertiesStorage.size(), "properties size");
536  if (propertiesStorage.empty())
537  return;
538  for (const auto &storage : propertiesStorage) {
539  if (storage.empty()) {
540  emitter.emitBytes(ArrayRef<uint8_t>(), "empty properties");
541  continue;
542  }
543  emitter.emitBytes(ArrayRef(reinterpret_cast<const uint8_t *>(&storage[0]),
544  storage.size()),
545  "property");
546  }
547  }
548 
549  /// Returns true if the section is empty.
550  bool empty() { return propertiesStorage.empty(); }
551 
552 private:
553  /// Emit raw data and returns the offset in the internal buffer.
554  /// Data are deduplicated and will be copied in the internal buffer only if
555  /// they don't exist there already.
556  ssize_t emit(ArrayRef<char> rawProperties) {
557  // Populate a scratch buffer with the properties size.
558  SmallVector<char> sizeScratch;
559  {
560  EncodingEmitter sizeEmitter;
561  sizeEmitter.emitVarInt(rawProperties.size(), "properties");
562  llvm::raw_svector_ostream os(sizeScratch);
563  sizeEmitter.writeTo(os);
564  }
565  // Append a new storage to the table now.
566  size_t index = propertiesStorage.size();
567  propertiesStorage.emplace_back();
568  std::vector<char> &newStorage = propertiesStorage.back();
569  size_t propertiesSize = sizeScratch.size() + rawProperties.size();
570  newStorage.reserve(propertiesSize);
571  llvm::append_range(newStorage, sizeScratch);
572  llvm::append_range(newStorage, rawProperties);
573 
574  // Try to de-duplicate the new serialized properties.
575  // If the properties is a duplicate, pop it back from the storage.
576  auto inserted = propertiesUniquing.insert(
577  std::make_pair(ArrayRef<char>(newStorage), index));
578  if (!inserted.second)
579  propertiesStorage.pop_back();
580  return inserted.first->getSecond();
581  }
582 
583  /// Storage for properties.
584  std::vector<std::vector<char>> propertiesStorage;
585  SmallVector<char> scratch;
586  DenseMap<ArrayRef<char>, int64_t> propertiesUniquing;
587  IRNumberingState &numberingState;
588  StringSectionBuilder &stringSection;
590 };
591 } // namespace
592 
593 /// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need
594 /// to go through an intermediate buffer when interacting with code that wants a
595 /// raw_ostream.
596 class RawEmitterOstream : public raw_ostream {
597 public:
598  explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) {
599  SetUnbuffered();
600  }
601 
602 private:
603  void write_impl(const char *ptr, size_t size) override {
604  emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size},
605  "raw emitter");
606  }
607  uint64_t current_pos() const override { return emitter.size(); }
608 
609  /// The section being emitted to.
610  EncodingEmitter &emitter;
611 };
612 } // namespace
613 
614 void EncodingEmitter::writeTo(raw_ostream &os) const {
615  // Reserve space in the ostream for the encoded contents.
616  os.reserveExtraSpace(size());
617 
618  for (auto &prevResult : prevResultList)
619  os.write((const char *)prevResult.data(), prevResult.size());
620  os.write((const char *)currentResult.data(), currentResult.size());
621 }
622 
623 void EncodingEmitter::emitMultiByteVarInt(uint64_t value, StringLiteral desc) {
624  // Compute the number of bytes needed to encode the value. Each byte can hold
625  // up to 7-bits of data. We only check up to the number of bits we can encode
626  // in the first byte (8).
627  uint64_t it = value >> 7;
628  for (size_t numBytes = 2; numBytes < 9; ++numBytes) {
629  if (LLVM_LIKELY(it >>= 7) == 0) {
630  uint64_t encodedValue = (value << 1) | 0x1;
631  encodedValue <<= (numBytes - 1);
632  llvm::support::ulittle64_t encodedValueLE(encodedValue);
633  emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes}, desc);
634  return;
635  }
636  }
637 
638  // If the value is too large to encode in a single byte, emit a special all
639  // zero marker byte and splat the value directly.
640  emitByte(0, desc);
641  llvm::support::ulittle64_t valueLE(value);
642  emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)}, desc);
643 }
644 
645 //===----------------------------------------------------------------------===//
646 // Bytecode Writer
647 //===----------------------------------------------------------------------===//
648 
649 namespace {
650 class BytecodeWriter {
651 public:
652  BytecodeWriter(Operation *op, const BytecodeWriterConfig &config)
653  : numberingState(op, config), config(config.getImpl()),
654  propertiesSection(numberingState, stringSection, config.getImpl()) {}
655 
656  /// Write the bytecode for the given root operation.
657  LogicalResult write(Operation *rootOp, raw_ostream &os);
658 
659 private:
660  //===--------------------------------------------------------------------===//
661  // Dialects
662 
663  void writeDialectSection(EncodingEmitter &emitter);
664 
665  //===--------------------------------------------------------------------===//
666  // Attributes and Types
667 
668  void writeAttrTypeSection(EncodingEmitter &emitter);
669 
670  //===--------------------------------------------------------------------===//
671  // Operations
672 
673  LogicalResult writeBlock(EncodingEmitter &emitter, Block *block);
674  LogicalResult writeOp(EncodingEmitter &emitter, Operation *op);
675  LogicalResult writeRegion(EncodingEmitter &emitter, Region *region);
676  LogicalResult writeIRSection(EncodingEmitter &emitter, Operation *op);
677 
678  LogicalResult writeRegions(EncodingEmitter &emitter,
679  MutableArrayRef<Region> regions) {
680  return success(llvm::all_of(regions, [&](Region &region) {
681  return succeeded(writeRegion(emitter, &region));
682  }));
683  }
684 
685  //===--------------------------------------------------------------------===//
686  // Resources
687 
688  void writeResourceSection(Operation *op, EncodingEmitter &emitter);
689 
690  //===--------------------------------------------------------------------===//
691  // Strings
692 
693  void writeStringSection(EncodingEmitter &emitter);
694 
695  //===--------------------------------------------------------------------===//
696  // Properties
697 
698  void writePropertiesSection(EncodingEmitter &emitter);
699 
700  //===--------------------------------------------------------------------===//
701  // Helpers
702 
703  void writeUseListOrders(EncodingEmitter &emitter, uint8_t &opEncodingMask,
704  ValueRange range);
705 
706  //===--------------------------------------------------------------------===//
707  // Fields
708 
709  /// The builder used for the string section.
710  StringSectionBuilder stringSection;
711 
712  /// The IR numbering state generated for the root operation.
713  IRNumberingState numberingState;
714 
715  /// Configuration dictating bytecode emission.
717 
718  /// Storage for the properties section
719  PropertiesSectionBuilder propertiesSection;
720 };
721 } // namespace
722 
723 LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
724  EncodingEmitter emitter;
725 
726  // Emit the bytecode file header. This is how we identify the output as a
727  // bytecode file.
728  emitter.emitString("ML\xefR", "bytecode header");
729 
730  // Emit the bytecode version.
731  if (config.bytecodeVersion < bytecode::kMinSupportedVersion ||
732  config.bytecodeVersion > bytecode::kVersion)
733  return rootOp->emitError()
734  << "unsupported version requested " << config.bytecodeVersion
735  << ", must be in range ["
736  << static_cast<int64_t>(bytecode::kMinSupportedVersion) << ", "
737  << static_cast<int64_t>(bytecode::kVersion) << ']';
738  emitter.emitVarInt(config.bytecodeVersion, "bytecode version");
739 
740  // Emit the producer.
741  emitter.emitNulTerminatedString(config.producer, "bytecode producer");
742 
743  // Emit the dialect section.
744  writeDialectSection(emitter);
745 
746  // Emit the attributes and types section.
747  writeAttrTypeSection(emitter);
748 
749  // Emit the IR section.
750  if (failed(writeIRSection(emitter, rootOp)))
751  return failure();
752 
753  // Emit the resources section.
754  writeResourceSection(rootOp, emitter);
755 
756  // Emit the string section.
757  writeStringSection(emitter);
758 
759  // Emit the properties section.
760  if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding)
761  writePropertiesSection(emitter);
762  else if (!propertiesSection.empty())
763  return rootOp->emitError(
764  "unexpected properties emitted incompatible with bytecode <5");
765 
766  // Write the generated bytecode to the provided output stream.
767  emitter.writeTo(os);
768 
769  return success();
770 }
771 
772 //===----------------------------------------------------------------------===//
773 // Dialects
774 //===----------------------------------------------------------------------===//
775 
776 /// Write the given entries in contiguous groups with the same parent dialect.
777 /// Each dialect sub-group is encoded with the parent dialect and number of
778 /// elements, followed by the encoding for the entries. The given callback is
779 /// invoked to encode each individual entry.
780 template <typename EntriesT, typename EntryCallbackT>
781 static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
782  EntryCallbackT &&callback) {
783  for (auto it = entries.begin(), e = entries.end(); it != e;) {
784  auto groupStart = it++;
785 
786  // Find the end of the group that shares the same parent dialect.
787  DialectNumbering *currentDialect = groupStart->dialect;
788  it = std::find_if(it, e, [&](const auto &entry) {
789  return entry.dialect != currentDialect;
790  });
791 
792  // Emit the dialect and number of elements.
793  emitter.emitVarInt(currentDialect->number, "dialect number");
794  emitter.emitVarInt(std::distance(groupStart, it), "dialect offset");
795 
796  // Emit the entries within the group.
797  for (auto &entry : llvm::make_range(groupStart, it))
798  callback(entry);
799  }
800 }
801 
802 void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
803  EncodingEmitter dialectEmitter;
804 
805  // Emit the referenced dialects.
806  auto dialects = numberingState.getDialects();
807  dialectEmitter.emitVarInt(llvm::size(dialects), "dialects count");
808  for (DialectNumbering &dialect : dialects) {
809  // Write the string section and get the ID.
810  size_t nameID = stringSection.insert(dialect.name);
811 
812  if (config.bytecodeVersion < bytecode::kDialectVersioning) {
813  dialectEmitter.emitVarInt(nameID, "dialect name ID");
814  continue;
815  }
816 
817  // Try writing the version to the versionEmitter.
818  EncodingEmitter versionEmitter;
819  if (dialect.interface) {
820  // The writer used when emitting using a custom bytecode encoding.
821  DialectWriter versionWriter(config.bytecodeVersion, versionEmitter,
822  numberingState, stringSection,
823  config.dialectVersionMap);
824  dialect.interface->writeVersion(versionWriter);
825  }
826 
827  // If the version emitter is empty, version is not available. We can encode
828  // this in the dialect ID, so if there is no version, we don't write the
829  // section.
830  size_t versionAvailable = versionEmitter.size() > 0;
831  dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable,
832  "dialect version");
833  if (versionAvailable)
834  dialectEmitter.emitSection(bytecode::Section::kDialectVersions,
835  std::move(versionEmitter));
836  }
837 
838  if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation)
839  dialectEmitter.emitVarInt(size(numberingState.getOpNames()),
840  "op names count");
841 
842  // Emit the referenced operation names grouped by dialect.
843  auto emitOpName = [&](OpNameNumbering &name) {
844  size_t stringId = stringSection.insert(name.name.stripDialect());
845  if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding)
846  dialectEmitter.emitVarInt(stringId, "dialect op name");
847  else
848  dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered(),
849  "dialect op name");
850  };
851  writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName);
852 
853  emitter.emitSection(bytecode::Section::kDialect, std::move(dialectEmitter));
854 }
855 
856 //===----------------------------------------------------------------------===//
857 // Attributes and Types
858 //===----------------------------------------------------------------------===//
859 
860 void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
861  EncodingEmitter attrTypeEmitter;
862  EncodingEmitter offsetEmitter;
863  offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()),
864  "attributes count");
865  offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()),
866  "types count");
867 
868  // A functor used to emit an attribute or type entry.
869  uint64_t prevOffset = 0;
870  auto emitAttrOrType = [&](auto &entry) {
871  auto entryValue = entry.getValue();
872 
873  auto emitAttrOrTypeRawImpl = [&]() -> void {
874  RawEmitterOstream(attrTypeEmitter) << entryValue;
875  attrTypeEmitter.emitByte(0, "attr/type separator");
876  };
877  auto emitAttrOrTypeImpl = [&]() -> bool {
878  // TODO: We don't currently support custom encoded mutable types and
879  // attributes.
880  if (entryValue.template hasTrait<TypeTrait::IsMutable>() ||
881  entryValue.template hasTrait<AttributeTrait::IsMutable>()) {
882  emitAttrOrTypeRawImpl();
883  return false;
884  }
885 
886  DialectWriter dialectWriter(config.bytecodeVersion, attrTypeEmitter,
887  numberingState, stringSection,
888  config.dialectVersionMap);
889  if constexpr (std::is_same_v<std::decay_t<decltype(entryValue)>, Type>) {
890  for (const auto &callback : config.typeWriterCallbacks) {
891  if (succeeded(callback->write(entryValue, dialectWriter)))
892  return true;
893  }
894  if (const BytecodeDialectInterface *interface =
895  entry.dialect->interface) {
896  if (succeeded(interface->writeType(entryValue, dialectWriter)))
897  return true;
898  }
899  } else {
900  for (const auto &callback : config.attributeWriterCallbacks) {
901  if (succeeded(callback->write(entryValue, dialectWriter)))
902  return true;
903  }
904  if (const BytecodeDialectInterface *interface =
905  entry.dialect->interface) {
906  if (succeeded(interface->writeAttribute(entryValue, dialectWriter)))
907  return true;
908  }
909  }
910 
911  // If the entry was not emitted using a callback or a dialect interface,
912  // emit it using the textual format.
913  emitAttrOrTypeRawImpl();
914  return false;
915  };
916 
917  bool hasCustomEncoding = emitAttrOrTypeImpl();
918 
919  // Record the offset of this entry.
920  uint64_t curOffset = attrTypeEmitter.size();
921  offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding,
922  "attr/type offset");
923  prevOffset = curOffset;
924  };
925 
926  // Emit the attribute and type entries for each dialect.
927  writeDialectGrouping(offsetEmitter, numberingState.getAttributes(),
928  emitAttrOrType);
929  writeDialectGrouping(offsetEmitter, numberingState.getTypes(),
930  emitAttrOrType);
931 
932  // Emit the sections to the stream.
933  emitter.emitSection(bytecode::Section::kAttrTypeOffset,
934  std::move(offsetEmitter));
935  emitter.emitSection(bytecode::Section::kAttrType, std::move(attrTypeEmitter));
936 }
937 
938 //===----------------------------------------------------------------------===//
939 // Operations
940 //===----------------------------------------------------------------------===//
941 
942 LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter,
943  Block *block) {
944  ArrayRef<BlockArgument> args = block->getArguments();
945  bool hasArgs = !args.empty();
946 
947  // Emit the number of operations in this block, and if it has arguments. We
948  // use the low bit of the operation count to indicate if the block has
949  // arguments.
950  unsigned numOps = numberingState.getOperationCount(block);
951  emitter.emitVarIntWithFlag(numOps, hasArgs, "block num ops");
952 
953  // Emit the arguments of the block.
954  if (hasArgs) {
955  emitter.emitVarInt(args.size(), "block args count");
956  for (BlockArgument arg : args) {
957  Location argLoc = arg.getLoc();
958  if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) {
959  emitter.emitVarIntWithFlag(numberingState.getNumber(arg.getType()),
960  !isa<UnknownLoc>(argLoc), "block arg type");
961  if (!isa<UnknownLoc>(argLoc))
962  emitter.emitVarInt(numberingState.getNumber(argLoc),
963  "block arg location");
964  } else {
965  emitter.emitVarInt(numberingState.getNumber(arg.getType()),
966  "block arg type");
967  emitter.emitVarInt(numberingState.getNumber(argLoc),
968  "block arg location");
969  }
970  }
971  if (config.bytecodeVersion >= bytecode::kUseListOrdering) {
972  uint64_t maskOffset = emitter.size();
973  uint8_t encodingMask = 0;
974  emitter.emitByte(0, "use-list separator");
975  writeUseListOrders(emitter, encodingMask, args);
976  if (encodingMask)
977  emitter.patchByte(maskOffset, encodingMask, "block patch encoding");
978  }
979  }
980 
981  // Emit the operations within the block.
982  for (Operation &op : *block)
983  if (failed(writeOp(emitter, &op)))
984  return failure();
985  return success();
986 }
987 
988 LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
989  emitter.emitVarInt(numberingState.getNumber(op->getName()), "op name ID");
990 
991  // Emit a mask for the operation components. We need to fill this in later
992  // (when we actually know what needs to be emitted), so emit a placeholder for
993  // now.
994  uint64_t maskOffset = emitter.size();
995  uint8_t opEncodingMask = 0;
996  emitter.emitByte(0, "op separator");
997 
998  // Emit the location for this operation.
999  emitter.emitVarInt(numberingState.getNumber(op->getLoc()), "op location");
1000 
1001  // Emit the attributes of this operation.
1002  DictionaryAttr attrs = op->getDiscardableAttrDictionary();
1003  // Allow deployment to version <kNativePropertiesEncoding by merging inherent
1004  // attribute with the discardable ones. We should fail if there are any
1005  // conflicts. When properties are not used by the op, also store everything as
1006  // attributes.
1007  if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding ||
1008  !op->getPropertiesStorage()) {
1009  attrs = op->getAttrDictionary();
1010  }
1011  if (!attrs.empty()) {
1012  opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
1013  emitter.emitVarInt(numberingState.getNumber(attrs), "op attrs count");
1014  }
1015 
1016  // Emit the properties of this operation, for now we still support deployment
1017  // to version <kNativePropertiesEncoding.
1018  if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding) {
1019  std::optional<ssize_t> propertiesId = propertiesSection.emit(op);
1020  if (propertiesId.has_value()) {
1021  opEncodingMask |= bytecode::OpEncodingMask::kHasProperties;
1022  emitter.emitVarInt(*propertiesId, "op properties ID");
1023  }
1024  }
1025 
1026  // Emit the result types of the operation.
1027  if (unsigned numResults = op->getNumResults()) {
1028  opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
1029  emitter.emitVarInt(numResults, "op results count");
1030  for (Type type : op->getResultTypes())
1031  emitter.emitVarInt(numberingState.getNumber(type), "op result type");
1032  }
1033 
1034  // Emit the operands of the operation.
1035  if (unsigned numOperands = op->getNumOperands()) {
1036  opEncodingMask |= bytecode::OpEncodingMask::kHasOperands;
1037  emitter.emitVarInt(numOperands, "op operands count");
1038  for (Value operand : op->getOperands())
1039  emitter.emitVarInt(numberingState.getNumber(operand), "op operand types");
1040  }
1041 
1042  // Emit the successors of the operation.
1043  if (unsigned numSuccessors = op->getNumSuccessors()) {
1044  opEncodingMask |= bytecode::OpEncodingMask::kHasSuccessors;
1045  emitter.emitVarInt(numSuccessors, "op successors count");
1046  for (Block *successor : op->getSuccessors())
1047  emitter.emitVarInt(numberingState.getNumber(successor), "op successor");
1048  }
1049 
1050  // Emit the use-list orders to bytecode, so we can reconstruct the same order
1051  // at parsing.
1052  if (config.bytecodeVersion >= bytecode::kUseListOrdering)
1053  writeUseListOrders(emitter, opEncodingMask, ValueRange(op->getResults()));
1054 
1055  // Check for regions.
1056  unsigned numRegions = op->getNumRegions();
1057  if (numRegions)
1059 
1060  // Update the mask for the operation.
1061  emitter.patchByte(maskOffset, opEncodingMask, "op encoding mask");
1062 
1063  // With the mask emitted, we can now emit the regions of the operation. We do
1064  // this after mask emission to avoid offset complications that may arise by
1065  // emitting the regions first (e.g. if the regions are huge, backpatching the
1066  // op encoding mask is more annoying).
1067  if (numRegions) {
1068  bool isIsolatedFromAbove = numberingState.isIsolatedFromAbove(op);
1069  emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove,
1070  "op regions count");
1071 
1072  // If the region is not isolated from above, or we are emitting bytecode
1073  // targeting version <kLazyLoading, we don't use a section.
1074  if (isIsolatedFromAbove &&
1075  config.bytecodeVersion >= bytecode::kLazyLoading) {
1076  EncodingEmitter regionEmitter;
1077  if (failed(writeRegions(regionEmitter, op->getRegions())))
1078  return failure();
1079  emitter.emitSection(bytecode::Section::kIR, std::move(regionEmitter));
1080 
1081  } else if (failed(writeRegions(emitter, op->getRegions()))) {
1082  return failure();
1083  }
1084  }
1085  return success();
1086 }
1087 
1088 void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter,
1089  uint8_t &opEncodingMask,
1090  ValueRange range) {
1091  // Loop over the results and store the use-list order per result index.
1093  for (auto item : llvm::enumerate(range)) {
1094  auto value = item.value();
1095  // No need to store a custom use-list order if the result does not have
1096  // multiple uses.
1097  if (value.use_empty() || value.hasOneUse())
1098  continue;
1099 
1100  // For each result, assemble the list of pairs (use-list-index,
1101  // global-value-index). While doing so, detect if the global-value-index is
1102  // already ordered with respect to the use-list-index.
1103  bool alreadyOrdered = true;
1104  auto &firstUse = *value.use_begin();
1105  uint64_t prevID = bytecode::getUseID(
1106  firstUse, numberingState.getNumber(firstUse.getOwner()));
1108  {{0, prevID}});
1109 
1110  for (auto use : llvm::drop_begin(llvm::enumerate(value.getUses()))) {
1111  uint64_t currentID = bytecode::getUseID(
1112  use.value(), numberingState.getNumber(use.value().getOwner()));
1113  // The use-list order achieved when building the IR at parsing always
1114  // pushes new uses on front. Hence, if the order by unique ID is
1115  // monotonically decreasing, a roundtrip to bytecode preserves such order.
1116  alreadyOrdered &= (prevID > currentID);
1117  useListPairs.push_back({use.index(), currentID});
1118  prevID = currentID;
1119  }
1120 
1121  // Do not emit if the order is already sorted.
1122  if (alreadyOrdered)
1123  continue;
1124 
1125  // Sort the use indices by the unique ID indices in descending order.
1126  std::sort(
1127  useListPairs.begin(), useListPairs.end(),
1128  [](auto elem1, auto elem2) { return elem1.second > elem2.second; });
1129 
1130  map.try_emplace(item.index(), llvm::map_range(useListPairs, [](auto elem) {
1131  return elem.first;
1132  }));
1133  }
1134 
1135  if (map.empty())
1136  return;
1137 
1139  // Emit the number of results that have a custom use-list order if the number
1140  // of results is greater than one.
1141  if (range.size() != 1) {
1142  emitter.emitVarInt(map.size(), "custom use-list size");
1143  }
1144 
1145  for (const auto &item : map) {
1146  auto resultIdx = item.getFirst();
1147  auto useListOrder = item.getSecond();
1148 
1149  // Compute the number of uses that are actually shuffled. If those are less
1150  // than half of the total uses, encoding the index pair `(src, dst)` is more
1151  // space efficient.
1152  size_t shuffledElements =
1153  llvm::count_if(llvm::enumerate(useListOrder),
1154  [](auto item) { return item.index() != item.value(); });
1155  bool indexPairEncoding = shuffledElements < (useListOrder.size() / 2);
1156 
1157  // For single result, we don't need to store the result index.
1158  if (range.size() != 1)
1159  emitter.emitVarInt(resultIdx, "use-list result index");
1160 
1161  if (indexPairEncoding) {
1162  emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding,
1163  "use-list index pair size");
1164  for (auto pair : llvm::enumerate(useListOrder)) {
1165  if (pair.index() != pair.value()) {
1166  emitter.emitVarInt(pair.value(), "use-list index pair first");
1167  emitter.emitVarInt(pair.index(), "use-list index pair second");
1168  }
1169  }
1170  } else {
1171  emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding,
1172  "use-list size");
1173  for (const auto &index : useListOrder)
1174  emitter.emitVarInt(index, "use-list order");
1175  }
1176  }
1177 }
1178 
1179 LogicalResult BytecodeWriter::writeRegion(EncodingEmitter &emitter,
1180  Region *region) {
1181  // If the region is empty, we only need to emit the number of blocks (which is
1182  // zero).
1183  if (region->empty()) {
1184  emitter.emitVarInt(/*numBlocks*/ 0, "region block count empty");
1185  return success();
1186  }
1187 
1188  // Emit the number of blocks and values within the region.
1189  unsigned numBlocks, numValues;
1190  std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region);
1191  emitter.emitVarInt(numBlocks, "region block count");
1192  emitter.emitVarInt(numValues, "region value count");
1193 
1194  // Emit the blocks within the region.
1195  for (Block &block : *region)
1196  if (failed(writeBlock(emitter, &block)))
1197  return failure();
1198  return success();
1199 }
1200 
1201 LogicalResult BytecodeWriter::writeIRSection(EncodingEmitter &emitter,
1202  Operation *op) {
1203  EncodingEmitter irEmitter;
1204 
1205  // Write the IR section the same way as a block with no arguments. Note that
1206  // the low-bit of the operation count for a block is used to indicate if the
1207  // block has arguments, which in this case is always false.
1208  irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false, "ir section");
1209 
1210  // Emit the operations.
1211  if (failed(writeOp(irEmitter, op)))
1212  return failure();
1213 
1214  emitter.emitSection(bytecode::Section::kIR, std::move(irEmitter));
1215  return success();
1216 }
1217 
1218 //===----------------------------------------------------------------------===//
1219 // Resources
1220 //===----------------------------------------------------------------------===//
1221 
1222 namespace {
1223 /// This class represents a resource builder implementation for the MLIR
1224 /// bytecode format.
1225 class ResourceBuilder : public AsmResourceBuilder {
1226 public:
1227  using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>;
1228 
1229  ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection,
1230  PostProcessFn postProcessFn, bool shouldElideData)
1231  : emitter(emitter), stringSection(stringSection),
1232  postProcessFn(postProcessFn), shouldElideData(shouldElideData) {}
1233  ~ResourceBuilder() override = default;
1234 
1235  void buildBlob(StringRef key, ArrayRef<char> data,
1236  uint32_t dataAlignment) final {
1237  if (!shouldElideData)
1238  emitter.emitOwnedBlobAndAlignment(data, dataAlignment, "resource blob");
1239  postProcessFn(key, AsmResourceEntryKind::Blob);
1240  }
1241  void buildBool(StringRef key, bool data) final {
1242  if (!shouldElideData)
1243  emitter.emitByte(data, "resource bool");
1244  postProcessFn(key, AsmResourceEntryKind::Bool);
1245  }
1246  void buildString(StringRef key, StringRef data) final {
1247  if (!shouldElideData)
1248  emitter.emitVarInt(stringSection.insert(data), "resource string");
1249  postProcessFn(key, AsmResourceEntryKind::String);
1250  }
1251 
1252 private:
1253  EncodingEmitter &emitter;
1254  StringSectionBuilder &stringSection;
1255  PostProcessFn postProcessFn;
1256  bool shouldElideData = false;
1257 };
1258 } // namespace
1259 
1260 void BytecodeWriter::writeResourceSection(Operation *op,
1261  EncodingEmitter &emitter) {
1262  EncodingEmitter resourceEmitter;
1263  EncodingEmitter resourceOffsetEmitter;
1264  uint64_t prevOffset = 0;
1266  curResourceEntries;
1267 
1268  // Functor used to process the offset for a resource of `kind` defined by
1269  // 'key'.
1270  auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) {
1271  uint64_t curOffset = resourceEmitter.size();
1272  curResourceEntries.emplace_back(key, kind, curOffset - prevOffset);
1273  prevOffset = curOffset;
1274  };
1275 
1276  // Functor used to emit a resource group defined by 'key'.
1277  auto emitResourceGroup = [&](uint64_t key) {
1278  resourceOffsetEmitter.emitVarInt(key, "resource group key");
1279  resourceOffsetEmitter.emitVarInt(curResourceEntries.size(),
1280  "resource group size");
1281  for (auto [key, kind, size] : curResourceEntries) {
1282  resourceOffsetEmitter.emitVarInt(stringSection.insert(key),
1283  "resource key");
1284  resourceOffsetEmitter.emitVarInt(size, "resource size");
1285  resourceOffsetEmitter.emitByte(kind, "resource kind");
1286  }
1287  };
1288 
1289  // Builder used to emit resources.
1290  ResourceBuilder entryBuilder(resourceEmitter, stringSection,
1291  appendResourceOffset,
1292  config.shouldElideResourceData);
1293 
1294  // Emit the external resource entries.
1295  resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size(),
1296  "external resource printer count");
1297  for (const auto &printer : config.externalResourcePrinters) {
1298  curResourceEntries.clear();
1299  printer->buildResources(op, entryBuilder);
1300  emitResourceGroup(stringSection.insert(printer->getName()));
1301  }
1302 
1303  // Emit the dialect resource entries.
1304  for (DialectNumbering &dialect : numberingState.getDialects()) {
1305  if (!dialect.asmInterface)
1306  continue;
1307  curResourceEntries.clear();
1308  dialect.asmInterface->buildResources(op, dialect.resources, entryBuilder);
1309 
1310  // Emit the declaration resources for this dialect, these didn't get emitted
1311  // by the interface. These resources don't have data attached, so just use a
1312  // "blob" kind as a placeholder.
1313  for (const auto &resource : dialect.resourceMap)
1314  if (resource.second->isDeclaration)
1315  appendResourceOffset(resource.first, AsmResourceEntryKind::Blob);
1316 
1317  // Emit the resource group for this dialect.
1318  if (!curResourceEntries.empty())
1319  emitResourceGroup(dialect.number);
1320  }
1321 
1322  // If we didn't emit any resource groups, elide the resource sections.
1323  if (resourceOffsetEmitter.size() == 0)
1324  return;
1325 
1326  emitter.emitSection(bytecode::Section::kResourceOffset,
1327  std::move(resourceOffsetEmitter));
1328  emitter.emitSection(bytecode::Section::kResource, std::move(resourceEmitter));
1329 }
1330 
1331 //===----------------------------------------------------------------------===//
1332 // Strings
1333 //===----------------------------------------------------------------------===//
1334 
1335 void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) {
1336  EncodingEmitter stringEmitter;
1337  stringSection.write(stringEmitter);
1338  emitter.emitSection(bytecode::Section::kString, std::move(stringEmitter));
1339 }
1340 
1341 //===----------------------------------------------------------------------===//
1342 // Properties
1343 //===----------------------------------------------------------------------===//
1344 
1345 void BytecodeWriter::writePropertiesSection(EncodingEmitter &emitter) {
1346  EncodingEmitter propertiesEmitter;
1347  propertiesSection.write(propertiesEmitter);
1348  emitter.emitSection(bytecode::Section::kProperties,
1349  std::move(propertiesEmitter));
1350 }
1351 
1352 //===----------------------------------------------------------------------===//
1353 // Entry Points
1354 //===----------------------------------------------------------------------===//
1355 
1356 LogicalResult mlir::writeBytecodeToFile(Operation *op, raw_ostream &os,
1357  const BytecodeWriterConfig &config) {
1358  BytecodeWriter writer(op, config);
1359  return writer.write(op, os);
1360 }
static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries, EntryCallbackT &&callback)
Write the given entries in contiguous groups with the same parent dialect.
union mlir::linalg::@1193::ArityGroupAndKind::Kind kind
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
static LogicalResult emit(SolverOp solver, const SMTEmissionOptions &options, mlir::raw_indented_ostream &stream)
Emit the SMT operations in the given 'solver' to the 'stream'.
This class represents an opaque handle to a dialect resource entry.
This class is used to build resource entries for use by the printer.
Definition: AsmState.h:247
A class to interact with the attributes and types printer when emitting MLIR bytecode.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:295
Block represents an ordered list of Operations.
Definition: Block.h:33
BlockArgListType getArguments()
Definition: Block.h:87
This class contains the configuration used for the bytecode writer.
void attachTypeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Type >> callback)
llvm::StringMap< std::unique_ptr< DialectVersion > > & getDialectVersionMap() const
A map containing the dialect versions to emit.
void setElideResourceDataFlag(bool shouldElideResourceData=true)
Set a boolean flag to skip emission of resources into the bytecode file.
BytecodeWriterConfig(StringRef producer="MLIR" LLVM_VERSION_STRING)
producer is an optional string that can be used to identify the producer of the bytecode when reading...
void attachFallbackResourcePrinter(FallbackAsmResourceMap &map)
Attach resource printers to the AsmState for the fallback resources in the given map.
int64_t getDesiredBytecodeVersion() const
Get the set desired bytecode version to emit.
void setDialectVersion(std::unique_ptr< DialectVersion > dialectVersion) const
Set a given dialect version to emit on the map.
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > getTypeWriterCallbacks() const
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > getAttributeWriterCallbacks() const
Retrieve the callbacks.
void setDesiredBytecodeVersion(int64_t bytecodeVersion)
Set the desired bytecode version to emit.
void attachResourcePrinter(std::unique_ptr< AsmResourcePrinter > printer)
Attach the given resource printer to the writer configuration.
void attachAttributeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Attribute >> callback)
Attach a custom bytecode printer callback to the configuration for the emission of custom type/attrib...
This class defines a virtual interface for writing to a bytecode stream, providing hooks into the byt...
A fallback map containing external resources not explicitly handled by another parser/printer.
Definition: AsmState.h:421
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
DictionaryAttr getAttrDictionary()
Return all of the attributes on this operation as a DictionaryAttr.
Definition: Operation.cpp:296
unsigned getNumSuccessors()
Definition: Operation.h:707
bool isRegistered()
Returns true if this operation has a registered operation description, otherwise false.
Definition: Operation.h:129
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:674
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:677
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
DictionaryAttr getDiscardableAttrDictionary()
Return all of the discardable attributes on this operation as a DictionaryAttr.
Definition: Operation.h:501
result_type_range getResultTypes()
Definition: Operation.h:428
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378
SuccessorRange getSuccessors()
Definition: Operation.h:704
result_range getResults()
Definition: Operation.h:415
int getPropertiesStorageSize() const
Returns the properties storage size.
Definition: Operation.h:897
OpaqueProperties getPropertiesStorage()
Returns the properties storage.
Definition: Operation.h:901
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:404
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
bool empty()
Definition: Region.h:60
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
This class manages numbering IR entities in preparation of bytecode emission.
Definition: IRNumbering.h:151
@ kAttrType
This section contains the attributes and types referenced within an IR module.
Definition: Encoding.h:73
@ kAttrTypeOffset
This section contains the offsets for the attribute and types within the AttrType section.
Definition: Encoding.h:77
@ kIR
This section contains the list of operations serialized into the bytecode, and their nested regions/o...
Definition: Encoding.h:81
@ kResource
This section contains the resources of the bytecode.
Definition: Encoding.h:84
@ kResourceOffset
This section contains the offsets of resources within the Resource section.
Definition: Encoding.h:88
@ kDialect
This section contains the dialects referenced within an IR module.
Definition: Encoding.h:69
@ kString
This section contains strings referenced within the bytecode.
Definition: Encoding.h:66
@ kDialectVersions
This section contains the versions of each dialect.
Definition: Encoding.h:91
@ kProperties
This section contains the properties for the operations.
Definition: Encoding.h:94
static uint64_t getUseID(OperandT &val, unsigned ownerID)
Get the unique ID of a value use.
Definition: Encoding.h:127
@ kUseListOrdering
Use-list ordering started to be encoded in version 3.
Definition: Encoding.h:38
@ kAlignmentByte
An arbitrary value used to fill alignment padding.
Definition: Encoding.h:56
@ kVersion
The current bytecode version.
Definition: Encoding.h:53
@ kLazyLoading
Support for lazy-loading of isolated region was added in version 2.
Definition: Encoding.h:35
@ kDialectVersioning
Dialects versioning was added in version 1.
Definition: Encoding.h:32
@ kElideUnknownBlockArgLocation
Avoid recording unknown locations on block arguments (compression) started in version 4.
Definition: Encoding.h:42
@ kNativePropertiesEncoding
Support for encoding properties natively in bytecode instead of merged with the discardable attribute...
Definition: Encoding.h:46
@ kMinSupportedVersion
The minimum supported version of the bytecode.
Definition: Encoding.h:29
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Include the generated interface declarations.
const FrozenRewritePatternSet GreedyRewriteConfig config
AsmResourceEntryKind
This enum represents the different kinds of resource values.
Definition: AsmState.h:280
@ String
A string value.
@ Bool
A boolean value.
@ Blob
A blob of data with an accompanying alignment.
LogicalResult writeBytecodeToFile(Operation *op, raw_ostream &os, const BytecodeWriterConfig &config={})
Write the bytecode for the given operation to the provided output stream.
StringRef producer
The producer of the bytecode.
llvm::StringMap< std::unique_ptr< DialectVersion > > dialectVersionMap
A map containing dialect version information for each dialect to emit.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > attributeWriterCallbacks
Printer callbacks used to emit custom type and attribute encodings.
SmallVector< std::unique_ptr< AsmResourcePrinter > > externalResourcePrinters
A collection of non-dialect resource printers.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > typeWriterCallbacks
This class represents a numbering entry for an Dialect.
Definition: IRNumbering.h:106
unsigned number
The number assigned to the dialect.
Definition: IRNumbering.h:114
This class represents the numbering entry of an operation name.
Definition: IRNumbering.h:65