MLIR  17.0.0git
BytecodeWriter.cpp
Go to the documentation of this file.
1 //===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "../Encoding.h"
11 #include "IRNumbering.h"
14 #include "llvm/ADT/CachedHashString.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/SmallString.h"
17 
18 #define DEBUG_TYPE "mlir-bytecode-writer"
19 
20 using namespace mlir;
21 using namespace mlir::bytecode::detail;
22 
23 //===----------------------------------------------------------------------===//
24 // BytecodeWriterConfig
25 //===----------------------------------------------------------------------===//
26 
28  Impl(StringRef producer) : producer(producer) {}
29 
30  /// The producer of the bytecode.
31  StringRef producer;
32 
33  /// A collection of non-dialect resource printers.
35 };
36 
38  : impl(std::make_unique<Impl>(producer)) {}
40  StringRef producer)
41  : BytecodeWriterConfig(producer) {
43 }
45 
47  std::unique_ptr<AsmResourcePrinter> printer) {
48  impl->externalResourcePrinters.emplace_back(std::move(printer));
49 }
50 
51 //===----------------------------------------------------------------------===//
52 // EncodingEmitter
53 //===----------------------------------------------------------------------===//
54 
55 namespace {
56 /// This class functions as the underlying encoding emitter for the bytecode
57 /// writer. This class is a bit different compared to other types of encoders;
58 /// it does not use a single buffer, but instead may contain several buffers
59 /// (some owned by the writer, and some not) that get concatted during the final
60 /// emission.
61 class EncodingEmitter {
62 public:
63  EncodingEmitter() = default;
64  EncodingEmitter(const EncodingEmitter &) = delete;
65  EncodingEmitter &operator=(const EncodingEmitter &) = delete;
66 
67  /// Write the current contents to the provided stream.
68  void writeTo(raw_ostream &os) const;
69 
70  /// Return the current size of the encoded buffer.
71  size_t size() const { return prevResultSize + currentResult.size(); }
72 
73  //===--------------------------------------------------------------------===//
74  // Emission
75  //===--------------------------------------------------------------------===//
76 
77  /// Backpatch a byte in the result buffer at the given offset.
78  void patchByte(uint64_t offset, uint8_t value) {
79  assert(offset < size() && offset >= prevResultSize &&
80  "cannot patch previously emitted data");
81  currentResult[offset - prevResultSize] = value;
82  }
83 
84  /// Emit the provided blob of data, which is owned by the caller and is
85  /// guaranteed to not die before the end of the bytecode process.
86  void emitOwnedBlob(ArrayRef<uint8_t> data) {
87  // Push the current buffer before adding the provided data.
88  appendResult(std::move(currentResult));
89  appendOwnedResult(data);
90  }
91 
92  /// Emit the provided blob of data that has the given alignment, which is
93  /// owned by the caller and is guaranteed to not die before the end of the
94  /// bytecode process. The alignment value is also encoded, making it available
95  /// on load.
96  void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment) {
97  emitVarInt(alignment);
98  emitVarInt(data.size());
99 
100  alignTo(alignment);
101  emitOwnedBlob(data);
102  }
103  void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment) {
104  ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
105  data.size());
106  emitOwnedBlobAndAlignment(castedData, alignment);
107  }
108 
109  /// Align the emitter to the given alignment.
110  void alignTo(unsigned alignment) {
111  if (alignment < 2)
112  return;
113  assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment");
114 
115  // Check to see if we need to emit any padding bytes to meet the desired
116  // alignment.
117  size_t curOffset = size();
118  size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset;
119  while (paddingSize--)
120  emitByte(bytecode::kAlignmentByte);
121 
122  // Keep track of the maximum required alignment.
123  requiredAlignment = std::max(requiredAlignment, alignment);
124  }
125 
126  //===--------------------------------------------------------------------===//
127  // Integer Emission
128 
129  /// Emit a single byte.
130  template <typename T>
131  void emitByte(T byte) {
132  currentResult.push_back(static_cast<uint8_t>(byte));
133  }
134 
135  /// Emit a range of bytes.
136  void emitBytes(ArrayRef<uint8_t> bytes) {
137  llvm::append_range(currentResult, bytes);
138  }
139 
140  /// Emit a variable length integer. The first encoded byte contains a prefix
141  /// in the low bits indicating the encoded length of the value. This length
142  /// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits
143  /// indicate the number of _additional_ bytes (not including the prefix byte).
144  /// All remaining bits in the first byte, along with all of the bits in
145  /// additional bytes, provide the value of the integer encoded in
146  /// little-endian order.
147  void emitVarInt(uint64_t value) {
148  // In the most common case, the value can be represented in a single byte.
149  // Given how hot this case is, explicitly handle that here.
150  if ((value >> 7) == 0)
151  return emitByte((value << 1) | 0x1);
152  emitMultiByteVarInt(value);
153  }
154 
155  /// Emit a signed variable length integer. Signed varints are encoded using
156  /// a varint with zigzag encoding, meaning that we use the low bit of the
157  /// value to indicate the sign of the value. This allows for more efficient
158  /// encoding of negative values by limiting the number of active bits
159  void emitSignedVarInt(uint64_t value) {
160  emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63));
161  }
162 
163  /// Emit a variable length integer whose low bit is used to encode the
164  /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
165  void emitVarIntWithFlag(uint64_t value, bool flag) {
166  emitVarInt((value << 1) | (flag ? 1 : 0));
167  }
168 
169  //===--------------------------------------------------------------------===//
170  // String Emission
171 
172  /// Emit the given string as a nul terminated string.
173  void emitNulTerminatedString(StringRef str) {
174  emitString(str);
175  emitByte(0);
176  }
177 
178  /// Emit the given string without a nul terminator.
179  void emitString(StringRef str) {
180  emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()});
181  }
182 
183  //===--------------------------------------------------------------------===//
184  // Section Emission
185 
186  /// Emit a nested section of the given code, whose contents are encoded in the
187  /// provided emitter.
188  void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) {
189  // Emit the section code and length. The high bit of the code is used to
190  // indicate whether the section alignment is present, so save an offset to
191  // it.
192  uint64_t codeOffset = currentResult.size();
193  emitByte(code);
194  emitVarInt(emitter.size());
195 
196  // Integrate the alignment of the section into this emitter if necessary.
197  unsigned emitterAlign = emitter.requiredAlignment;
198  if (emitterAlign > 1) {
199  if (size() & (emitterAlign - 1)) {
200  emitVarInt(emitterAlign);
201  alignTo(emitterAlign);
202 
203  // Indicate that we needed to align the section, the high bit of the
204  // code field is used for this.
205  currentResult[codeOffset] |= 0b10000000;
206  } else {
207  // Otherwise, if we happen to be at a compatible offset, we just
208  // remember that we need this alignment.
209  requiredAlignment = std::max(requiredAlignment, emitterAlign);
210  }
211  }
212 
213  // Push our current buffer and then merge the provided section body into
214  // ours.
215  appendResult(std::move(currentResult));
216  for (std::vector<uint8_t> &result : emitter.prevResultStorage)
217  prevResultStorage.push_back(std::move(result));
218  llvm::append_range(prevResultList, emitter.prevResultList);
219  prevResultSize += emitter.prevResultSize;
220  appendResult(std::move(emitter.currentResult));
221  }
222 
223 private:
224  /// Emit the given value using a variable width encoding. This method is a
225  /// fallback when the number of bytes needed to encode the value is greater
226  /// than 1. We mark it noinline here so that the single byte hot path isn't
227  /// pessimized.
228  LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value);
229 
230  /// Append a new result buffer to the current contents.
231  void appendResult(std::vector<uint8_t> &&result) {
232  if (result.empty())
233  return;
234  prevResultStorage.emplace_back(std::move(result));
235  appendOwnedResult(prevResultStorage.back());
236  }
237  void appendOwnedResult(ArrayRef<uint8_t> result) {
238  if (result.empty())
239  return;
240  prevResultSize += result.size();
241  prevResultList.emplace_back(result);
242  }
243 
244  /// The result of the emitter currently being built. We refrain from building
245  /// a single buffer to simplify emitting sections, large data, and more. The
246  /// result is thus represented using multiple distinct buffers, some of which
247  /// we own (via prevResultStorage), and some of which are just pointers into
248  /// externally owned buffers.
249  std::vector<uint8_t> currentResult;
250  std::vector<ArrayRef<uint8_t>> prevResultList;
251  std::vector<std::vector<uint8_t>> prevResultStorage;
252 
253  /// An up-to-date total size of all of the buffers within `prevResultList`.
254  /// This enables O(1) size checks of the current encoding.
255  size_t prevResultSize = 0;
256 
257  /// The highest required alignment for the start of this section.
258  unsigned requiredAlignment = 1;
259 };
260 
261 //===----------------------------------------------------------------------===//
262 // StringSectionBuilder
263 //===----------------------------------------------------------------------===//
264 
265 namespace {
266 /// This class is used to simplify the process of emitting the string section.
267 class StringSectionBuilder {
268 public:
269  /// Add the given string to the string section, and return the index of the
270  /// string within the section.
271  size_t insert(StringRef str) {
272  auto it = strings.insert({llvm::CachedHashStringRef(str), strings.size()});
273  return it.first->second;
274  }
275 
276  /// Write the current set of strings to the given emitter.
277  void write(EncodingEmitter &emitter) {
278  emitter.emitVarInt(strings.size());
279 
280  // Emit the sizes in reverse order, so that we don't need to backpatch an
281  // offset to the string data or have a separate section.
282  for (const auto &it : llvm::reverse(strings))
283  emitter.emitVarInt(it.first.size() + 1);
284  // Emit the string data itself.
285  for (const auto &it : strings)
286  emitter.emitNulTerminatedString(it.first.val());
287  }
288 
289 private:
290  /// A set of strings referenced within the bytecode. The value of the map is
291  /// unused.
292  llvm::MapVector<llvm::CachedHashStringRef, size_t> strings;
293 };
294 } // namespace
295 
296 class DialectWriter : public DialectBytecodeWriter {
297 public:
298  DialectWriter(EncodingEmitter &emitter, IRNumberingState &numberingState,
299  StringSectionBuilder &stringSection)
300  : emitter(emitter), numberingState(numberingState),
301  stringSection(stringSection) {}
302 
303  //===--------------------------------------------------------------------===//
304  // IR
305  //===--------------------------------------------------------------------===//
306 
307  void writeAttribute(Attribute attr) override {
308  emitter.emitVarInt(numberingState.getNumber(attr));
309  }
310  void writeType(Type type) override {
311  emitter.emitVarInt(numberingState.getNumber(type));
312  }
313 
314  void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
315  emitter.emitVarInt(numberingState.getNumber(resource));
316  }
317 
318  //===--------------------------------------------------------------------===//
319  // Primitives
320  //===--------------------------------------------------------------------===//
321 
322  void writeVarInt(uint64_t value) override { emitter.emitVarInt(value); }
323 
324  void writeSignedVarInt(int64_t value) override {
325  emitter.emitSignedVarInt(value);
326  }
327 
328  void writeAPIntWithKnownWidth(const APInt &value) override {
329  size_t bitWidth = value.getBitWidth();
330 
331  // If the value is a single byte, just emit it directly without going
332  // through a varint.
333  if (bitWidth <= 8)
334  return emitter.emitByte(value.getLimitedValue());
335 
336  // If the value fits within a single varint, emit it directly.
337  if (bitWidth <= 64)
338  return emitter.emitSignedVarInt(value.getLimitedValue());
339 
340  // Otherwise, we need to encode a variable number of active words. We use
341  // active words instead of the number of total words under the observation
342  // that smaller values will be more common.
343  unsigned numActiveWords = value.getActiveWords();
344  emitter.emitVarInt(numActiveWords);
345 
346  const uint64_t *rawValueData = value.getRawData();
347  for (unsigned i = 0; i < numActiveWords; ++i)
348  emitter.emitSignedVarInt(rawValueData[i]);
349  }
350 
351  void writeAPFloatWithKnownSemantics(const APFloat &value) override {
352  writeAPIntWithKnownWidth(value.bitcastToAPInt());
353  }
354 
355  void writeOwnedString(StringRef str) override {
356  emitter.emitVarInt(stringSection.insert(str));
357  }
358 
359  void writeOwnedBlob(ArrayRef<char> blob) override {
360  emitter.emitVarInt(blob.size());
361  emitter.emitOwnedBlob(ArrayRef<uint8_t>(
362  reinterpret_cast<const uint8_t *>(blob.data()), blob.size()));
363  }
364 
365 private:
366  EncodingEmitter &emitter;
367  IRNumberingState &numberingState;
368  StringSectionBuilder &stringSection;
369 };
370 
371 /// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need
372 /// to go through an intermediate buffer when interacting with code that wants a
373 /// raw_ostream.
374 class RawEmitterOstream : public raw_ostream {
375 public:
376  explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) {
377  SetUnbuffered();
378  }
379 
380 private:
381  void write_impl(const char *ptr, size_t size) override {
382  emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size});
383  }
384  uint64_t current_pos() const override { return emitter.size(); }
385 
386  /// The section being emitted to.
387  EncodingEmitter &emitter;
388 };
389 } // namespace
390 
391 void EncodingEmitter::writeTo(raw_ostream &os) const {
392  for (auto &prevResult : prevResultList)
393  os.write((const char *)prevResult.data(), prevResult.size());
394  os.write((const char *)currentResult.data(), currentResult.size());
395 }
396 
397 void EncodingEmitter::emitMultiByteVarInt(uint64_t value) {
398  // Compute the number of bytes needed to encode the value. Each byte can hold
399  // up to 7-bits of data. We only check up to the number of bits we can encode
400  // in the first byte (8).
401  uint64_t it = value >> 7;
402  for (size_t numBytes = 2; numBytes < 9; ++numBytes) {
403  if (LLVM_LIKELY(it >>= 7) == 0) {
404  uint64_t encodedValue = (value << 1) | 0x1;
405  encodedValue <<= (numBytes - 1);
406  emitBytes({reinterpret_cast<uint8_t *>(&encodedValue), numBytes});
407  return;
408  }
409  }
410 
411  // If the value is too large to encode in a single byte, emit a special all
412  // zero marker byte and splat the value directly.
413  emitByte(0);
414  emitBytes({reinterpret_cast<uint8_t *>(&value), sizeof(value)});
415 }
416 
417 //===----------------------------------------------------------------------===//
418 // Bytecode Writer
419 //===----------------------------------------------------------------------===//
420 
421 namespace {
422 class BytecodeWriter {
423 public:
424  BytecodeWriter(Operation *op) : numberingState(op) {}
425 
426  /// Write the bytecode for the given root operation.
427  void write(Operation *rootOp, raw_ostream &os,
428  const BytecodeWriterConfig::Impl &config);
429 
430 private:
431  //===--------------------------------------------------------------------===//
432  // Dialects
433 
434  void writeDialectSection(EncodingEmitter &emitter);
435 
436  //===--------------------------------------------------------------------===//
437  // Attributes and Types
438 
439  void writeAttrTypeSection(EncodingEmitter &emitter);
440 
441  //===--------------------------------------------------------------------===//
442  // Operations
443 
444  void writeBlock(EncodingEmitter &emitter, Block *block);
445  void writeOp(EncodingEmitter &emitter, Operation *op);
446  void writeRegion(EncodingEmitter &emitter, Region *region);
447  void writeIRSection(EncodingEmitter &emitter, Operation *op);
448 
449  //===--------------------------------------------------------------------===//
450  // Resources
451 
452  void writeResourceSection(Operation *op, EncodingEmitter &emitter,
453  const BytecodeWriterConfig::Impl &config);
454 
455  //===--------------------------------------------------------------------===//
456  // Strings
457 
458  void writeStringSection(EncodingEmitter &emitter);
459 
460  //===--------------------------------------------------------------------===//
461  // Fields
462 
463  /// The builder used for the string section.
464  StringSectionBuilder stringSection;
465 
466  /// The IR numbering state generated for the root operation.
467  IRNumberingState numberingState;
468 };
469 } // namespace
470 
471 void BytecodeWriter::write(Operation *rootOp, raw_ostream &os,
472  const BytecodeWriterConfig::Impl &config) {
473  EncodingEmitter emitter;
474 
475  // Emit the bytecode file header. This is how we identify the output as a
476  // bytecode file.
477  emitter.emitString("ML\xefR");
478 
479  // Emit the bytecode version.
480  emitter.emitVarInt(bytecode::kVersion);
481 
482  // Emit the producer.
483  emitter.emitNulTerminatedString(config.producer);
484 
485  // Emit the dialect section.
486  writeDialectSection(emitter);
487 
488  // Emit the attributes and types section.
489  writeAttrTypeSection(emitter);
490 
491  // Emit the IR section.
492  writeIRSection(emitter, rootOp);
493 
494  // Emit the resources section.
495  writeResourceSection(rootOp, emitter, config);
496 
497  // Emit the string section.
498  writeStringSection(emitter);
499 
500  // Write the generated bytecode to the provided output stream.
501  emitter.writeTo(os);
502 }
503 
504 //===----------------------------------------------------------------------===//
505 // Dialects
506 
507 /// Write the given entries in contiguous groups with the same parent dialect.
508 /// Each dialect sub-group is encoded with the parent dialect and number of
509 /// elements, followed by the encoding for the entries. The given callback is
510 /// invoked to encode each individual entry.
511 template <typename EntriesT, typename EntryCallbackT>
512 static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
513  EntryCallbackT &&callback) {
514  for (auto it = entries.begin(), e = entries.end(); it != e;) {
515  auto groupStart = it++;
516 
517  // Find the end of the group that shares the same parent dialect.
518  DialectNumbering *currentDialect = groupStart->dialect;
519  it = std::find_if(it, e, [&](const auto &entry) {
520  return entry.dialect != currentDialect;
521  });
522 
523  // Emit the dialect and number of elements.
524  emitter.emitVarInt(currentDialect->number);
525  emitter.emitVarInt(std::distance(groupStart, it));
526 
527  // Emit the entries within the group.
528  for (auto &entry : llvm::make_range(groupStart, it))
529  callback(entry);
530  }
531 }
532 
533 void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
534  EncodingEmitter dialectEmitter;
535 
536  // Emit the referenced dialects.
537  auto dialects = numberingState.getDialects();
538  dialectEmitter.emitVarInt(llvm::size(dialects));
539  for (DialectNumbering &dialect : dialects) {
540  // Write the string section and get the ID.
541  size_t nameID = stringSection.insert(dialect.name);
542 
543  // Try writing the version to the versionEmitter.
544  EncodingEmitter versionEmitter;
545  if (dialect.interface) {
546  // The writer used when emitting using a custom bytecode encoding.
547  DialectWriter versionWriter(versionEmitter, numberingState,
548  stringSection);
549  dialect.interface->writeVersion(versionWriter);
550  }
551 
552  // If the version emitter is empty, version is not available. We can encode
553  // this in the dialect ID, so if there is no version, we don't write the
554  // section.
555  size_t versionAvailable = versionEmitter.size() > 0;
556  dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable);
557  if (versionAvailable)
558  dialectEmitter.emitSection(bytecode::Section::kDialectVersions,
559  std::move(versionEmitter));
560  }
561 
562  // Emit the referenced operation names grouped by dialect.
563  auto emitOpName = [&](OpNameNumbering &name) {
564  dialectEmitter.emitVarInt(stringSection.insert(name.name.stripDialect()));
565  };
566  writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName);
567 
568  emitter.emitSection(bytecode::Section::kDialect, std::move(dialectEmitter));
569 }
570 
571 //===----------------------------------------------------------------------===//
572 // Attributes and Types
573 
574 void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
575  EncodingEmitter attrTypeEmitter;
576  EncodingEmitter offsetEmitter;
577  offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()));
578  offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()));
579 
580  // A functor used to emit an attribute or type entry.
581  uint64_t prevOffset = 0;
582  auto emitAttrOrType = [&](auto &entry) {
583  auto entryValue = entry.getValue();
584 
585  // First, try to emit this entry using the dialect bytecode interface.
586  bool hasCustomEncoding = false;
587  if (const BytecodeDialectInterface *interface = entry.dialect->interface) {
588  // The writer used when emitting using a custom bytecode encoding.
589  DialectWriter dialectWriter(attrTypeEmitter, numberingState,
590  stringSection);
591 
592  if constexpr (std::is_same_v<std::decay_t<decltype(entryValue)>, Type>) {
593  // TODO: We don't currently support custom encoded mutable types.
594  hasCustomEncoding =
595  !entryValue.template hasTrait<TypeTrait::IsMutable>() &&
596  succeeded(interface->writeType(entryValue, dialectWriter));
597  } else {
598  // TODO: We don't currently support custom encoded mutable attributes.
599  hasCustomEncoding =
600  !entryValue.template hasTrait<AttributeTrait::IsMutable>() &&
601  succeeded(interface->writeAttribute(entryValue, dialectWriter));
602  }
603  }
604 
605  // If the entry was not emitted using the dialect interface, emit it using
606  // the textual format.
607  if (!hasCustomEncoding) {
608  RawEmitterOstream(attrTypeEmitter) << entryValue;
609  attrTypeEmitter.emitByte(0);
610  }
611 
612  // Record the offset of this entry.
613  uint64_t curOffset = attrTypeEmitter.size();
614  offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding);
615  prevOffset = curOffset;
616  };
617 
618  // Emit the attribute and type entries for each dialect.
619  writeDialectGrouping(offsetEmitter, numberingState.getAttributes(),
620  emitAttrOrType);
621  writeDialectGrouping(offsetEmitter, numberingState.getTypes(),
622  emitAttrOrType);
623 
624  // Emit the sections to the stream.
625  emitter.emitSection(bytecode::Section::kAttrTypeOffset,
626  std::move(offsetEmitter));
627  emitter.emitSection(bytecode::Section::kAttrType, std::move(attrTypeEmitter));
628 }
629 
630 //===----------------------------------------------------------------------===//
631 // Operations
632 
633 void BytecodeWriter::writeBlock(EncodingEmitter &emitter, Block *block) {
634  ArrayRef<BlockArgument> args = block->getArguments();
635  bool hasArgs = !args.empty();
636 
637  // Emit the number of operations in this block, and if it has arguments. We
638  // use the low bit of the operation count to indicate if the block has
639  // arguments.
640  unsigned numOps = numberingState.getOperationCount(block);
641  emitter.emitVarIntWithFlag(numOps, hasArgs);
642 
643  // Emit the arguments of the block.
644  if (hasArgs) {
645  emitter.emitVarInt(args.size());
646  for (BlockArgument arg : args) {
647  emitter.emitVarInt(numberingState.getNumber(arg.getType()));
648  emitter.emitVarInt(numberingState.getNumber(arg.getLoc()));
649  }
650  }
651 
652  // Emit the operations within the block.
653  for (Operation &op : *block)
654  writeOp(emitter, &op);
655 }
656 
657 void BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
658  emitter.emitVarInt(numberingState.getNumber(op->getName()));
659 
660  // Emit a mask for the operation components. We need to fill this in later
661  // (when we actually know what needs to be emitted), so emit a placeholder for
662  // now.
663  uint64_t maskOffset = emitter.size();
664  uint8_t opEncodingMask = 0;
665  emitter.emitByte(0);
666 
667  // Emit the location for this operation.
668  emitter.emitVarInt(numberingState.getNumber(op->getLoc()));
669 
670  // Emit the attributes of this operation.
671  DictionaryAttr attrs = op->getAttrDictionary();
672  if (!attrs.empty()) {
673  opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
674  emitter.emitVarInt(numberingState.getNumber(op->getAttrDictionary()));
675  }
676 
677  // Emit the result types of the operation.
678  if (unsigned numResults = op->getNumResults()) {
679  opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
680  emitter.emitVarInt(numResults);
681  for (Type type : op->getResultTypes())
682  emitter.emitVarInt(numberingState.getNumber(type));
683  }
684 
685  // Emit the operands of the operation.
686  if (unsigned numOperands = op->getNumOperands()) {
687  opEncodingMask |= bytecode::OpEncodingMask::kHasOperands;
688  emitter.emitVarInt(numOperands);
689  for (Value operand : op->getOperands())
690  emitter.emitVarInt(numberingState.getNumber(operand));
691  }
692 
693  // Emit the successors of the operation.
694  if (unsigned numSuccessors = op->getNumSuccessors()) {
696  emitter.emitVarInt(numSuccessors);
697  for (Block *successor : op->getSuccessors())
698  emitter.emitVarInt(numberingState.getNumber(successor));
699  }
700 
701  // Check for regions.
702  unsigned numRegions = op->getNumRegions();
703  if (numRegions)
705 
706  // Update the mask for the operation.
707  emitter.patchByte(maskOffset, opEncodingMask);
708 
709  // With the mask emitted, we can now emit the regions of the operation. We do
710  // this after mask emission to avoid offset complications that may arise by
711  // emitting the regions first (e.g. if the regions are huge, backpatching the
712  // op encoding mask is more annoying).
713  if (numRegions) {
714  bool isIsolatedFromAbove = op->hasTrait<OpTrait::IsIsolatedFromAbove>();
715  emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove);
716 
717  for (Region &region : op->getRegions())
718  writeRegion(emitter, &region);
719  }
720 }
721 
722 void BytecodeWriter::writeRegion(EncodingEmitter &emitter, Region *region) {
723  // If the region is empty, we only need to emit the number of blocks (which is
724  // zero).
725  if (region->empty())
726  return emitter.emitVarInt(/*numBlocks*/ 0);
727 
728  // Emit the number of blocks and values within the region.
729  unsigned numBlocks, numValues;
730  std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region);
731  emitter.emitVarInt(numBlocks);
732  emitter.emitVarInt(numValues);
733 
734  // Emit the blocks within the region.
735  for (Block &block : *region)
736  writeBlock(emitter, &block);
737 }
738 
739 void BytecodeWriter::writeIRSection(EncodingEmitter &emitter, Operation *op) {
740  EncodingEmitter irEmitter;
741 
742  // Write the IR section the same way as a block with no arguments. Note that
743  // the low-bit of the operation count for a block is used to indicate if the
744  // block has arguments, which in this case is always false.
745  irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false);
746 
747  // Emit the operations.
748  writeOp(irEmitter, op);
749 
750  emitter.emitSection(bytecode::Section::kIR, std::move(irEmitter));
751 }
752 
753 //===----------------------------------------------------------------------===//
754 // Resources
755 
756 namespace {
757 /// This class represents a resource builder implementation for the MLIR
758 /// bytecode format.
759 class ResourceBuilder : public AsmResourceBuilder {
760 public:
761  using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>;
762 
763  ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection,
764  PostProcessFn postProcessFn)
765  : emitter(emitter), stringSection(stringSection),
766  postProcessFn(postProcessFn) {}
767  ~ResourceBuilder() override = default;
768 
769  void buildBlob(StringRef key, ArrayRef<char> data,
770  uint32_t dataAlignment) final {
771  emitter.emitOwnedBlobAndAlignment(data, dataAlignment);
772  postProcessFn(key, AsmResourceEntryKind::Blob);
773  }
774  void buildBool(StringRef key, bool data) final {
775  emitter.emitByte(data);
776  postProcessFn(key, AsmResourceEntryKind::Bool);
777  }
778  void buildString(StringRef key, StringRef data) final {
779  emitter.emitVarInt(stringSection.insert(data));
780  postProcessFn(key, AsmResourceEntryKind::String);
781  }
782 
783 private:
784  EncodingEmitter &emitter;
785  StringSectionBuilder &stringSection;
786  PostProcessFn postProcessFn;
787 };
788 } // namespace
789 
790 void BytecodeWriter::writeResourceSection(
791  Operation *op, EncodingEmitter &emitter,
792  const BytecodeWriterConfig::Impl &config) {
793  EncodingEmitter resourceEmitter;
794  EncodingEmitter resourceOffsetEmitter;
795  uint64_t prevOffset = 0;
797  curResourceEntries;
798 
799  // Functor used to process the offset for a resource of `kind` defined by
800  // 'key'.
801  auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) {
802  uint64_t curOffset = resourceEmitter.size();
803  curResourceEntries.emplace_back(key, kind, curOffset - prevOffset);
804  prevOffset = curOffset;
805  };
806 
807  // Functor used to emit a resource group defined by 'key'.
808  auto emitResourceGroup = [&](uint64_t key) {
809  resourceOffsetEmitter.emitVarInt(key);
810  resourceOffsetEmitter.emitVarInt(curResourceEntries.size());
811  for (auto [key, kind, size] : curResourceEntries) {
812  resourceOffsetEmitter.emitVarInt(stringSection.insert(key));
813  resourceOffsetEmitter.emitVarInt(size);
814  resourceOffsetEmitter.emitByte(kind);
815  }
816  };
817 
818  // Builder used to emit resources.
819  ResourceBuilder entryBuilder(resourceEmitter, stringSection,
820  appendResourceOffset);
821 
822  // Emit the external resource entries.
823  resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size());
824  for (const auto &printer : config.externalResourcePrinters) {
825  curResourceEntries.clear();
826  printer->buildResources(op, entryBuilder);
827  emitResourceGroup(stringSection.insert(printer->getName()));
828  }
829 
830  // Emit the dialect resource entries.
831  for (DialectNumbering &dialect : numberingState.getDialects()) {
832  if (!dialect.asmInterface)
833  continue;
834  curResourceEntries.clear();
835  dialect.asmInterface->buildResources(op, dialect.resources, entryBuilder);
836 
837  // Emit the declaration resources for this dialect, these didn't get emitted
838  // by the interface. These resources don't have data attached, so just use a
839  // "blob" kind as a placeholder.
840  for (const auto &resource : dialect.resourceMap)
841  if (resource.second->isDeclaration)
842  appendResourceOffset(resource.first, AsmResourceEntryKind::Blob);
843 
844  // Emit the resource group for this dialect.
845  if (!curResourceEntries.empty())
846  emitResourceGroup(dialect.number);
847  }
848 
849  // If we didn't emit any resource groups, elide the resource sections.
850  if (resourceOffsetEmitter.size() == 0)
851  return;
852 
853  emitter.emitSection(bytecode::Section::kResourceOffset,
854  std::move(resourceOffsetEmitter));
855  emitter.emitSection(bytecode::Section::kResource, std::move(resourceEmitter));
856 }
857 
858 //===----------------------------------------------------------------------===//
859 // Strings
860 
861 void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) {
862  EncodingEmitter stringEmitter;
863  stringSection.write(stringEmitter);
864  emitter.emitSection(bytecode::Section::kString, std::move(stringEmitter));
865 }
866 
867 //===----------------------------------------------------------------------===//
868 // Entry Points
869 //===----------------------------------------------------------------------===//
870 
871 void mlir::writeBytecodeToFile(Operation *op, raw_ostream &os,
872  const BytecodeWriterConfig &config) {
873  BytecodeWriter writer(op);
874  writer.write(op, os, config.getImpl());
875 }
static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries, EntryCallbackT &&callback)
Write the given entries in contiguous groups with the same parent dialect.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This class represents an opaque handle to a dialect resource entry.
This class is used to build resource entries for use by the printer.
Definition: AsmState.h:237
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:304
Block represents an ordered list of Operations.
Definition: Block.h:30
BlockArgListType getArguments()
Definition: Block.h:76
This class contains the configuration used for the bytecode writer.
BytecodeWriterConfig(StringRef producer="MLIR" LLVM_VERSION_STRING)
producer is an optional string that can be used to identify the producer of the bytecode when reading...
void attachFallbackResourcePrinter(FallbackAsmResourceMap &map)
Attach resource printers to the AsmState for the fallback resources in the given map.
const Impl & getImpl() const
Return an instance of the internal implementation.
void attachResourcePrinter(std::unique_ptr< AsmResourcePrinter > printer)
Attach the given resource printer to the writer configuration.
This class defines a virtual interface for writing to a bytecode stream, providing hooks into the byt...
A fallback map containing external resources not explicitly handled by another parser/printer.
Definition: AsmState.h:410
This class provides the API for ops that are known to be isolated from above.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:75
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:592
unsigned getNumSuccessors()
Definition: Operation.h:570
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:537
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:207
unsigned getNumOperands()
Definition: Operation.h:325
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:540
DictionaryAttr getAttrDictionary()
Return all of the attributes on this operation as a DictionaryAttr.
Definition: Operation.h:421
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:103
result_type_range getResultTypes()
Definition: Operation.h:407
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:357
SuccessorRange getSuccessors()
Definition: Operation.h:567
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:383
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
bool empty()
Definition: Region.h:60
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:93
This class manages numbering IR entities in preparation of bytecode emission.
Definition: IRNumbering.h:134
@ kAttrType
This section contains the attributes and types referenced within an IR module.
Definition: Encoding.h:50
@ kAttrTypeOffset
This section contains the offsets for the attribute and types within the AttrType section.
Definition: Encoding.h:54
@ kIR
This section contains the list of operations serialized into the bytecode, and their nested regions/o...
Definition: Encoding.h:58
@ kResource
This section contains the resources of the bytecode.
Definition: Encoding.h:61
@ kResourceOffset
This section contains the offsets of resources within the Resource section.
Definition: Encoding.h:65
@ kDialect
This section contains the dialects referenced within an IR module.
Definition: Encoding.h:46
@ kString
This section contains strings referenced within the bytecode.
Definition: Encoding.h:43
@ kDialectVersions
This section contains the versions of each dialect.
Definition: Encoding.h:68
@ kAlignmentByte
An arbitrary value used to fill alignment padding.
Definition: Encoding.h:33
@ kVersion
The current bytecode version.
Definition: Encoding.h:30
This header declares functions that assist transformations in the MemRef dialect.
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:68
AsmResourceEntryKind
This enum represents the different kinds of resource values.
Definition: AsmState.h:270
@ String
A string value.
@ Bool
A boolean value.
@ Blob
A blob of data with an accompanying alignment.
void writeBytecodeToFile(Operation *op, raw_ostream &os, const BytecodeWriterConfig &config={})
Write the bytecode for the given operation to the provided output stream.
StringRef producer
The producer of the bytecode.
SmallVector< std::unique_ptr< AsmResourcePrinter > > externalResourcePrinters
A collection of non-dialect resource printers.
This class represents a numbering entry for a Dialect.
Definition: IRNumbering.h:105
unsigned number
The number assigned to the dialect.
Definition: IRNumbering.h:113
This class represents the numbering entry of an operation name.
Definition: IRNumbering.h:64