MLIR  16.0.0git
BytecodeWriter.cpp
Go to the documentation of this file.
1 //===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "../Encoding.h"
11 #include "IRNumbering.h"
13 #include "mlir/IR/BuiltinDialect.h"
15 #include "llvm/ADT/CachedHashString.h"
16 #include "llvm/ADT/MapVector.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/Support/Debug.h"
19 #include <random>
20 
21 #define DEBUG_TYPE "mlir-bytecode-writer"
22 
23 using namespace mlir;
24 using namespace mlir::bytecode::detail;
25 
26 //===----------------------------------------------------------------------===//
27 // BytecodeWriterConfig
28 //===----------------------------------------------------------------------===//
29 
31  Impl(StringRef producer) : producer(producer) {}
32 
33  /// The producer of the bytecode.
34  StringRef producer;
35 
36  /// A collection of non-dialect resource printers.
38 };
39 
41  : impl(std::make_unique<Impl>(producer)) {}
43  StringRef producer)
44  : BytecodeWriterConfig(producer) {
46 }
48 
50  std::unique_ptr<AsmResourcePrinter> printer) {
51  impl->externalResourcePrinters.emplace_back(std::move(printer));
52 }
53 
54 //===----------------------------------------------------------------------===//
55 // EncodingEmitter
56 //===----------------------------------------------------------------------===//
57 
58 namespace {
59 /// This class functions as the underlying encoding emitter for the bytecode
60 /// writer. This class is a bit different compared to other types of encoders;
61 /// it does not use a single buffer, but instead may contain several buffers
62 /// (some owned by the writer, and some not) that get concatted during the final
63 /// emission.
64 class EncodingEmitter {
65 public:
66  EncodingEmitter() = default;
67  EncodingEmitter(const EncodingEmitter &) = delete;
68  EncodingEmitter &operator=(const EncodingEmitter &) = delete;
69 
70  /// Write the current contents to the provided stream.
71  void writeTo(raw_ostream &os) const;
72 
73  /// Return the current size of the encoded buffer.
74  size_t size() const { return prevResultSize + currentResult.size(); }
75 
76  //===--------------------------------------------------------------------===//
77  // Emission
78  //===--------------------------------------------------------------------===//
79 
80  /// Backpatch a byte in the result buffer at the given offset.
81  void patchByte(uint64_t offset, uint8_t value) {
82  assert(offset < size() && offset >= prevResultSize &&
83  "cannot patch previously emitted data");
84  currentResult[offset - prevResultSize] = value;
85  }
86 
87  /// Emit the provided blob of data, which is owned by the caller and is
88  /// guaranteed to not die before the end of the bytecode process.
89  void emitOwnedBlob(ArrayRef<uint8_t> data) {
90  // Push the current buffer before adding the provided data.
91  appendResult(std::move(currentResult));
92  appendOwnedResult(data);
93  }
94 
95  /// Emit the provided blob of data that has the given alignment, which is
96  /// owned by the caller and is guaranteed to not die before the end of the
97  /// bytecode process. The alignment value is also encoded, making it available
98  /// on load.
99  void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment) {
100  emitVarInt(alignment);
101  emitVarInt(data.size());
102 
103  alignTo(alignment);
104  emitOwnedBlob(data);
105  }
106  void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment) {
107  ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
108  data.size());
109  emitOwnedBlobAndAlignment(castedData, alignment);
110  }
111 
112  /// Align the emitter to the given alignment.
113  void alignTo(unsigned alignment) {
114  if (alignment < 2)
115  return;
116  assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment");
117 
118  // Check to see if we need to emit any padding bytes to meet the desired
119  // alignment.
120  size_t curOffset = size();
121  size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset;
122  while (paddingSize--)
123  emitByte(bytecode::kAlignmentByte);
124 
125  // Keep track of the maximum required alignment.
126  requiredAlignment = std::max(requiredAlignment, alignment);
127  }
128 
129  //===--------------------------------------------------------------------===//
130  // Integer Emission
131 
132  /// Emit a single byte.
133  template <typename T>
134  void emitByte(T byte) {
135  currentResult.push_back(static_cast<uint8_t>(byte));
136  }
137 
138  /// Emit a range of bytes.
139  void emitBytes(ArrayRef<uint8_t> bytes) {
140  llvm::append_range(currentResult, bytes);
141  }
142 
143  /// Emit a variable length integer. The first encoded byte contains a prefix
144  /// in the low bits indicating the encoded length of the value. This length
145  /// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits
146  /// indicate the number of _additional_ bytes (not including the prefix byte).
147  /// All remaining bits in the first byte, along with all of the bits in
148  /// additional bytes, provide the value of the integer encoded in
149  /// little-endian order.
150  void emitVarInt(uint64_t value) {
151  // In the most common case, the value can be represented in a single byte.
152  // Given how hot this case is, explicitly handle that here.
153  if ((value >> 7) == 0)
154  return emitByte((value << 1) | 0x1);
155  emitMultiByteVarInt(value);
156  }
157 
158  /// Emit a signed variable length integer. Signed varints are encoded using
159  /// a varint with zigzag encoding, meaning that we use the low bit of the
160  /// value to indicate the sign of the value. This allows for more efficient
161  /// encoding of negative values by limiting the number of active bits
162  void emitSignedVarInt(uint64_t value) {
163  emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63));
164  }
165 
166  /// Emit a variable length integer whose low bit is used to encode the
167  /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
168  void emitVarIntWithFlag(uint64_t value, bool flag) {
169  emitVarInt((value << 1) | (flag ? 1 : 0));
170  }
171 
172  //===--------------------------------------------------------------------===//
173  // String Emission
174 
175  /// Emit the given string as a nul terminated string.
176  void emitNulTerminatedString(StringRef str) {
177  emitString(str);
178  emitByte(0);
179  }
180 
181  /// Emit the given string without a nul terminator.
182  void emitString(StringRef str) {
183  emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()});
184  }
185 
186  //===--------------------------------------------------------------------===//
187  // Section Emission
188 
189  /// Emit a nested section of the given code, whose contents are encoded in the
190  /// provided emitter.
191  void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) {
192  // Emit the section code and length. The high bit of the code is used to
193  // indicate whether the section alignment is present, so save an offset to
194  // it.
195  uint64_t codeOffset = currentResult.size();
196  emitByte(code);
197  emitVarInt(emitter.size());
198 
199  // Integrate the alignment of the section into this emitter if necessary.
200  unsigned emitterAlign = emitter.requiredAlignment;
201  if (emitterAlign > 1) {
202  if (size() & (emitterAlign - 1)) {
203  emitVarInt(emitterAlign);
204  alignTo(emitterAlign);
205 
206  // Indicate that we needed to align the section, the high bit of the
207  // code field is used for this.
208  currentResult[codeOffset] |= 0b10000000;
209  } else {
210  // Otherwise, if we happen to be at a compatible offset, we just
211  // remember that we need this alignment.
212  requiredAlignment = std::max(requiredAlignment, emitterAlign);
213  }
214  }
215 
216  // Push our current buffer and then merge the provided section body into
217  // ours.
218  appendResult(std::move(currentResult));
219  for (std::vector<uint8_t> &result : emitter.prevResultStorage)
220  prevResultStorage.push_back(std::move(result));
221  llvm::append_range(prevResultList, emitter.prevResultList);
222  prevResultSize += emitter.prevResultSize;
223  appendResult(std::move(emitter.currentResult));
224  }
225 
226 private:
227  /// Emit the given value using a variable width encoding. This method is a
228  /// fallback when the number of bytes needed to encode the value is greater
229  /// than 1. We mark it noinline here so that the single byte hot path isn't
230  /// pessimized.
231  LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value);
232 
233  /// Append a new result buffer to the current contents.
234  void appendResult(std::vector<uint8_t> &&result) {
235  if (result.empty())
236  return;
237  prevResultStorage.emplace_back(std::move(result));
238  appendOwnedResult(prevResultStorage.back());
239  }
240  void appendOwnedResult(ArrayRef<uint8_t> result) {
241  if (result.empty())
242  return;
243  prevResultSize += result.size();
244  prevResultList.emplace_back(result);
245  }
246 
247  /// The result of the emitter currently being built. We refrain from building
248  /// a single buffer to simplify emitting sections, large data, and more. The
249  /// result is thus represented using multiple distinct buffers, some of which
250  /// we own (via prevResultStorage), and some of which are just pointers into
251  /// externally owned buffers.
252  std::vector<uint8_t> currentResult;
253  std::vector<ArrayRef<uint8_t>> prevResultList;
254  std::vector<std::vector<uint8_t>> prevResultStorage;
255 
256  /// An up-to-date total size of all of the buffers within `prevResultList`.
257  /// This enables O(1) size checks of the current encoding.
258  size_t prevResultSize = 0;
259 
260  /// The highest required alignment for the start of this section.
261  unsigned requiredAlignment = 1;
262 };
263 
264 /// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need
265 /// to go through an intermediate buffer when interacting with code that wants a
266 /// raw_ostream.
267 class RawEmitterOstream : public raw_ostream {
268 public:
269  explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) {
270  SetUnbuffered();
271  }
272 
273 private:
274  void write_impl(const char *ptr, size_t size) override {
275  emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size});
276  }
277  uint64_t current_pos() const override { return emitter.size(); }
278 
279  /// The section being emitted to.
280  EncodingEmitter &emitter;
281 };
282 } // namespace
283 
284 void EncodingEmitter::writeTo(raw_ostream &os) const {
285  for (auto &prevResult : prevResultList)
286  os.write((const char *)prevResult.data(), prevResult.size());
287  os.write((const char *)currentResult.data(), currentResult.size());
288 }
289 
290 void EncodingEmitter::emitMultiByteVarInt(uint64_t value) {
291  // Compute the number of bytes needed to encode the value. Each byte can hold
292  // up to 7-bits of data. We only check up to the number of bits we can encode
293  // in the first byte (8).
294  uint64_t it = value >> 7;
295  for (size_t numBytes = 2; numBytes < 9; ++numBytes) {
296  if (LLVM_LIKELY(it >>= 7) == 0) {
297  uint64_t encodedValue = (value << 1) | 0x1;
298  encodedValue <<= (numBytes - 1);
299  emitBytes({reinterpret_cast<uint8_t *>(&encodedValue), numBytes});
300  return;
301  }
302  }
303 
304  // If the value is too large to encode in a single byte, emit a special all
305  // zero marker byte and splat the value directly.
306  emitByte(0);
307  emitBytes({reinterpret_cast<uint8_t *>(&value), sizeof(value)});
308 }
309 
310 //===----------------------------------------------------------------------===//
311 // StringSectionBuilder
312 //===----------------------------------------------------------------------===//
313 
314 namespace {
315 /// This class is used to simplify the process of emitting the string section.
316 class StringSectionBuilder {
317 public:
318  /// Add the given string to the string section, and return the index of the
319  /// string within the section.
320  size_t insert(StringRef str) {
321  auto it = strings.insert({llvm::CachedHashStringRef(str), strings.size()});
322  return it.first->second;
323  }
324 
325  /// Write the current set of strings to the given emitter.
326  void write(EncodingEmitter &emitter) {
327  emitter.emitVarInt(strings.size());
328 
329  // Emit the sizes in reverse order, so that we don't need to backpatch an
330  // offset to the string data or have a separate section.
331  for (const auto &it : llvm::reverse(strings))
332  emitter.emitVarInt(it.first.size() + 1);
333  // Emit the string data itself.
334  for (const auto &it : strings)
335  emitter.emitNulTerminatedString(it.first.val());
336  }
337 
338 private:
339  /// A set of strings referenced within the bytecode. The value of the map is
340  /// unused.
341  llvm::MapVector<llvm::CachedHashStringRef, size_t> strings;
342 };
343 } // namespace
344 
345 //===----------------------------------------------------------------------===//
346 // Bytecode Writer
347 //===----------------------------------------------------------------------===//
348 
349 namespace {
350 class BytecodeWriter {
351 public:
352  BytecodeWriter(Operation *op) : numberingState(op) {}
353 
354  /// Write the bytecode for the given root operation.
355  void write(Operation *rootOp, raw_ostream &os,
356  const BytecodeWriterConfig::Impl &config);
357 
358 private:
359  //===--------------------------------------------------------------------===//
360  // Dialects
361 
362  void writeDialectSection(EncodingEmitter &emitter);
363 
364  //===--------------------------------------------------------------------===//
365  // Attributes and Types
366 
367  void writeAttrTypeSection(EncodingEmitter &emitter);
368 
369  //===--------------------------------------------------------------------===//
370  // Operations
371 
372  void writeBlock(EncodingEmitter &emitter, Block *block);
373  void writeOp(EncodingEmitter &emitter, Operation *op);
374  void writeRegion(EncodingEmitter &emitter, Region *region);
375  void writeIRSection(EncodingEmitter &emitter, Operation *op);
376 
377  //===--------------------------------------------------------------------===//
378  // Resources
379 
380  void writeResourceSection(Operation *op, EncodingEmitter &emitter,
381  const BytecodeWriterConfig::Impl &config);
382 
383  //===--------------------------------------------------------------------===//
384  // Strings
385 
386  void writeStringSection(EncodingEmitter &emitter);
387 
388  //===--------------------------------------------------------------------===//
389  // Fields
390 
391  /// The builder used for the string section.
392  StringSectionBuilder stringSection;
393 
394  /// The IR numbering state generated for the root operation.
395  IRNumberingState numberingState;
396 };
397 } // namespace
398 
399 void BytecodeWriter::write(Operation *rootOp, raw_ostream &os,
400  const BytecodeWriterConfig::Impl &config) {
401  EncodingEmitter emitter;
402 
403  // Emit the bytecode file header. This is how we identify the output as a
404  // bytecode file.
405  emitter.emitString("ML\xefR");
406 
407  // Emit the bytecode version.
408  emitter.emitVarInt(bytecode::kVersion);
409 
410  // Emit the producer.
411  emitter.emitNulTerminatedString(config.producer);
412 
413  // Emit the dialect section.
414  writeDialectSection(emitter);
415 
416  // Emit the attributes and types section.
417  writeAttrTypeSection(emitter);
418 
419  // Emit the IR section.
420  writeIRSection(emitter, rootOp);
421 
422  // Emit the resources section.
423  writeResourceSection(rootOp, emitter, config);
424 
425  // Emit the string section.
426  writeStringSection(emitter);
427 
428  // Write the generated bytecode to the provided output stream.
429  emitter.writeTo(os);
430 }
431 
432 //===----------------------------------------------------------------------===//
433 // Dialects
434 
435 /// Write the given entries in contiguous groups with the same parent dialect.
436 /// Each dialect sub-group is encoded with the parent dialect and number of
437 /// elements, followed by the encoding for the entries. The given callback is
438 /// invoked to encode each individual entry.
439 template <typename EntriesT, typename EntryCallbackT>
440 static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
441  EntryCallbackT &&callback) {
442  for (auto it = entries.begin(), e = entries.end(); it != e;) {
443  auto groupStart = it++;
444 
445  // Find the end of the group that shares the same parent dialect.
446  DialectNumbering *currentDialect = groupStart->dialect;
447  it = std::find_if(it, e, [&](const auto &entry) {
448  return entry.dialect != currentDialect;
449  });
450 
451  // Emit the dialect and number of elements.
452  emitter.emitVarInt(currentDialect->number);
453  emitter.emitVarInt(std::distance(groupStart, it));
454 
455  // Emit the entries within the group.
456  for (auto &entry : llvm::make_range(groupStart, it))
457  callback(entry);
458  }
459 }
460 
461 void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
462  EncodingEmitter dialectEmitter;
463 
464  // Emit the referenced dialects.
465  auto dialects = numberingState.getDialects();
466  dialectEmitter.emitVarInt(llvm::size(dialects));
467  for (DialectNumbering &dialect : dialects)
468  dialectEmitter.emitVarInt(stringSection.insert(dialect.name));
469 
470  // Emit the referenced operation names grouped by dialect.
471  auto emitOpName = [&](OpNameNumbering &name) {
472  dialectEmitter.emitVarInt(stringSection.insert(name.name.stripDialect()));
473  };
474  writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName);
475 
476  emitter.emitSection(bytecode::Section::kDialect, std::move(dialectEmitter));
477 }
478 
479 //===----------------------------------------------------------------------===//
480 // Attributes and Types
481 
482 namespace {
483 class DialectWriter : public DialectBytecodeWriter {
484 public:
485  DialectWriter(EncodingEmitter &emitter, IRNumberingState &numberingState,
486  StringSectionBuilder &stringSection)
487  : emitter(emitter), numberingState(numberingState),
488  stringSection(stringSection) {}
489 
490  //===--------------------------------------------------------------------===//
491  // IR
492  //===--------------------------------------------------------------------===//
493 
494  void writeAttribute(Attribute attr) override {
495  emitter.emitVarInt(numberingState.getNumber(attr));
496  }
497  void writeType(Type type) override {
498  emitter.emitVarInt(numberingState.getNumber(type));
499  }
500 
501  void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
502  emitter.emitVarInt(numberingState.getNumber(resource));
503  }
504 
505  //===--------------------------------------------------------------------===//
506  // Primitives
507  //===--------------------------------------------------------------------===//
508 
509  void writeVarInt(uint64_t value) override { emitter.emitVarInt(value); }
510 
511  void writeSignedVarInt(int64_t value) override {
512  emitter.emitSignedVarInt(value);
513  }
514 
515  void writeAPIntWithKnownWidth(const APInt &value) override {
516  size_t bitWidth = value.getBitWidth();
517 
518  // If the value is a single byte, just emit it directly without going
519  // through a varint.
520  if (bitWidth <= 8)
521  return emitter.emitByte(value.getLimitedValue());
522 
523  // If the value fits within a single varint, emit it directly.
524  if (bitWidth <= 64)
525  return emitter.emitSignedVarInt(value.getLimitedValue());
526 
527  // Otherwise, we need to encode a variable number of active words. We use
528  // active words instead of the number of total words under the observation
529  // that smaller values will be more common.
530  unsigned numActiveWords = value.getActiveWords();
531  emitter.emitVarInt(numActiveWords);
532 
533  const uint64_t *rawValueData = value.getRawData();
534  for (unsigned i = 0; i < numActiveWords; ++i)
535  emitter.emitSignedVarInt(rawValueData[i]);
536  }
537 
538  void writeAPFloatWithKnownSemantics(const APFloat &value) override {
539  writeAPIntWithKnownWidth(value.bitcastToAPInt());
540  }
541 
542  void writeOwnedString(StringRef str) override {
543  emitter.emitVarInt(stringSection.insert(str));
544  }
545 
546  void writeOwnedBlob(ArrayRef<char> blob) override {
547  emitter.emitVarInt(blob.size());
548  emitter.emitOwnedBlob(ArrayRef<uint8_t>(
549  reinterpret_cast<const uint8_t *>(blob.data()), blob.size()));
550  }
551 
552 private:
553  EncodingEmitter &emitter;
554  IRNumberingState &numberingState;
555  StringSectionBuilder &stringSection;
556 };
557 } // namespace
558 
559 void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
560  EncodingEmitter attrTypeEmitter;
561  EncodingEmitter offsetEmitter;
562  offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()));
563  offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()));
564 
565  // A functor used to emit an attribute or type entry.
566  uint64_t prevOffset = 0;
567  auto emitAttrOrType = [&](auto &entry) {
568  auto entryValue = entry.getValue();
569 
570  // First, try to emit this entry using the dialect bytecode interface.
571  bool hasCustomEncoding = false;
572  if (const BytecodeDialectInterface *interface = entry.dialect->interface) {
573  // The writer used when emitting using a custom bytecode encoding.
574  DialectWriter dialectWriter(attrTypeEmitter, numberingState,
575  stringSection);
576 
577  if constexpr (std::is_same_v<std::decay_t<decltype(entryValue)>, Type>) {
578  // TODO: We don't currently support custom encoded mutable types.
579  hasCustomEncoding =
580  !entryValue.template hasTrait<TypeTrait::IsMutable>() &&
581  succeeded(interface->writeType(entryValue, dialectWriter));
582  } else {
583  // TODO: We don't currently support custom encoded mutable attributes.
584  hasCustomEncoding =
585  !entryValue.template hasTrait<AttributeTrait::IsMutable>() &&
586  succeeded(interface->writeAttribute(entryValue, dialectWriter));
587  }
588  }
589 
590  // If the entry was not emitted using the dialect interface, emit it using
591  // the textual format.
592  if (!hasCustomEncoding) {
593  RawEmitterOstream(attrTypeEmitter) << entryValue;
594  attrTypeEmitter.emitByte(0);
595  }
596 
597  // Record the offset of this entry.
598  uint64_t curOffset = attrTypeEmitter.size();
599  offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding);
600  prevOffset = curOffset;
601  };
602 
603  // Emit the attribute and type entries for each dialect.
604  writeDialectGrouping(offsetEmitter, numberingState.getAttributes(),
605  emitAttrOrType);
606  writeDialectGrouping(offsetEmitter, numberingState.getTypes(),
607  emitAttrOrType);
608 
609  // Emit the sections to the stream.
610  emitter.emitSection(bytecode::Section::kAttrTypeOffset,
611  std::move(offsetEmitter));
612  emitter.emitSection(bytecode::Section::kAttrType, std::move(attrTypeEmitter));
613 }
614 
615 //===----------------------------------------------------------------------===//
616 // Operations
617 
618 void BytecodeWriter::writeBlock(EncodingEmitter &emitter, Block *block) {
619  ArrayRef<BlockArgument> args = block->getArguments();
620  bool hasArgs = !args.empty();
621 
622  // Emit the number of operations in this block, and if it has arguments. We
623  // use the low bit of the operation count to indicate if the block has
624  // arguments.
625  unsigned numOps = numberingState.getOperationCount(block);
626  emitter.emitVarIntWithFlag(numOps, hasArgs);
627 
628  // Emit the arguments of the block.
629  if (hasArgs) {
630  emitter.emitVarInt(args.size());
631  for (BlockArgument arg : args) {
632  emitter.emitVarInt(numberingState.getNumber(arg.getType()));
633  emitter.emitVarInt(numberingState.getNumber(arg.getLoc()));
634  }
635  }
636 
637  // Emit the operations within the block.
638  for (Operation &op : *block)
639  writeOp(emitter, &op);
640 }
641 
642 void BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
643  emitter.emitVarInt(numberingState.getNumber(op->getName()));
644 
645  // Emit a mask for the operation components. We need to fill this in later
646  // (when we actually know what needs to be emitted), so emit a placeholder for
647  // now.
648  uint64_t maskOffset = emitter.size();
649  uint8_t opEncodingMask = 0;
650  emitter.emitByte(0);
651 
652  // Emit the location for this operation.
653  emitter.emitVarInt(numberingState.getNumber(op->getLoc()));
654 
655  // Emit the attributes of this operation.
656  DictionaryAttr attrs = op->getAttrDictionary();
657  if (!attrs.empty()) {
658  opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
659  emitter.emitVarInt(numberingState.getNumber(op->getAttrDictionary()));
660  }
661 
662  // Emit the result types of the operation.
663  if (unsigned numResults = op->getNumResults()) {
664  opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
665  emitter.emitVarInt(numResults);
666  for (Type type : op->getResultTypes())
667  emitter.emitVarInt(numberingState.getNumber(type));
668  }
669 
670  // Emit the operands of the operation.
671  if (unsigned numOperands = op->getNumOperands()) {
672  opEncodingMask |= bytecode::OpEncodingMask::kHasOperands;
673  emitter.emitVarInt(numOperands);
674  for (Value operand : op->getOperands())
675  emitter.emitVarInt(numberingState.getNumber(operand));
676  }
677 
678  // Emit the successors of the operation.
679  if (unsigned numSuccessors = op->getNumSuccessors()) {
681  emitter.emitVarInt(numSuccessors);
682  for (Block *successor : op->getSuccessors())
683  emitter.emitVarInt(numberingState.getNumber(successor));
684  }
685 
686  // Check for regions.
687  unsigned numRegions = op->getNumRegions();
688  if (numRegions)
690 
691  // Update the mask for the operation.
692  emitter.patchByte(maskOffset, opEncodingMask);
693 
694  // With the mask emitted, we can now emit the regions of the operation. We do
695  // this after mask emission to avoid offset complications that may arise by
696  // emitting the regions first (e.g. if the regions are huge, backpatching the
697  // op encoding mask is more annoying).
698  if (numRegions) {
699  bool isIsolatedFromAbove = op->hasTrait<OpTrait::IsIsolatedFromAbove>();
700  emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove);
701 
702  for (Region &region : op->getRegions())
703  writeRegion(emitter, &region);
704  }
705 }
706 
707 void BytecodeWriter::writeRegion(EncodingEmitter &emitter, Region *region) {
708  // If the region is empty, we only need to emit the number of blocks (which is
709  // zero).
710  if (region->empty())
711  return emitter.emitVarInt(/*numBlocks*/ 0);
712 
713  // Emit the number of blocks and values within the region.
714  unsigned numBlocks, numValues;
715  std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region);
716  emitter.emitVarInt(numBlocks);
717  emitter.emitVarInt(numValues);
718 
719  // Emit the blocks within the region.
720  for (Block &block : *region)
721  writeBlock(emitter, &block);
722 }
723 
724 void BytecodeWriter::writeIRSection(EncodingEmitter &emitter, Operation *op) {
725  EncodingEmitter irEmitter;
726 
727  // Write the IR section the same way as a block with no arguments. Note that
728  // the low-bit of the operation count for a block is used to indicate if the
729  // block has arguments, which in this case is always false.
730  irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false);
731 
732  // Emit the operations.
733  writeOp(irEmitter, op);
734 
735  emitter.emitSection(bytecode::Section::kIR, std::move(irEmitter));
736 }
737 
738 //===----------------------------------------------------------------------===//
739 // Resources
740 
741 namespace {
742 /// This class represents a resource builder implementation for the MLIR
743 /// bytecode format.
744 class ResourceBuilder : public AsmResourceBuilder {
745 public:
746  using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>;
747 
748  ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection,
749  PostProcessFn postProcessFn)
750  : emitter(emitter), stringSection(stringSection),
751  postProcessFn(postProcessFn) {}
752  ~ResourceBuilder() override = default;
753 
754  void buildBlob(StringRef key, ArrayRef<char> data,
755  uint32_t dataAlignment) final {
756  emitter.emitOwnedBlobAndAlignment(data, dataAlignment);
757  postProcessFn(key, AsmResourceEntryKind::Blob);
758  }
759  void buildBool(StringRef key, bool data) final {
760  emitter.emitByte(data);
761  postProcessFn(key, AsmResourceEntryKind::Bool);
762  }
763  void buildString(StringRef key, StringRef data) final {
764  emitter.emitVarInt(stringSection.insert(data));
765  postProcessFn(key, AsmResourceEntryKind::String);
766  }
767 
768 private:
769  EncodingEmitter &emitter;
770  StringSectionBuilder &stringSection;
771  PostProcessFn postProcessFn;
772 };
773 } // namespace
774 
775 void BytecodeWriter::writeResourceSection(
776  Operation *op, EncodingEmitter &emitter,
777  const BytecodeWriterConfig::Impl &config) {
778  EncodingEmitter resourceEmitter;
779  EncodingEmitter resourceOffsetEmitter;
780  uint64_t prevOffset = 0;
782  curResourceEntries;
783 
784  // Functor used to process the offset for a resource of `kind` defined by
785  // 'key'.
786  auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) {
787  uint64_t curOffset = resourceEmitter.size();
788  curResourceEntries.emplace_back(key, kind, curOffset - prevOffset);
789  prevOffset = curOffset;
790  };
791 
792  // Functor used to emit a resource group defined by 'key'.
793  auto emitResourceGroup = [&](uint64_t key) {
794  resourceOffsetEmitter.emitVarInt(key);
795  resourceOffsetEmitter.emitVarInt(curResourceEntries.size());
796  for (auto [key, kind, size] : curResourceEntries) {
797  resourceOffsetEmitter.emitVarInt(stringSection.insert(key));
798  resourceOffsetEmitter.emitVarInt(size);
799  resourceOffsetEmitter.emitByte(kind);
800  }
801  };
802 
803  // Builder used to emit resources.
804  ResourceBuilder entryBuilder(resourceEmitter, stringSection,
805  appendResourceOffset);
806 
807  // Emit the external resource entries.
808  resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size());
809  for (const auto &printer : config.externalResourcePrinters) {
810  curResourceEntries.clear();
811  printer->buildResources(op, entryBuilder);
812  emitResourceGroup(stringSection.insert(printer->getName()));
813  }
814 
815  // Emit the dialect resource entries.
816  for (DialectNumbering &dialect : numberingState.getDialects()) {
817  if (!dialect.asmInterface)
818  continue;
819  curResourceEntries.clear();
820  dialect.asmInterface->buildResources(op, dialect.resources, entryBuilder);
821 
822  // Emit the declaration resources for this dialect, these didn't get emitted
823  // by the interface. These resources don't have data attached, so just use a
824  // "blob" kind as a placeholder.
825  for (const auto &resource : dialect.resourceMap)
826  if (resource.second->isDeclaration)
827  appendResourceOffset(resource.first, AsmResourceEntryKind::Blob);
828 
829  // Emit the resource group for this dialect.
830  if (!curResourceEntries.empty())
831  emitResourceGroup(dialect.number);
832  }
833 
834  // If we didn't emit any resource groups, elide the resource sections.
835  if (resourceOffsetEmitter.size() == 0)
836  return;
837 
838  emitter.emitSection(bytecode::Section::kResourceOffset,
839  std::move(resourceOffsetEmitter));
840  emitter.emitSection(bytecode::Section::kResource, std::move(resourceEmitter));
841 }
842 
843 //===----------------------------------------------------------------------===//
844 // Strings
845 
846 void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) {
847  EncodingEmitter stringEmitter;
848  stringSection.write(stringEmitter);
849  emitter.emitSection(bytecode::Section::kString, std::move(stringEmitter));
850 }
851 
852 //===----------------------------------------------------------------------===//
853 // Entry Points
854 //===----------------------------------------------------------------------===//
855 
856 void mlir::writeBytecodeToFile(Operation *op, raw_ostream &os,
857  const BytecodeWriterConfig &config) {
858  BytecodeWriter writer(op);
859  writer.write(op, os, config.getImpl());
860 }
static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries, EntryCallbackT &&callback)
Write the given entries in contiguous groups with the same parent dialect.
static constexpr const bool value
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This class represents an opaque handle to a dialect resource entry.
This class is used to build resource entries for use by the printer.
Definition: AsmState.h:236
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:296
Block represents an ordered list of Operations.
Definition: Block.h:30
BlockArgListType getArguments()
Definition: Block.h:76
This class contains the configuration used for the bytecode writer.
BytecodeWriterConfig(StringRef producer="MLIR" LLVM_VERSION_STRING)
producer is an optional string that can be used to identify the producer of the bytecode when reading...
void attachFallbackResourcePrinter(FallbackAsmResourceMap &map)
Attach resource printers to the AsmState for the fallback resources in the given map.
const Impl & getImpl() const
Return an instance of the internal implementation.
void attachResourcePrinter(std::unique_ptr< AsmResourcePrinter > printer)
Attach the given resource printer to the writer configuration.
This class defines a virtual interface for writing to a bytecode stream, providing hooks into the byt...
A fallback map containing external resources not explicitly handled by another parser/printer.
Definition: AsmState.h:409
This class provides the API for ops that are known to be isolated from above.
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:31
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:528
unsigned getNumSuccessors()
Definition: Operation.h:506
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:477
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:154
unsigned getNumOperands()
Definition: Operation.h:263
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition: Operation.h:480
DictionaryAttr getAttrDictionary()
Return all of the attributes on this operation as a DictionaryAttr.
Definition: Operation.h:359
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:50
result_type_range getResultTypes()
Definition: Operation.h:345
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:295
SuccessorRange getSuccessors()
Definition: Operation.h:503
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:321
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
bool empty()
Definition: Region.h:60
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
Definition: Types.h:74
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:85
This class manages numbering IR entities in preparation of bytecode emission.
Definition: IRNumbering.h:134
@ kAttrType
This section contains the attributes and types referenced within an IR module.
Definition: Encoding.h:47
@ kAttrTypeOffset
This section contains the offsets for the attribute and types within the AttrType section.
Definition: Encoding.h:51
@ kIR
This section contains the list of operations serialized into the bytecode, and their nested regions/operations.
Definition: Encoding.h:55
@ kResource
This section contains the resources of the bytecode.
Definition: Encoding.h:58
@ kResourceOffset
This section contains the offsets of resources within the Resource section.
Definition: Encoding.h:62
@ kDialect
This section contains the dialects referenced within an IR module.
Definition: Encoding.h:43
@ kString
This section contains strings referenced within the bytecode.
Definition: Encoding.h:40
@ kAlignmentByte
An arbitrary value used to fill alignment padding.
Definition: Encoding.h:30
@ kVersion
The current bytecode version.
Definition: Encoding.h:27
Include the generated interface declarations.
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:68
AsmResourceEntryKind
This enum represents the different kinds of resource values.
Definition: AsmState.h:269
@ String
A string value.
@ Bool
A boolean value.
@ Blob
A blob of data with an accompanying alignment.
void writeBytecodeToFile(Operation *op, raw_ostream &os, const BytecodeWriterConfig &config={})
Write the bytecode for the given operation to the provided output stream.
StringRef producer
The producer of the bytecode.
SmallVector< std::unique_ptr< AsmResourcePrinter > > externalResourcePrinters
A collection of non-dialect resource printers.
This class represents a numbering entry for a Dialect.
Definition: IRNumbering.h:105
unsigned number
The number assigned to the dialect.
Definition: IRNumbering.h:113
This class represents the numbering entry of an operation name.
Definition: IRNumbering.h:64