MLIR 23.0.0git
BytecodeWriter.cpp
Go to the documentation of this file.
1//===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "IRNumbering.h"
14#include "mlir/IR/Attributes.h"
15#include "mlir/IR/Diagnostics.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/CachedHashString.h"
19#include "llvm/ADT/MapVector.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Support/DebugLog.h"
23#include "llvm/Support/Endian.h"
24#include "llvm/Support/raw_ostream.h"
25#include <optional>
26
27#define DEBUG_TYPE "mlir-bytecode-writer"
28
29using namespace mlir;
30using namespace mlir::bytecode::detail;
31
32//===----------------------------------------------------------------------===//
33// BytecodeWriterConfig
34//===----------------------------------------------------------------------===//
35
37 Impl(StringRef producer) : producer(producer) {}
38
39 /// Version to use when writing.
40 /// Note: This only differs from kVersion if a specific version is set.
42
43 /// A flag specifying whether to elide emission of resources into the bytecode
44 /// file.
46
47 /// A flag specifying whether to elide emission of locations.
49
50 /// A map containing dialect version information for each dialect to emit.
51 llvm::StringMap<std::unique_ptr<DialectVersion>> dialectVersionMap;
52
53 /// The producer of the bytecode.
54 StringRef producer;
55
56 /// Printer callbacks used to emit custom type and attribute encodings.
61
62 /// A collection of non-dialect resource printers.
64};
65
67 : impl(std::make_unique<Impl>(producer)) {}
75
77
80 return impl->attributeWriterCallbacks;
81}
82
85 return impl->typeWriterCallbacks;
86}
87
89 std::unique_ptr<AttrTypeBytecodeWriter<Attribute>> callback) {
90 impl->attributeWriterCallbacks.emplace_back(std::move(callback));
91}
92
94 std::unique_ptr<AttrTypeBytecodeWriter<Type>> callback) {
95 impl->typeWriterCallbacks.emplace_back(std::move(callback));
96}
97
99 std::unique_ptr<AsmResourcePrinter> printer) {
100 impl->externalResourcePrinters.emplace_back(std::move(printer));
101}
102
104 bool shouldElideResourceData) {
105 impl->shouldElideResourceData = shouldElideResourceData;
106}
107
111
113 return impl->shouldElideLocations;
114}
115
117 impl->bytecodeVersion = bytecodeVersion;
118}
119
121 return impl->bytecodeVersion;
122}
123
124llvm::StringMap<std::unique_ptr<DialectVersion>> &
126 return impl->dialectVersionMap;
127}
128
130 llvm::StringRef dialectName,
131 std::unique_ptr<DialectVersion> dialectVersion) const {
132 assert(!impl->dialectVersionMap.contains(dialectName) &&
133 "cannot override a previously set dialect version");
134 impl->dialectVersionMap.insert({dialectName, std::move(dialectVersion)});
135}
136
137//===----------------------------------------------------------------------===//
138// EncodingEmitter
139//===----------------------------------------------------------------------===//
140
141namespace {
142/// This class functions as the underlying encoding emitter for the bytecode
143/// writer. This class is a bit different compared to other types of encoders;
144/// it does not use a single buffer, but instead may contain several buffers
145/// (some owned by the writer, and some not) that get concatted during the final
146/// emission.
147class EncodingEmitter {
148public:
149 EncodingEmitter() = default;
150 EncodingEmitter(const EncodingEmitter &) = delete;
151 EncodingEmitter &operator=(const EncodingEmitter &) = delete;
152
153 /// Write the current contents to the provided stream.
154 void writeTo(raw_ostream &os) const;
155
156 /// Return the current size of the encoded buffer.
157 size_t size() const { return prevResultSize + currentResult.size(); }
158
159 //===--------------------------------------------------------------------===//
160 // Emission
161 //===--------------------------------------------------------------------===//
162
163 /// Backpatch a byte in the result buffer at the given offset.
164 void patchByte(uint64_t offset, uint8_t value, StringLiteral desc) {
165 LDBG() << "patchByte(" << offset << ',' << uint64_t(value) << ")\t" << desc;
166 assert(offset < size() && offset >= prevResultSize &&
167 "cannot patch previously emitted data");
168 currentResult[offset - prevResultSize] = value;
169 }
170
171 /// Emit the provided blob of data, which is owned by the caller and is
172 /// guaranteed to not die before the end of the bytecode process.
173 void emitOwnedBlob(ArrayRef<uint8_t> data, StringLiteral desc) {
174 LDBG() << "emitOwnedBlob(" << data.size() << "b)\t" << desc;
175 // Push the current buffer before adding the provided data.
176 appendResult(std::move(currentResult));
177 appendOwnedResult(data);
178 }
179
180 /// Emit the provided blob of data that has the given alignment, which is
181 /// owned by the caller and is guaranteed to not die before the end of the
182 /// bytecode process. The alignment value is also encoded, making it available
183 /// on load.
184 void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment,
185 StringLiteral desc) {
186 emitVarInt(alignment, desc);
187 emitVarInt(data.size(), desc);
188
189 alignTo(alignment);
190 emitOwnedBlob(data, desc);
191 }
192 void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment,
193 StringLiteral desc) {
194 ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
195 data.size());
196 emitOwnedBlobAndAlignment(castedData, alignment, desc);
197 }
198
199 /// Align the emitter to the given alignment.
200 void alignTo(unsigned alignment) {
201 if (alignment < 2)
202 return;
203 assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment");
204
205 // Check to see if we need to emit any padding bytes to meet the desired
206 // alignment.
207 size_t curOffset = size();
208 size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset;
209 while (paddingSize--)
210 emitByte(bytecode::kAlignmentByte, "alignment byte");
211
212 // Keep track of the maximum required alignment.
213 requiredAlignment = std::max(requiredAlignment, alignment);
214 }
215
216 //===--------------------------------------------------------------------===//
217 // Integer Emission
218
219 /// Emit a single byte.
220 template <typename T>
221 void emitByte(T byte, StringLiteral desc) {
222 LDBG() << "emitByte(" << uint64_t(byte) << ")\t" << desc;
223 currentResult.push_back(static_cast<uint8_t>(byte));
224 }
225
226 /// Emit a range of bytes.
227 void emitBytes(ArrayRef<uint8_t> bytes, StringLiteral desc) {
228 LDBG() << "emitBytes(" << bytes.size() << "b)\t" << desc;
229 llvm::append_range(currentResult, bytes);
230 }
231
232 /// Emit a variable length integer. The first encoded byte contains a prefix
233 /// in the low bits indicating the encoded length of the value. This length
234 /// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits
235 /// indicate the number of _additional_ bytes (not including the prefix byte).
236 /// All remaining bits in the first byte, along with all of the bits in
237 /// additional bytes, provide the value of the integer encoded in
238 /// little-endian order.
239 void emitVarInt(uint64_t value, StringLiteral desc) {
240 LDBG() << "emitVarInt(" << value << ")\t" << desc;
241
242 // In the most common case, the value can be represented in a single byte.
243 // Given how hot this case is, explicitly handle that here.
244 if ((value >> 7) == 0)
245 return emitByte((value << 1) | 0x1, desc);
246 emitMultiByteVarInt(value, desc);
247 }
248
249 /// Emit a signed variable length integer. Signed varints are encoded using
250 /// a varint with zigzag encoding, meaning that we use the low bit of the
251 /// value to indicate the sign of the value. This allows for more efficient
252 /// encoding of negative values by limiting the number of active bits
253 void emitSignedVarInt(uint64_t value, StringLiteral desc) {
254 emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63), desc);
255 }
256
257 /// Emit a variable length integer whose low bit is used to encode the
258 /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
259 void emitVarIntWithFlag(uint64_t value, bool flag, StringLiteral desc) {
260 emitVarInt((value << 1) | (flag ? 1 : 0), desc);
261 }
262
263 //===--------------------------------------------------------------------===//
264 // String Emission
265
266 /// Emit the given string as a nul terminated string.
267 void emitNulTerminatedString(StringRef str, StringLiteral desc) {
268 emitString(str, desc);
269 emitByte(0, "null terminator");
270 }
271
272 /// Emit the given string without a nul terminator.
273 void emitString(StringRef str, StringLiteral desc) {
274 emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()},
275 desc);
276 }
277
278 //===--------------------------------------------------------------------===//
279 // Section Emission
280
281 /// Emit a nested section of the given code, whose contents are encoded in the
282 /// provided emitter.
283 void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) {
284 // Emit the section code and length. The high bit of the code is used to
285 // indicate whether the section alignment is present, so save an offset to
286 // it.
287 uint64_t codeOffset = currentResult.size();
288 emitByte(code, "section code");
289 emitVarInt(emitter.size(), "section size");
290
291 // Integrate the alignment of the section into this emitter if necessary.
292 unsigned emitterAlign = emitter.requiredAlignment;
293 if (emitterAlign > 1) {
294 if (size() & (emitterAlign - 1)) {
295 emitVarInt(emitterAlign, "section alignment");
296 alignTo(emitterAlign);
297
298 // Indicate that we needed to align the section, the high bit of the
299 // code field is used for this.
300 currentResult[codeOffset] |= 0b10000000;
301 } else {
302 // Otherwise, if we happen to be at a compatible offset, we just
303 // remember that we need this alignment.
304 requiredAlignment = std::max(requiredAlignment, emitterAlign);
305 }
306 }
307
308 // Push our current buffer and then merge the provided section body into
309 // ours.
310 appendResult(std::move(currentResult));
311 for (std::vector<uint8_t> &result : emitter.prevResultStorage)
312 prevResultStorage.push_back(std::move(result));
313 llvm::append_range(prevResultList, emitter.prevResultList);
314 prevResultSize += emitter.prevResultSize;
315 appendResult(std::move(emitter.currentResult));
316 }
317
318private:
319 /// Emit the given value using a variable width encoding. This method is a
320 /// fallback when the number of bytes needed to encode the value is greater
321 /// than 1. We mark it noinline here so that the single byte hot path isn't
322 /// pessimized.
323 LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value,
324 StringLiteral desc);
325
326 /// Append a new result buffer to the current contents.
327 void appendResult(std::vector<uint8_t> &&result) {
328 if (result.empty())
329 return;
330 prevResultStorage.emplace_back(std::move(result));
331 appendOwnedResult(prevResultStorage.back());
332 }
333 void appendOwnedResult(ArrayRef<uint8_t> result) {
334 if (result.empty())
335 return;
336 prevResultSize += result.size();
337 prevResultList.emplace_back(result);
338 }
339
340 /// The result of the emitter currently being built. We refrain from building
341 /// a single buffer to simplify emitting sections, large data, and more. The
342 /// result is thus represented using multiple distinct buffers, some of which
343 /// we own (via prevResultStorage), and some of which are just pointers into
344 /// externally owned buffers.
345 std::vector<uint8_t> currentResult;
346 std::vector<ArrayRef<uint8_t>> prevResultList;
347 std::vector<std::vector<uint8_t>> prevResultStorage;
348
349 /// An up-to-date total size of all of the buffers within `prevResultList`.
350 /// This enables O(1) size checks of the current encoding.
351 size_t prevResultSize = 0;
352
353 /// The highest required alignment for the start of this section.
354 unsigned requiredAlignment = 1;
355};
356
357//===----------------------------------------------------------------------===//
358// StringSectionBuilder
359//===----------------------------------------------------------------------===//
360
361namespace {
362/// This class is used to simplify the process of emitting the string section.
363class StringSectionBuilder {
364public:
365 /// Add the given string to the string section, and return the index of the
366 /// string within the section.
367 size_t insert(StringRef str) {
368 auto it = strings.insert({llvm::CachedHashStringRef(str), strings.size()});
369 return it.first->second;
370 }
371
372 /// Write the current set of strings to the given emitter.
373 void write(EncodingEmitter &emitter) {
374 emitter.emitVarInt(strings.size(), "string section size");
375
376 // Emit the sizes in reverse order, so that we don't need to backpatch an
377 // offset to the string data or have a separate section.
378 for (const auto &it : llvm::reverse(strings))
379 emitter.emitVarInt(it.first.size() + 1, "string size");
380 // Emit the string data itself.
381 for (const auto &it : strings)
382 emitter.emitNulTerminatedString(it.first.val(), "string");
383 }
384
385private:
386 /// A set of strings referenced within the bytecode. The value of the map is
387 /// unused.
388 llvm::MapVector<llvm::CachedHashStringRef, size_t> strings;
389};
390} // namespace
391
392class DialectWriter : public DialectBytecodeWriter {
393 using DialectVersionMapT = llvm::StringMap<std::unique_ptr<DialectVersion>>;
394
395public:
396 DialectWriter(int64_t bytecodeVersion, EncodingEmitter &emitter,
397 IRNumberingState &numberingState,
398 StringSectionBuilder &stringSection,
399 const DialectVersionMapT &dialectVersionMap)
400 : bytecodeVersion(bytecodeVersion), emitter(emitter),
401 numberingState(numberingState), stringSection(stringSection),
402 dialectVersionMap(dialectVersionMap) {}
403
404 //===--------------------------------------------------------------------===//
405 // IR
406 //===--------------------------------------------------------------------===//
407
408 void writeAttribute(Attribute attr) override {
409 emitter.emitVarInt(numberingState.getNumber(attr), "dialect attr");
410 }
411 void writeOptionalAttribute(Attribute attr) override {
412 if (!attr) {
413 emitter.emitVarInt(0, "dialect optional attr none");
414 return;
415 }
416 emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true,
417 "dialect optional attr");
418 }
419
420 void writeType(Type type) override {
421 emitter.emitVarInt(numberingState.getNumber(type), "dialect type");
422 }
423
424 void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
425 emitter.emitVarInt(numberingState.getNumber(resource), "dialect resource");
426 }
427
428 //===--------------------------------------------------------------------===//
429 // Primitives
430 //===--------------------------------------------------------------------===//
431
432 void writeVarInt(uint64_t value) override {
433 emitter.emitVarInt(value, "dialect writer");
434 }
435
436 void writeSignedVarInt(int64_t value) override {
437 emitter.emitSignedVarInt(value, "dialect writer");
438 }
439
440 void writeAPIntWithKnownWidth(const APInt &value) override {
441 size_t bitWidth = value.getBitWidth();
442
443 // If the value is a single byte, just emit it directly without going
444 // through a varint.
445 if (bitWidth <= 8)
446 return emitter.emitByte(value.getLimitedValue(), "dialect APInt");
447
448 // If the value fits within a single varint, emit it directly.
449 if (bitWidth <= 64)
450 return emitter.emitSignedVarInt(value.getLimitedValue(), "dialect APInt");
451
452 // Otherwise, we need to encode a variable number of active words. We use
453 // active words instead of the number of total words under the observation
454 // that smaller values will be more common.
455 unsigned numActiveWords = value.getActiveWords();
456 emitter.emitVarInt(numActiveWords, "dialect APInt word count");
457
458 const uint64_t *rawValueData = value.getRawData();
459 for (unsigned i = 0; i < numActiveWords; ++i)
460 emitter.emitSignedVarInt(rawValueData[i], "dialect APInt word");
461 }
462
463 void writeAPFloatWithKnownSemantics(const APFloat &value) override {
464 writeAPIntWithKnownWidth(value.bitcastToAPInt());
465 }
466
467 void writeOwnedString(StringRef str) override {
468 emitter.emitVarInt(stringSection.insert(str), "dialect string");
469 }
470
471 void writeOwnedBlob(ArrayRef<char> blob) override {
472 emitter.emitVarInt(blob.size(), "dialect blob");
473 emitter.emitOwnedBlob(
474 ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(blob.data()),
475 blob.size()),
476 "dialect blob");
477 }
478
479 void writeUnownedBlob(ArrayRef<char> blob) override {
480 emitter.emitVarInt(blob.size(), "dialect blob");
481 emitter.emitBytes(
482 ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(blob.data()),
483 blob.size()),
484 "dialect blob");
485 }
486
487 void writeOwnedBool(bool value) override {
488 emitter.emitByte(value, "dialect bool");
489 }
490
491 int64_t getBytecodeVersion() const override { return bytecodeVersion; }
492
493 FailureOr<const DialectVersion *>
494 getDialectVersion(StringRef dialectName) const override {
495 auto dialectEntry = dialectVersionMap.find(dialectName);
496 if (dialectEntry == dialectVersionMap.end())
497 return failure();
498 return dialectEntry->getValue().get();
499 }
500
501private:
502 int64_t bytecodeVersion;
503 EncodingEmitter &emitter;
504 IRNumberingState &numberingState;
505 StringSectionBuilder &stringSection;
506 const DialectVersionMapT &dialectVersionMap;
507};
508
509namespace {
510class PropertiesSectionBuilder {
511public:
512 PropertiesSectionBuilder(IRNumberingState &numberingState,
513 StringSectionBuilder &stringSection,
514 const BytecodeWriterConfig::Impl &config)
515 : numberingState(numberingState), stringSection(stringSection),
516 config(config) {}
517
518 /// Emit the op properties in the properties section and return the index of
519 /// the properties within the section. Return -1 if no properties was emitted.
520 std::optional<ssize_t> emit(Operation *op) {
521 EncodingEmitter propertiesEmitter;
522 if (!op->getPropertiesStorageSize())
523 return std::nullopt;
524 if (!op->isRegistered()) {
525 // Unregistered op are storing properties as an optional attribute.
526 Attribute prop = *op->getPropertiesStorage().as<Attribute *>();
527 if (!prop)
528 return std::nullopt;
529 EncodingEmitter sizeEmitter;
530 sizeEmitter.emitVarInt(numberingState.getNumber(prop), "properties size");
531 scratch.clear();
532 llvm::raw_svector_ostream os(scratch);
533 sizeEmitter.writeTo(os);
534 return emit(scratch);
535 }
536
537 EncodingEmitter emitter;
538 DialectWriter propertiesWriter(config.bytecodeVersion, emitter,
539 numberingState, stringSection,
540 config.dialectVersionMap);
541 auto iface = cast<BytecodeOpInterface>(op);
542 iface.writeProperties(propertiesWriter);
543 scratch.clear();
544 llvm::raw_svector_ostream os(scratch);
545 emitter.writeTo(os);
546 return emit(scratch);
547 }
548
549 /// Write the current set of properties to the given emitter.
550 void write(EncodingEmitter &emitter) {
551 emitter.emitVarInt(propertiesStorage.size(), "properties size");
552 if (propertiesStorage.empty())
553 return;
554 for (const auto &storage : propertiesStorage) {
555 if (storage.empty()) {
556 emitter.emitBytes(ArrayRef<uint8_t>(), "empty properties");
557 continue;
558 }
559 emitter.emitBytes(ArrayRef(reinterpret_cast<const uint8_t *>(&storage[0]),
560 storage.size()),
561 "property");
562 }
563 }
564
565 /// Returns true if the section is empty.
566 bool empty() { return propertiesStorage.empty(); }
567
568private:
569 /// Emit raw data and returns the offset in the internal buffer.
570 /// Data are deduplicated and will be copied in the internal buffer only if
571 /// they don't exist there already.
572 ssize_t emit(ArrayRef<char> rawProperties) {
573 // Populate a scratch buffer with the properties size.
574 SmallVector<char> sizeScratch;
575 {
576 EncodingEmitter sizeEmitter;
577 sizeEmitter.emitVarInt(rawProperties.size(), "properties");
578 llvm::raw_svector_ostream os(sizeScratch);
579 sizeEmitter.writeTo(os);
580 }
581 // Append a new storage to the table now.
582 size_t index = propertiesStorage.size();
583 propertiesStorage.emplace_back();
584 std::vector<char> &newStorage = propertiesStorage.back();
585 size_t propertiesSize = sizeScratch.size() + rawProperties.size();
586 newStorage.reserve(propertiesSize);
587 llvm::append_range(newStorage, sizeScratch);
588 llvm::append_range(newStorage, rawProperties);
589
590 // Try to de-duplicate the new serialized properties.
591 // If the properties is a duplicate, pop it back from the storage.
592 auto inserted = propertiesUniquing.insert(
593 std::make_pair(ArrayRef<char>(newStorage), index));
594 if (!inserted.second)
595 propertiesStorage.pop_back();
596 return inserted.first->getSecond();
597 }
598
599 /// Storage for properties.
600 std::vector<std::vector<char>> propertiesStorage;
601 SmallVector<char> scratch;
602 DenseMap<ArrayRef<char>, int64_t> propertiesUniquing;
603 IRNumberingState &numberingState;
604 StringSectionBuilder &stringSection;
605 const BytecodeWriterConfig::Impl &config;
606};
607} // namespace
608
609/// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need
610/// to go through an intermediate buffer when interacting with code that wants a
611/// raw_ostream.
612class RawEmitterOstream : public raw_ostream {
613public:
614 explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) {
615 SetUnbuffered();
616 }
617
618private:
619 void write_impl(const char *ptr, size_t size) override {
620 emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size},
621 "raw emitter");
622 }
623 uint64_t current_pos() const override { return emitter.size(); }
624
625 /// The section being emitted to.
626 EncodingEmitter &emitter;
627};
628} // namespace
629
630void EncodingEmitter::writeTo(raw_ostream &os) const {
631 // Reserve space in the ostream for the encoded contents.
632 os.reserveExtraSpace(size());
633
634 for (auto &prevResult : prevResultList)
635 os.write((const char *)prevResult.data(), prevResult.size());
636 os.write((const char *)currentResult.data(), currentResult.size());
637}
638
639void EncodingEmitter::emitMultiByteVarInt(uint64_t value, StringLiteral desc) {
640 // Compute the number of bytes needed to encode the value. Each byte can hold
641 // up to 7-bits of data. We only check up to the number of bits we can encode
642 // in the first byte (8).
643 uint64_t it = value >> 7;
644 for (size_t numBytes = 2; numBytes < 9; ++numBytes) {
645 if (LLVM_LIKELY(it >>= 7) == 0) {
646 uint64_t encodedValue = (value << 1) | 0x1;
647 encodedValue <<= (numBytes - 1);
648 llvm::support::ulittle64_t encodedValueLE(encodedValue);
649 emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes}, desc);
650 return;
651 }
652 }
653
654 // If the value is too large to encode in a single byte, emit a special all
655 // zero marker byte and splat the value directly.
656 emitByte(0, desc);
657 llvm::support::ulittle64_t valueLE(value);
658 emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)}, desc);
659}
660
661//===----------------------------------------------------------------------===//
662// Bytecode Writer
663//===----------------------------------------------------------------------===//
664
665namespace {
666class BytecodeWriter {
667public:
668 BytecodeWriter(Operation *op, const BytecodeWriterConfig &config)
669 : numberingState(op, config), config(config.getImpl()),
670 propertiesSection(numberingState, stringSection, config.getImpl()) {}
671
672 /// Write the bytecode for the given root operation.
673 LogicalResult write(Operation *rootOp, raw_ostream &os);
674
675private:
676 //===--------------------------------------------------------------------===//
677 // Dialects
678
679 void writeDialectSection(EncodingEmitter &emitter);
680
681 //===--------------------------------------------------------------------===//
682 // Attributes and Types
683
684 void writeAttrTypeSection(EncodingEmitter &emitter);
685
686 //===--------------------------------------------------------------------===//
687 // Operations
688
689 LogicalResult writeBlock(EncodingEmitter &emitter, Block *block);
690 LogicalResult writeOp(EncodingEmitter &emitter, Operation *op);
691 LogicalResult writeRegion(EncodingEmitter &emitter, Region *region);
692 LogicalResult writeIRSection(EncodingEmitter &emitter, Operation *op);
693
694 LogicalResult writeRegions(EncodingEmitter &emitter,
695 MutableArrayRef<Region> regions) {
696 return success(llvm::all_of(regions, [&](Region &region) {
697 return succeeded(writeRegion(emitter, &region));
698 }));
699 }
700
701 //===--------------------------------------------------------------------===//
702 // Resources
703
704 void writeResourceSection(Operation *op, EncodingEmitter &emitter);
705
706 //===--------------------------------------------------------------------===//
707 // Strings
708
709 void writeStringSection(EncodingEmitter &emitter);
710
711 //===--------------------------------------------------------------------===//
712 // Properties
713
714 void writePropertiesSection(EncodingEmitter &emitter);
715
716 //===--------------------------------------------------------------------===//
717 // Helpers
718
719 void writeUseListOrders(EncodingEmitter &emitter, uint8_t &opEncodingMask,
720 ValueRange range);
721
722 //===--------------------------------------------------------------------===//
723 // Fields
724
725 /// The builder used for the string section.
726 StringSectionBuilder stringSection;
727
728 /// The IR numbering state generated for the root operation.
729 IRNumberingState numberingState;
730
731 /// Configuration dictating bytecode emission.
732 const BytecodeWriterConfig::Impl &config;
733
734 /// Storage for the properties section
735 PropertiesSectionBuilder propertiesSection;
736};
737} // namespace
738
739LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
740 EncodingEmitter emitter;
741
742 // Emit the bytecode file header. This is how we identify the output as a
743 // bytecode file.
744 emitter.emitString("ML\xefR", "bytecode header");
745
746 // Emit the bytecode version.
747 if (config.bytecodeVersion < bytecode::kMinSupportedVersion ||
748 config.bytecodeVersion > bytecode::kVersion)
749 return rootOp->emitError()
750 << "unsupported version requested " << config.bytecodeVersion
751 << ", must be in range ["
752 << static_cast<int64_t>(bytecode::kMinSupportedVersion) << ", "
753 << static_cast<int64_t>(bytecode::kVersion) << ']';
754 emitter.emitVarInt(config.bytecodeVersion, "bytecode version");
755
756 // Emit the producer.
757 emitter.emitNulTerminatedString(config.producer, "bytecode producer");
758
759 // Emit the dialect section.
760 writeDialectSection(emitter);
761
762 // Emit the attributes and types section.
763 writeAttrTypeSection(emitter);
764
765 // Emit the IR section.
766 if (failed(writeIRSection(emitter, rootOp)))
767 return failure();
768
769 // Emit the resources section.
770 writeResourceSection(rootOp, emitter);
771
772 // Emit the string section.
773 writeStringSection(emitter);
774
775 // Emit the properties section.
776 if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding)
777 writePropertiesSection(emitter);
778 else if (!propertiesSection.empty())
779 return rootOp->emitError(
780 "unexpected properties emitted incompatible with bytecode <5");
781
782 // Write the generated bytecode to the provided output stream.
783 emitter.writeTo(os);
784
785 return success();
786}
787
788//===----------------------------------------------------------------------===//
789// Dialects
790//===----------------------------------------------------------------------===//
791
792/// Write the given entries in contiguous groups with the same parent dialect.
793/// Each dialect sub-group is encoded with the parent dialect and number of
794/// elements, followed by the encoding for the entries. The given callback is
795/// invoked to encode each individual entry.
796template <typename EntriesT, typename EntryCallbackT>
797static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
798 EntryCallbackT &&callback) {
799 for (auto it = entries.begin(), e = entries.end(); it != e;) {
800 auto groupStart = it++;
801
802 // Find the end of the group that shares the same parent dialect.
803 DialectNumbering *currentDialect = groupStart->dialect;
804 it = std::find_if(it, e, [&](const auto &entry) {
805 return entry.dialect != currentDialect;
806 });
807
808 // Emit the dialect and number of elements.
809 emitter.emitVarInt(currentDialect->number, "dialect number");
810 emitter.emitVarInt(std::distance(groupStart, it), "dialect offset");
811
812 // Emit the entries within the group.
813 for (auto &entry : llvm::make_range(groupStart, it))
814 callback(entry);
815 }
816}
817
818void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
819 EncodingEmitter dialectEmitter;
820
821 // Emit the referenced dialects.
822 auto dialects = numberingState.getDialects();
823 dialectEmitter.emitVarInt(llvm::size(dialects), "dialects count");
824 for (DialectNumbering &dialect : dialects) {
825 // Write the string section and get the ID.
826 size_t nameID = stringSection.insert(dialect.name);
827
828 if (config.bytecodeVersion < bytecode::kDialectVersioning) {
829 dialectEmitter.emitVarInt(nameID, "dialect name ID");
830 continue;
831 }
832
833 // Try writing the version to the versionEmitter.
834 EncodingEmitter versionEmitter;
835 if (dialect.interface) {
836 // The writer used when emitting using a custom bytecode encoding.
837 DialectWriter versionWriter(config.bytecodeVersion, versionEmitter,
838 numberingState, stringSection,
839 config.dialectVersionMap);
840 dialect.interface->writeVersion(versionWriter);
841 }
842
843 // If the version emitter is empty, version is not available. We can encode
844 // this in the dialect ID, so if there is no version, we don't write the
845 // section.
846 size_t versionAvailable = versionEmitter.size() > 0;
847 dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable,
848 "dialect version");
849 if (versionAvailable)
850 dialectEmitter.emitSection(bytecode::Section::kDialectVersions,
851 std::move(versionEmitter));
852 }
853
854 if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation)
855 dialectEmitter.emitVarInt(size(numberingState.getOpNames()),
856 "op names count");
857
858 // Emit the referenced operation names grouped by dialect.
859 auto emitOpName = [&](OpNameNumbering &name) {
860 size_t stringId = stringSection.insert(name.name.stripDialect());
861 if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding)
862 dialectEmitter.emitVarInt(stringId, "dialect op name");
863 else
864 dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered(),
865 "dialect op name");
866 };
867 writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName);
868
869 emitter.emitSection(bytecode::Section::kDialect, std::move(dialectEmitter));
870}
871
872//===----------------------------------------------------------------------===//
873// Attributes and Types
874//===----------------------------------------------------------------------===//
875
876void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
877 EncodingEmitter attrTypeEmitter;
878 EncodingEmitter offsetEmitter;
879 offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()),
880 "attributes count");
881 offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()),
882 "types count");
883
884 // A functor used to emit an attribute or type entry.
885 uint64_t prevOffset = 0;
886 auto emitAttrOrType = [&](auto &entry) {
887 auto entryValue = entry.getValue();
888
889 auto emitAttrOrTypeRawImpl = [&]() -> void {
890 RawEmitterOstream(attrTypeEmitter) << entryValue;
891 attrTypeEmitter.emitByte(0, "attr/type separator");
892 };
893 auto emitAttrOrTypeImpl = [&]() -> bool {
894 // TODO: We don't currently support custom encoded mutable types and
895 // attributes.
896 if (entryValue.template hasTrait<TypeTrait::IsMutable>() ||
897 entryValue.template hasTrait<AttributeTrait::IsMutable>()) {
898 emitAttrOrTypeRawImpl();
899 return false;
900 }
901
902 DialectWriter dialectWriter(config.bytecodeVersion, attrTypeEmitter,
903 numberingState, stringSection,
904 config.dialectVersionMap);
905 if constexpr (std::is_same_v<std::decay_t<decltype(entryValue)>, Type>) {
906 for (const auto &callback : config.typeWriterCallbacks) {
907 if (succeeded(callback->write(entryValue, dialectWriter)))
908 return true;
909 }
910 if (const BytecodeDialectInterface *interface =
911 entry.dialect->interface) {
912 if (succeeded(interface->writeType(entryValue, dialectWriter)))
913 return true;
914 }
915 } else {
916 for (const auto &callback : config.attributeWriterCallbacks) {
917 if (succeeded(callback->write(entryValue, dialectWriter)))
918 return true;
919 }
920 if (const BytecodeDialectInterface *interface =
921 entry.dialect->interface) {
922 if (succeeded(interface->writeAttribute(entryValue, dialectWriter)))
923 return true;
924 }
925 }
926
927 // If the entry was not emitted using a callback or a dialect interface,
928 // emit it using the textual format.
929 emitAttrOrTypeRawImpl();
930 return false;
931 };
932
933 bool hasCustomEncoding = emitAttrOrTypeImpl();
934
935 // Record the offset of this entry.
936 uint64_t curOffset = attrTypeEmitter.size();
937 offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding,
938 "attr/type offset");
939 prevOffset = curOffset;
940 };
941
942 // Emit the attribute and type entries for each dialect.
943 writeDialectGrouping(offsetEmitter, numberingState.getAttributes(),
944 emitAttrOrType);
945 writeDialectGrouping(offsetEmitter, numberingState.getTypes(),
946 emitAttrOrType);
947
948 // Emit the sections to the stream.
949 emitter.emitSection(bytecode::Section::kAttrTypeOffset,
950 std::move(offsetEmitter));
951 emitter.emitSection(bytecode::Section::kAttrType, std::move(attrTypeEmitter));
952}
953
954//===----------------------------------------------------------------------===//
955// Operations
956//===----------------------------------------------------------------------===//
957
958LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter,
959 Block *block) {
961 bool hasArgs = !args.empty();
962
963 // Emit the number of operations in this block, and if it has arguments. We
964 // use the low bit of the operation count to indicate if the block has
965 // arguments.
966 unsigned numOps = numberingState.getOperationCount(block);
967 emitter.emitVarIntWithFlag(numOps, hasArgs, "block num ops");
968
969 // Emit the arguments of the block.
970 if (hasArgs) {
971 emitter.emitVarInt(args.size(), "block args count");
972 for (BlockArgument arg : args) {
973 Location argLoc = arg.getLoc();
974 if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) {
975 emitter.emitVarIntWithFlag(numberingState.getNumber(arg.getType()),
976 !isa<UnknownLoc>(argLoc), "block arg type");
977 if (!isa<UnknownLoc>(argLoc))
978 emitter.emitVarInt(numberingState.getNumber(argLoc),
979 "block arg location");
980 } else {
981 emitter.emitVarInt(numberingState.getNumber(arg.getType()),
982 "block arg type");
983 emitter.emitVarInt(numberingState.getNumber(argLoc),
984 "block arg location");
985 }
986 }
987 if (config.bytecodeVersion >= bytecode::kUseListOrdering) {
988 uint64_t maskOffset = emitter.size();
989 uint8_t encodingMask = 0;
990 emitter.emitByte(0, "use-list separator");
991 writeUseListOrders(emitter, encodingMask, args);
992 if (encodingMask)
993 emitter.patchByte(maskOffset, encodingMask, "block patch encoding");
994 }
995 }
996
997 // Emit the operations within the block.
998 for (Operation &op : *block)
999 if (failed(writeOp(emitter, &op)))
1000 return failure();
1001 return success();
1002}
1003
1004LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
1005 emitter.emitVarInt(numberingState.getNumber(op->getName()), "op name ID");
1006
1007 // Emit a mask for the operation components. We need to fill this in later
1008 // (when we actually know what needs to be emitted), so emit a placeholder for
1009 // now.
1010 uint64_t maskOffset = emitter.size();
1011 uint8_t opEncodingMask = 0;
1012 emitter.emitByte(0, "op separator");
1013
1014 // Emit the location for this operation.
1015 emitter.emitVarInt(numberingState.getNumber(op->getLoc()), "op location");
1016
1017 // Emit the attributes of this operation.
1018 DictionaryAttr attrs = op->getDiscardableAttrDictionary();
1019 // Allow deployment to version <kNativePropertiesEncoding by merging inherent
1020 // attribute with the discardable ones. We should fail if there are any
1021 // conflicts. When properties are not used by the op, also store everything as
1022 // attributes.
1023 if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding ||
1024 !op->getPropertiesStorage()) {
1025 attrs = op->getAttrDictionary();
1026 }
1027 if (!attrs.empty()) {
1028 opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
1029 emitter.emitVarInt(numberingState.getNumber(attrs), "op attrs count");
1030 }
1031
1032 // Emit the properties of this operation, for now we still support deployment
1033 // to version <kNativePropertiesEncoding.
1034 if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding) {
1035 std::optional<ssize_t> propertiesId = propertiesSection.emit(op);
1036 if (propertiesId.has_value()) {
1038 emitter.emitVarInt(*propertiesId, "op properties ID");
1039 }
1040 }
1041
1042 // Emit the result types of the operation.
1043 if (unsigned numResults = op->getNumResults()) {
1044 opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
1045 emitter.emitVarInt(numResults, "op results count");
1046 for (Type type : op->getResultTypes())
1047 emitter.emitVarInt(numberingState.getNumber(type), "op result type");
1048 }
1049
1050 // Emit the operands of the operation.
1051 if (unsigned numOperands = op->getNumOperands()) {
1053 emitter.emitVarInt(numOperands, "op operands count");
1054 for (Value operand : op->getOperands())
1055 emitter.emitVarInt(numberingState.getNumber(operand), "op operand types");
1056 }
1057
1058 // Emit the successors of the operation.
1059 if (unsigned numSuccessors = op->getNumSuccessors()) {
1061 emitter.emitVarInt(numSuccessors, "op successors count");
1062 for (Block *successor : op->getSuccessors())
1063 emitter.emitVarInt(numberingState.getNumber(successor), "op successor");
1064 }
1065
1066 // Emit the use-list orders to bytecode, so we can reconstruct the same order
1067 // at parsing.
1068 if (config.bytecodeVersion >= bytecode::kUseListOrdering)
1069 writeUseListOrders(emitter, opEncodingMask, ValueRange(op->getResults()));
1070
1071 // Check for regions.
1072 unsigned numRegions = op->getNumRegions();
1073 if (numRegions)
1075
1076 // Update the mask for the operation.
1077 emitter.patchByte(maskOffset, opEncodingMask, "op encoding mask");
1078
1079 // With the mask emitted, we can now emit the regions of the operation. We do
1080 // this after mask emission to avoid offset complications that may arise by
1081 // emitting the regions first (e.g. if the regions are huge, backpatching the
1082 // op encoding mask is more annoying).
1083 if (numRegions) {
1084 bool isIsolatedFromAbove = numberingState.isIsolatedFromAbove(op);
1085 emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove,
1086 "op regions count");
1087
1088 // If the region is not isolated from above, or we are emitting bytecode
1089 // targeting version <kLazyLoading, we don't use a section.
1090 if (isIsolatedFromAbove &&
1091 config.bytecodeVersion >= bytecode::kLazyLoading) {
1092 EncodingEmitter regionEmitter;
1093 if (failed(writeRegions(regionEmitter, op->getRegions())))
1094 return failure();
1095 emitter.emitSection(bytecode::Section::kIR, std::move(regionEmitter));
1096
1097 } else if (failed(writeRegions(emitter, op->getRegions()))) {
1098 return failure();
1099 }
1100 }
1101 return success();
1102}
1103
1104void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter,
1105 uint8_t &opEncodingMask,
1106 ValueRange range) {
1107 // Loop over the results and store the use-list order per result index.
1108 llvm::MapVector<unsigned, llvm::SmallVector<unsigned>> map;
1109 for (auto item : llvm::enumerate(range)) {
1110 auto value = item.value();
1111 // No need to store a custom use-list order if the result does not have
1112 // multiple uses.
1113 if (value.use_empty() || value.hasOneUse())
1114 continue;
1115
1116 // For each result, assemble the list of pairs (use-list-index,
1117 // global-value-index). While doing so, detect if the global-value-index is
1118 // already ordered with respect to the use-list-index.
1119 bool alreadyOrdered = true;
1120 auto &firstUse = *value.use_begin();
1121 uint64_t prevID = bytecode::getUseID(
1122 firstUse, numberingState.getNumber(firstUse.getOwner()));
1123 llvm::SmallVector<std::pair<unsigned, uint64_t>> useListPairs(
1124 {{0, prevID}});
1125
1126 for (auto use : llvm::drop_begin(llvm::enumerate(value.getUses()))) {
1127 uint64_t currentID = bytecode::getUseID(
1128 use.value(), numberingState.getNumber(use.value().getOwner()));
1129 // The use-list order achieved when building the IR at parsing always
1130 // pushes new uses on front. Hence, if the order by unique ID is
1131 // monotonically decreasing, a roundtrip to bytecode preserves such order.
1132 alreadyOrdered &= (prevID > currentID);
1133 useListPairs.push_back({use.index(), currentID});
1134 prevID = currentID;
1135 }
1136
1137 // Do not emit if the order is already sorted.
1138 if (alreadyOrdered)
1139 continue;
1140
1141 // Sort the use indices by the unique ID indices in descending order.
1142 std::sort(
1143 useListPairs.begin(), useListPairs.end(),
1144 [](auto elem1, auto elem2) { return elem1.second > elem2.second; });
1145
1146 map.try_emplace(item.index(), llvm::map_range(useListPairs, [](auto elem) {
1147 return elem.first;
1148 }));
1149 }
1150
1151 if (map.empty())
1152 return;
1153
1155 // Emit the number of results that have a custom use-list order if the number
1156 // of results is greater than one.
1157 if (range.size() != 1) {
1158 emitter.emitVarInt(map.size(), "custom use-list size");
1159 }
1160
1161 for (const auto &[resultIdx, useListOrder] : map) {
1162 // Compute the number of uses that are actually shuffled. If those are less
1163 // than half of the total uses, encoding the index pair `(src, dst)` is more
1164 // space efficient.
1165 size_t shuffledElements =
1166 llvm::count_if(llvm::enumerate(useListOrder),
1167 [](auto item) { return item.index() != item.value(); });
1168 bool indexPairEncoding = shuffledElements < (useListOrder.size() / 2);
1169
1170 // For single result, we don't need to store the result index.
1171 if (range.size() != 1)
1172 emitter.emitVarInt(resultIdx, "use-list result index");
1173
1174 if (indexPairEncoding) {
1175 emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding,
1176 "use-list index pair size");
1177 for (auto pair : llvm::enumerate(useListOrder)) {
1178 if (pair.index() != pair.value()) {
1179 emitter.emitVarInt(pair.value(), "use-list index pair first");
1180 emitter.emitVarInt(pair.index(), "use-list index pair second");
1181 }
1182 }
1183 } else {
1184 emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding,
1185 "use-list size");
1186 for (const auto &index : useListOrder)
1187 emitter.emitVarInt(index, "use-list order");
1188 }
1189 }
1190}
1191
1192LogicalResult BytecodeWriter::writeRegion(EncodingEmitter &emitter,
1193 Region *region) {
1194 // If the region is empty, we only need to emit the number of blocks (which is
1195 // zero).
1196 if (region->empty()) {
1197 emitter.emitVarInt(/*numBlocks*/ 0, "region block count empty");
1198 return success();
1199 }
1200
1201 // Emit the number of blocks and values within the region.
1202 unsigned numBlocks, numValues;
1203 std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region);
1204 emitter.emitVarInt(numBlocks, "region block count");
1205 emitter.emitVarInt(numValues, "region value count");
1206
1207 // Emit the blocks within the region.
1208 for (Block &block : *region)
1209 if (failed(writeBlock(emitter, &block)))
1210 return failure();
1211 return success();
1212}
1213
1214LogicalResult BytecodeWriter::writeIRSection(EncodingEmitter &emitter,
1215 Operation *op) {
1216 EncodingEmitter irEmitter;
1217
1218 // Write the IR section the same way as a block with no arguments. Note that
1219 // the low-bit of the operation count for a block is used to indicate if the
1220 // block has arguments, which in this case is always false.
1221 irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false, "ir section");
1222
1223 // Emit the operations.
1224 if (failed(writeOp(irEmitter, op)))
1225 return failure();
1226
1227 emitter.emitSection(bytecode::Section::kIR, std::move(irEmitter));
1228 return success();
1229}
1230
1231//===----------------------------------------------------------------------===//
1232// Resources
1233//===----------------------------------------------------------------------===//
1234
1235namespace {
1236/// This class represents a resource builder implementation for the MLIR
1237/// bytecode format.
1238class ResourceBuilder : public AsmResourceBuilder {
1239public:
1240 using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>;
1241
1242 ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection,
1243 PostProcessFn postProcessFn, bool shouldElideData)
1244 : emitter(emitter), stringSection(stringSection),
1245 postProcessFn(postProcessFn), shouldElideData(shouldElideData) {}
1246 ~ResourceBuilder() override = default;
1247
1248 void buildBlob(StringRef key, ArrayRef<char> data,
1249 uint32_t dataAlignment) final {
1250 if (!shouldElideData)
1251 emitter.emitOwnedBlobAndAlignment(data, dataAlignment, "resource blob");
1252 postProcessFn(key, AsmResourceEntryKind::Blob);
1253 }
1254 void buildBool(StringRef key, bool data) final {
1255 if (!shouldElideData)
1256 emitter.emitByte(data, "resource bool");
1257 postProcessFn(key, AsmResourceEntryKind::Bool);
1258 }
1259 void buildString(StringRef key, StringRef data) final {
1260 if (!shouldElideData)
1261 emitter.emitVarInt(stringSection.insert(data), "resource string");
1262 postProcessFn(key, AsmResourceEntryKind::String);
1263 }
1264
1265private:
1266 EncodingEmitter &emitter;
1267 StringSectionBuilder &stringSection;
1268 PostProcessFn postProcessFn;
1269 bool shouldElideData = false;
1270};
1271} // namespace
1272
1273void BytecodeWriter::writeResourceSection(Operation *op,
1274 EncodingEmitter &emitter) {
1275 EncodingEmitter resourceEmitter;
1276 EncodingEmitter resourceOffsetEmitter;
1277 uint64_t prevOffset = 0;
1279 curResourceEntries;
1280
1281 // Functor used to process the offset for a resource of `kind` defined by
1282 // 'key'.
1283 auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) {
1284 uint64_t curOffset = resourceEmitter.size();
1285 curResourceEntries.emplace_back(key, kind, curOffset - prevOffset);
1286 prevOffset = curOffset;
1287 };
1288
1289 // Functor used to emit a resource group defined by 'key'.
1290 auto emitResourceGroup = [&](uint64_t key) {
1291 resourceOffsetEmitter.emitVarInt(key, "resource group key");
1292 resourceOffsetEmitter.emitVarInt(curResourceEntries.size(),
1293 "resource group size");
1294 for (auto [key, kind, size] : curResourceEntries) {
1295 resourceOffsetEmitter.emitVarInt(stringSection.insert(key),
1296 "resource key");
1297 resourceOffsetEmitter.emitVarInt(size, "resource size");
1298 resourceOffsetEmitter.emitByte(kind, "resource kind");
1299 }
1300 };
1301
1302 // Builder used to emit resources.
1303 ResourceBuilder entryBuilder(resourceEmitter, stringSection,
1304 appendResourceOffset,
1305 config.shouldElideResourceData);
1306
1307 // Emit the external resource entries.
1308 resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size(),
1309 "external resource printer count");
1310 for (const auto &printer : config.externalResourcePrinters) {
1311 curResourceEntries.clear();
1312 printer->buildResources(op, entryBuilder);
1313 emitResourceGroup(stringSection.insert(printer->getName()));
1314 }
1315
1316 // Emit the dialect resource entries.
1317 for (DialectNumbering &dialect : numberingState.getDialects()) {
1318 if (!dialect.asmInterface)
1319 continue;
1320 curResourceEntries.clear();
1321 dialect.asmInterface->buildResources(op, dialect.resources, entryBuilder);
1322
1323 // Emit the declaration resources for this dialect, these didn't get emitted
1324 // by the interface. These resources don't have data attached, so just use a
1325 // "blob" kind as a placeholder.
1326 for (const auto &resource : dialect.resourceMap)
1327 if (resource.second->isDeclaration)
1328 appendResourceOffset(resource.first, AsmResourceEntryKind::Blob);
1329
1330 // Emit the resource group for this dialect.
1331 if (!curResourceEntries.empty())
1332 emitResourceGroup(dialect.number);
1333 }
1334
1335 // If we didn't emit any resource groups, elide the resource sections.
1336 if (resourceOffsetEmitter.size() == 0)
1337 return;
1338
1339 emitter.emitSection(bytecode::Section::kResourceOffset,
1340 std::move(resourceOffsetEmitter));
1341 emitter.emitSection(bytecode::Section::kResource, std::move(resourceEmitter));
1342}
1343
1344//===----------------------------------------------------------------------===//
1345// Strings
1346//===----------------------------------------------------------------------===//
1347
1348void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) {
1349 EncodingEmitter stringEmitter;
1350 stringSection.write(stringEmitter);
1351 emitter.emitSection(bytecode::Section::kString, std::move(stringEmitter));
1352}
1353
1354//===----------------------------------------------------------------------===//
1355// Properties
1356//===----------------------------------------------------------------------===//
1357
1358void BytecodeWriter::writePropertiesSection(EncodingEmitter &emitter) {
1359 EncodingEmitter propertiesEmitter;
1360 propertiesSection.write(propertiesEmitter);
1361 emitter.emitSection(bytecode::Section::kProperties,
1362 std::move(propertiesEmitter));
1363}
1364
1365//===----------------------------------------------------------------------===//
1366// Entry Points
1367//===----------------------------------------------------------------------===//
1368
1370 const BytecodeWriterConfig &config) {
1371 BytecodeWriter writer(op, config);
1372 return writer.write(op, os);
1373}
return success()
static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries, EntryCallbackT &&callback)
Write the given entries in contiguous groups with the same parent dialect.
*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing should be inserted(the insertion happens right before the *insertion point). Since `begin` can itself be invalidated due to the memref *rewriting done from this method
static LogicalResult emit(SolverOp solver, const SMTEmissionOptions &options, mlir::raw_indented_ostream &stream)
Emit the SMT operations in the given 'solver' to the 'stream'.
This class is used to build resource entries for use by the printer.
Definition AsmState.h:247
A class to interact with the attributes and types printer when emitting MLIR bytecode.
This class represents an argument of a Block.
Definition Value.h:306
Block represents an ordered list of Operations.
Definition Block.h:33
BlockArgListType getArguments()
Definition Block.h:97
This class contains the configuration used for the bytecode writer.
llvm::StringMap< std::unique_ptr< DialectVersion > > & getDialectVersionMap() const
A map containing the dialect versions to emit.
void setElideResourceDataFlag(bool shouldElideResourceData=true)
Set a boolean flag to skip emission of resources into the bytecode file.
BytecodeWriterConfig(StringRef producer="MLIR" LLVM_VERSION_STRING)
producer is an optional string that can be used to identify the producer of the bytecode when reading...
void attachFallbackResourcePrinter(FallbackAsmResourceMap &map)
Attach resource printers to the AsmState for the fallback resources in the given map.
void attachTypeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Type > > callback)
int64_t getDesiredBytecodeVersion() const
Get the set desired bytecode version to emit.
void setDialectVersion(std::unique_ptr< DialectVersion > dialectVersion) const
Set a given dialect version to emit on the map.
void attachAttributeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > callback)
Attach a custom bytecode printer callback to the configuration for the emission of custom type/attrib...
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > getTypeWriterCallbacks() const
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > getAttributeWriterCallbacks() const
Retrieve the callbacks.
void setDesiredBytecodeVersion(int64_t bytecodeVersion)
Set the desired bytecode version to emit.
void setElideLocations(bool shouldElideLocations=true)
Set a boolean flag to skip emission of unique locations into the bytecode file.
void attachResourcePrinter(std::unique_ptr< AsmResourcePrinter > printer)
Attach the given resource printer to the writer configuration.
This class defines a virtual interface for writing to a bytecode stream, providing hooks into the byt...
A fallback map containing external resources not explicitly handled by another parser/printer.
Definition AsmState.h:421
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
Operation is the basic unit of execution within MLIR.
Definition Operation.h:87
PropertyRef getPropertiesStorage()
Return a generic (but typed) reference to the property type storage.
Definition Operation.h:926
DictionaryAttr getAttrDictionary()
Return all of the attributes on this operation as a DictionaryAttr.
unsigned getNumSuccessors()
Definition Operation.h:731
bool isRegistered()
Returns true if this operation has a registered operation description, otherwise false.
Definition Operation.h:125
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition Operation.h:699
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:240
unsigned getNumOperands()
Definition Operation.h:371
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
OperationName getName()
The name of an operation is the key identifier for it.
Definition Operation.h:115
DictionaryAttr getDiscardableAttrDictionary()
Return all of the discardable attributes on this operation as a DictionaryAttr.
Definition Operation.h:526
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition Operation.h:702
result_type_range getResultTypes()
Definition Operation.h:453
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:403
SuccessorRange getSuccessors()
Definition Operation.h:728
result_range getResults()
Definition Operation.h:440
int getPropertiesStorageSize() const
Returns the properties storage size.
Definition Operation.h:921
unsigned getNumResults()
Return the number of results held by this operation.
Definition Operation.h:429
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
bool empty()
Definition Region.h:60
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:389
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
@ kAttrType
This section contains the attributes and types referenced within an IR module.
Definition Encoding.h:73
@ kAttrTypeOffset
This section contains the offsets for the attribute and types within the AttrType section.
Definition Encoding.h:77
@ kIR
This section contains the list of operations serialized into the bytecode, and their nested regions/o...
Definition Encoding.h:81
@ kResource
This section contains the resources of the bytecode.
Definition Encoding.h:84
@ kResourceOffset
This section contains the offsets of resources within the Resource section.
Definition Encoding.h:88
@ kDialect
This section contains the dialects referenced within an IR module.
Definition Encoding.h:69
@ kString
This section contains strings referenced within the bytecode.
Definition Encoding.h:66
@ kDialectVersions
This section contains the versions of each dialect.
Definition Encoding.h:91
@ kProperties
This section contains the properties for the operations.
Definition Encoding.h:94
static uint64_t getUseID(OperandT &val, unsigned ownerID)
Get the unique ID of a value use.
Definition Encoding.h:127
@ kUseListOrdering
Use-list ordering started to be encoded in version 3.
Definition Encoding.h:38
@ kAlignmentByte
An arbitrary value used to fill alignment padding.
Definition Encoding.h:56
@ kVersion
The current bytecode version.
Definition Encoding.h:53
@ kLazyLoading
Support for lazy-loading of isolated region was added in version 2.
Definition Encoding.h:35
@ kDialectVersioning
Dialects versioning was added in version 1.
Definition Encoding.h:32
@ kElideUnknownBlockArgLocation
Avoid recording unknown locations on block arguments (compression) started in version 4.
Definition Encoding.h:42
@ kNativePropertiesEncoding
Support for encoding properties natively in bytecode instead of merged with the discardable attribute...
Definition Encoding.h:46
@ kMinSupportedVersion
The minimum supported version of the bytecode.
Definition Encoding.h:29
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:717
Include the generated interface declarations.
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
Definition LLVM.h:120
AsmResourceEntryKind
This enum represents the different kinds of resource values.
Definition AsmState.h:280
@ Blob
A blob of data with an accompanying alignment.
Definition AsmState.h:282
LogicalResult writeBytecodeToFile(Operation *op, raw_ostream &os, const BytecodeWriterConfig &config={})
Write the bytecode for the given operation to the provided output stream.
llvm::function_ref< Fn > function_ref
Definition LLVM.h:147
StringRef producer
The producer of the bytecode.
llvm::StringMap< std::unique_ptr< DialectVersion > > dialectVersionMap
A map containing dialect version information for each dialect to emit.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > attributeWriterCallbacks
Printer callbacks used to emit custom type and attribute encodings.
SmallVector< std::unique_ptr< AsmResourcePrinter > > externalResourcePrinters
A collection of non-dialect resource printers.
bool shouldElideLocations
A flag specifying whether to elide emission of locations.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > typeWriterCallbacks
int64_t bytecodeVersion
Version to use when writing.
bool shouldElideResourceData
A flag specifying whether to elide emission of resources into the bytecode file.
This class represents a numbering entry for an Dialect.
unsigned number
The number assigned to the dialect.