MLIR 22.0.0git
BytecodeWriter.cpp
Go to the documentation of this file.
1//===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "IRNumbering.h"
14#include "mlir/IR/Attributes.h"
15#include "mlir/IR/Diagnostics.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/CachedHashString.h"
19#include "llvm/ADT/MapVector.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Support/DebugLog.h"
23#include "llvm/Support/Endian.h"
24#include "llvm/Support/raw_ostream.h"
25#include <optional>
26
27#define DEBUG_TYPE "mlir-bytecode-writer"
28
29using namespace mlir;
30using namespace mlir::bytecode::detail;
31
32//===----------------------------------------------------------------------===//
33// BytecodeWriterConfig
34//===----------------------------------------------------------------------===//
35
37 Impl(StringRef producer) : producer(producer) {}
38
39 /// Version to use when writing.
40 /// Note: This only differs from kVersion if a specific version is set.
42
43 /// A flag specifying whether to elide emission of resources into the bytecode
44 /// file.
46
47 /// A map containing dialect version information for each dialect to emit.
48 llvm::StringMap<std::unique_ptr<DialectVersion>> dialectVersionMap;
49
50 /// The producer of the bytecode.
51 StringRef producer;
52
53 /// Printer callbacks used to emit custom type and attribute encodings.
58
59 /// A collection of non-dialect resource printers.
61};
62
64 : impl(std::make_unique<Impl>(producer)) {}
72
74
77 return impl->attributeWriterCallbacks;
78}
79
82 return impl->typeWriterCallbacks;
83}
84
86 std::unique_ptr<AttrTypeBytecodeWriter<Attribute>> callback) {
87 impl->attributeWriterCallbacks.emplace_back(std::move(callback));
88}
89
91 std::unique_ptr<AttrTypeBytecodeWriter<Type>> callback) {
92 impl->typeWriterCallbacks.emplace_back(std::move(callback));
93}
94
96 std::unique_ptr<AsmResourcePrinter> printer) {
97 impl->externalResourcePrinters.emplace_back(std::move(printer));
98}
99
101 bool shouldElideResourceData) {
102 impl->shouldElideResourceData = shouldElideResourceData;
103}
104
106 impl->bytecodeVersion = bytecodeVersion;
107}
108
110 return impl->bytecodeVersion;
111}
112
113llvm::StringMap<std::unique_ptr<DialectVersion>> &
115 return impl->dialectVersionMap;
116}
117
119 llvm::StringRef dialectName,
120 std::unique_ptr<DialectVersion> dialectVersion) const {
121 assert(!impl->dialectVersionMap.contains(dialectName) &&
122 "cannot override a previously set dialect version");
123 impl->dialectVersionMap.insert({dialectName, std::move(dialectVersion)});
124}
125
126//===----------------------------------------------------------------------===//
127// EncodingEmitter
128//===----------------------------------------------------------------------===//
129
130namespace {
131/// This class functions as the underlying encoding emitter for the bytecode
132/// writer. This class is a bit different compared to other types of encoders;
133/// it does not use a single buffer, but instead may contain several buffers
134/// (some owned by the writer, and some not) that get concatted during the final
135/// emission.
136class EncodingEmitter {
137public:
138 EncodingEmitter() = default;
139 EncodingEmitter(const EncodingEmitter &) = delete;
140 EncodingEmitter &operator=(const EncodingEmitter &) = delete;
141
142 /// Write the current contents to the provided stream.
143 void writeTo(raw_ostream &os) const;
144
145 /// Return the current size of the encoded buffer.
146 size_t size() const { return prevResultSize + currentResult.size(); }
147
148 //===--------------------------------------------------------------------===//
149 // Emission
150 //===--------------------------------------------------------------------===//
151
152 /// Backpatch a byte in the result buffer at the given offset.
153 void patchByte(uint64_t offset, uint8_t value, StringLiteral desc) {
154 LDBG() << "patchByte(" << offset << ',' << uint64_t(value) << ")\t" << desc;
155 assert(offset < size() && offset >= prevResultSize &&
156 "cannot patch previously emitted data");
157 currentResult[offset - prevResultSize] = value;
158 }
159
160 /// Emit the provided blob of data, which is owned by the caller and is
161 /// guaranteed to not die before the end of the bytecode process.
162 void emitOwnedBlob(ArrayRef<uint8_t> data, StringLiteral desc) {
163 LDBG() << "emitOwnedBlob(" << data.size() << "b)\t" << desc;
164 // Push the current buffer before adding the provided data.
165 appendResult(std::move(currentResult));
166 appendOwnedResult(data);
167 }
168
169 /// Emit the provided blob of data that has the given alignment, which is
170 /// owned by the caller and is guaranteed to not die before the end of the
171 /// bytecode process. The alignment value is also encoded, making it available
172 /// on load.
173 void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment,
174 StringLiteral desc) {
175 emitVarInt(alignment, desc);
176 emitVarInt(data.size(), desc);
177
178 alignTo(alignment);
179 emitOwnedBlob(data, desc);
180 }
181 void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment,
182 StringLiteral desc) {
183 ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
184 data.size());
185 emitOwnedBlobAndAlignment(castedData, alignment, desc);
186 }
187
188 /// Align the emitter to the given alignment.
189 void alignTo(unsigned alignment) {
190 if (alignment < 2)
191 return;
192 assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment");
193
194 // Check to see if we need to emit any padding bytes to meet the desired
195 // alignment.
196 size_t curOffset = size();
197 size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset;
198 while (paddingSize--)
199 emitByte(bytecode::kAlignmentByte, "alignment byte");
200
201 // Keep track of the maximum required alignment.
202 requiredAlignment = std::max(requiredAlignment, alignment);
203 }
204
205 //===--------------------------------------------------------------------===//
206 // Integer Emission
207
208 /// Emit a single byte.
209 template <typename T>
210 void emitByte(T byte, StringLiteral desc) {
211 LDBG() << "emitByte(" << uint64_t(byte) << ")\t" << desc;
212 currentResult.push_back(static_cast<uint8_t>(byte));
213 }
214
215 /// Emit a range of bytes.
216 void emitBytes(ArrayRef<uint8_t> bytes, StringLiteral desc) {
217 LDBG() << "emitBytes(" << bytes.size() << "b)\t" << desc;
218 llvm::append_range(currentResult, bytes);
219 }
220
221 /// Emit a variable length integer. The first encoded byte contains a prefix
222 /// in the low bits indicating the encoded length of the value. This length
223 /// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits
224 /// indicate the number of _additional_ bytes (not including the prefix byte).
225 /// All remaining bits in the first byte, along with all of the bits in
226 /// additional bytes, provide the value of the integer encoded in
227 /// little-endian order.
228 void emitVarInt(uint64_t value, StringLiteral desc) {
229 LDBG() << "emitVarInt(" << value << ")\t" << desc;
230
231 // In the most common case, the value can be represented in a single byte.
232 // Given how hot this case is, explicitly handle that here.
233 if ((value >> 7) == 0)
234 return emitByte((value << 1) | 0x1, desc);
235 emitMultiByteVarInt(value, desc);
236 }
237
238 /// Emit a signed variable length integer. Signed varints are encoded using
239 /// a varint with zigzag encoding, meaning that we use the low bit of the
240 /// value to indicate the sign of the value. This allows for more efficient
241 /// encoding of negative values by limiting the number of active bits
242 void emitSignedVarInt(uint64_t value, StringLiteral desc) {
243 emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63), desc);
244 }
245
246 /// Emit a variable length integer whose low bit is used to encode the
247 /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
248 void emitVarIntWithFlag(uint64_t value, bool flag, StringLiteral desc) {
249 emitVarInt((value << 1) | (flag ? 1 : 0), desc);
250 }
251
252 //===--------------------------------------------------------------------===//
253 // String Emission
254
255 /// Emit the given string as a nul terminated string.
256 void emitNulTerminatedString(StringRef str, StringLiteral desc) {
257 emitString(str, desc);
258 emitByte(0, "null terminator");
259 }
260
261 /// Emit the given string without a nul terminator.
262 void emitString(StringRef str, StringLiteral desc) {
263 emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()},
264 desc);
265 }
266
267 //===--------------------------------------------------------------------===//
268 // Section Emission
269
270 /// Emit a nested section of the given code, whose contents are encoded in the
271 /// provided emitter.
272 void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) {
273 // Emit the section code and length. The high bit of the code is used to
274 // indicate whether the section alignment is present, so save an offset to
275 // it.
276 uint64_t codeOffset = currentResult.size();
277 emitByte(code, "section code");
278 emitVarInt(emitter.size(), "section size");
279
280 // Integrate the alignment of the section into this emitter if necessary.
281 unsigned emitterAlign = emitter.requiredAlignment;
282 if (emitterAlign > 1) {
283 if (size() & (emitterAlign - 1)) {
284 emitVarInt(emitterAlign, "section alignment");
285 alignTo(emitterAlign);
286
287 // Indicate that we needed to align the section, the high bit of the
288 // code field is used for this.
289 currentResult[codeOffset] |= 0b10000000;
290 } else {
291 // Otherwise, if we happen to be at a compatible offset, we just
292 // remember that we need this alignment.
293 requiredAlignment = std::max(requiredAlignment, emitterAlign);
294 }
295 }
296
297 // Push our current buffer and then merge the provided section body into
298 // ours.
299 appendResult(std::move(currentResult));
300 for (std::vector<uint8_t> &result : emitter.prevResultStorage)
301 prevResultStorage.push_back(std::move(result));
302 llvm::append_range(prevResultList, emitter.prevResultList);
303 prevResultSize += emitter.prevResultSize;
304 appendResult(std::move(emitter.currentResult));
305 }
306
307private:
308 /// Emit the given value using a variable width encoding. This method is a
309 /// fallback when the number of bytes needed to encode the value is greater
310 /// than 1. We mark it noinline here so that the single byte hot path isn't
311 /// pessimized.
312 LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value,
313 StringLiteral desc);
314
315 /// Append a new result buffer to the current contents.
316 void appendResult(std::vector<uint8_t> &&result) {
317 if (result.empty())
318 return;
319 prevResultStorage.emplace_back(std::move(result));
320 appendOwnedResult(prevResultStorage.back());
321 }
322 void appendOwnedResult(ArrayRef<uint8_t> result) {
323 if (result.empty())
324 return;
325 prevResultSize += result.size();
326 prevResultList.emplace_back(result);
327 }
328
329 /// The result of the emitter currently being built. We refrain from building
330 /// a single buffer to simplify emitting sections, large data, and more. The
331 /// result is thus represented using multiple distinct buffers, some of which
332 /// we own (via prevResultStorage), and some of which are just pointers into
333 /// externally owned buffers.
334 std::vector<uint8_t> currentResult;
335 std::vector<ArrayRef<uint8_t>> prevResultList;
336 std::vector<std::vector<uint8_t>> prevResultStorage;
337
338 /// An up-to-date total size of all of the buffers within `prevResultList`.
339 /// This enables O(1) size checks of the current encoding.
340 size_t prevResultSize = 0;
341
342 /// The highest required alignment for the start of this section.
343 unsigned requiredAlignment = 1;
344};
345
346//===----------------------------------------------------------------------===//
347// StringSectionBuilder
348//===----------------------------------------------------------------------===//
349
350namespace {
351/// This class is used to simplify the process of emitting the string section.
352class StringSectionBuilder {
353public:
354 /// Add the given string to the string section, and return the index of the
355 /// string within the section.
356 size_t insert(StringRef str) {
357 auto it = strings.insert({llvm::CachedHashStringRef(str), strings.size()});
358 return it.first->second;
359 }
360
361 /// Write the current set of strings to the given emitter.
362 void write(EncodingEmitter &emitter) {
363 emitter.emitVarInt(strings.size(), "string section size");
364
365 // Emit the sizes in reverse order, so that we don't need to backpatch an
366 // offset to the string data or have a separate section.
367 for (const auto &it : llvm::reverse(strings))
368 emitter.emitVarInt(it.first.size() + 1, "string size");
369 // Emit the string data itself.
370 for (const auto &it : strings)
371 emitter.emitNulTerminatedString(it.first.val(), "string");
372 }
373
374private:
375 /// A set of strings referenced within the bytecode. The value of the map is
376 /// unused.
377 llvm::MapVector<llvm::CachedHashStringRef, size_t> strings;
378};
379} // namespace
380
381class DialectWriter : public DialectBytecodeWriter {
382 using DialectVersionMapT = llvm::StringMap<std::unique_ptr<DialectVersion>>;
383
384public:
385 DialectWriter(int64_t bytecodeVersion, EncodingEmitter &emitter,
386 IRNumberingState &numberingState,
387 StringSectionBuilder &stringSection,
388 const DialectVersionMapT &dialectVersionMap)
389 : bytecodeVersion(bytecodeVersion), emitter(emitter),
390 numberingState(numberingState), stringSection(stringSection),
391 dialectVersionMap(dialectVersionMap) {}
392
393 //===--------------------------------------------------------------------===//
394 // IR
395 //===--------------------------------------------------------------------===//
396
397 void writeAttribute(Attribute attr) override {
398 emitter.emitVarInt(numberingState.getNumber(attr), "dialect attr");
399 }
400 void writeOptionalAttribute(Attribute attr) override {
401 if (!attr) {
402 emitter.emitVarInt(0, "dialect optional attr none");
403 return;
404 }
405 emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true,
406 "dialect optional attr");
407 }
408
409 void writeType(Type type) override {
410 emitter.emitVarInt(numberingState.getNumber(type), "dialect type");
411 }
412
413 void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
414 emitter.emitVarInt(numberingState.getNumber(resource), "dialect resource");
415 }
416
417 //===--------------------------------------------------------------------===//
418 // Primitives
419 //===--------------------------------------------------------------------===//
420
421 void writeVarInt(uint64_t value) override {
422 emitter.emitVarInt(value, "dialect writer");
423 }
424
425 void writeSignedVarInt(int64_t value) override {
426 emitter.emitSignedVarInt(value, "dialect writer");
427 }
428
429 void writeAPIntWithKnownWidth(const APInt &value) override {
430 size_t bitWidth = value.getBitWidth();
431
432 // If the value is a single byte, just emit it directly without going
433 // through a varint.
434 if (bitWidth <= 8)
435 return emitter.emitByte(value.getLimitedValue(), "dialect APInt");
436
437 // If the value fits within a single varint, emit it directly.
438 if (bitWidth <= 64)
439 return emitter.emitSignedVarInt(value.getLimitedValue(), "dialect APInt");
440
441 // Otherwise, we need to encode a variable number of active words. We use
442 // active words instead of the number of total words under the observation
443 // that smaller values will be more common.
444 unsigned numActiveWords = value.getActiveWords();
445 emitter.emitVarInt(numActiveWords, "dialect APInt word count");
446
447 const uint64_t *rawValueData = value.getRawData();
448 for (unsigned i = 0; i < numActiveWords; ++i)
449 emitter.emitSignedVarInt(rawValueData[i], "dialect APInt word");
450 }
451
452 void writeAPFloatWithKnownSemantics(const APFloat &value) override {
453 writeAPIntWithKnownWidth(value.bitcastToAPInt());
454 }
455
456 void writeOwnedString(StringRef str) override {
457 emitter.emitVarInt(stringSection.insert(str), "dialect string");
458 }
459
460 void writeOwnedBlob(ArrayRef<char> blob) override {
461 emitter.emitVarInt(blob.size(), "dialect blob");
462 emitter.emitOwnedBlob(
463 ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(blob.data()),
464 blob.size()),
465 "dialect blob");
466 }
467
468 void writeOwnedBool(bool value) override {
469 emitter.emitByte(value, "dialect bool");
470 }
471
472 int64_t getBytecodeVersion() const override { return bytecodeVersion; }
473
474 FailureOr<const DialectVersion *>
475 getDialectVersion(StringRef dialectName) const override {
476 auto dialectEntry = dialectVersionMap.find(dialectName);
477 if (dialectEntry == dialectVersionMap.end())
478 return failure();
479 return dialectEntry->getValue().get();
480 }
481
482private:
483 int64_t bytecodeVersion;
484 EncodingEmitter &emitter;
485 IRNumberingState &numberingState;
486 StringSectionBuilder &stringSection;
487 const DialectVersionMapT &dialectVersionMap;
488};
489
490namespace {
491class PropertiesSectionBuilder {
492public:
493 PropertiesSectionBuilder(IRNumberingState &numberingState,
494 StringSectionBuilder &stringSection,
495 const BytecodeWriterConfig::Impl &config)
496 : numberingState(numberingState), stringSection(stringSection),
497 config(config) {}
498
499 /// Emit the op properties in the properties section and return the index of
500 /// the properties within the section. Return -1 if no properties was emitted.
501 std::optional<ssize_t> emit(Operation *op) {
502 EncodingEmitter propertiesEmitter;
503 if (!op->getPropertiesStorageSize())
504 return std::nullopt;
505 if (!op->isRegistered()) {
506 // Unregistered op are storing properties as an optional attribute.
507 Attribute prop = *op->getPropertiesStorage().as<Attribute *>();
508 if (!prop)
509 return std::nullopt;
510 EncodingEmitter sizeEmitter;
511 sizeEmitter.emitVarInt(numberingState.getNumber(prop), "properties size");
512 scratch.clear();
513 llvm::raw_svector_ostream os(scratch);
514 sizeEmitter.writeTo(os);
515 return emit(scratch);
516 }
517
518 EncodingEmitter emitter;
519 DialectWriter propertiesWriter(config.bytecodeVersion, emitter,
520 numberingState, stringSection,
521 config.dialectVersionMap);
522 auto iface = cast<BytecodeOpInterface>(op);
523 iface.writeProperties(propertiesWriter);
524 scratch.clear();
525 llvm::raw_svector_ostream os(scratch);
526 emitter.writeTo(os);
527 return emit(scratch);
528 }
529
530 /// Write the current set of properties to the given emitter.
531 void write(EncodingEmitter &emitter) {
532 emitter.emitVarInt(propertiesStorage.size(), "properties size");
533 if (propertiesStorage.empty())
534 return;
535 for (const auto &storage : propertiesStorage) {
536 if (storage.empty()) {
537 emitter.emitBytes(ArrayRef<uint8_t>(), "empty properties");
538 continue;
539 }
540 emitter.emitBytes(ArrayRef(reinterpret_cast<const uint8_t *>(&storage[0]),
541 storage.size()),
542 "property");
543 }
544 }
545
546 /// Returns true if the section is empty.
547 bool empty() { return propertiesStorage.empty(); }
548
549private:
550 /// Emit raw data and returns the offset in the internal buffer.
551 /// Data are deduplicated and will be copied in the internal buffer only if
552 /// they don't exist there already.
553 ssize_t emit(ArrayRef<char> rawProperties) {
554 // Populate a scratch buffer with the properties size.
555 SmallVector<char> sizeScratch;
556 {
557 EncodingEmitter sizeEmitter;
558 sizeEmitter.emitVarInt(rawProperties.size(), "properties");
559 llvm::raw_svector_ostream os(sizeScratch);
560 sizeEmitter.writeTo(os);
561 }
562 // Append a new storage to the table now.
563 size_t index = propertiesStorage.size();
564 propertiesStorage.emplace_back();
565 std::vector<char> &newStorage = propertiesStorage.back();
566 size_t propertiesSize = sizeScratch.size() + rawProperties.size();
567 newStorage.reserve(propertiesSize);
568 llvm::append_range(newStorage, sizeScratch);
569 llvm::append_range(newStorage, rawProperties);
570
571 // Try to de-duplicate the new serialized properties.
572 // If the properties is a duplicate, pop it back from the storage.
573 auto inserted = propertiesUniquing.insert(
574 std::make_pair(ArrayRef<char>(newStorage), index));
575 if (!inserted.second)
576 propertiesStorage.pop_back();
577 return inserted.first->getSecond();
578 }
579
580 /// Storage for properties.
581 std::vector<std::vector<char>> propertiesStorage;
582 SmallVector<char> scratch;
583 DenseMap<ArrayRef<char>, int64_t> propertiesUniquing;
584 IRNumberingState &numberingState;
585 StringSectionBuilder &stringSection;
586 const BytecodeWriterConfig::Impl &config;
587};
588} // namespace
589
590/// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need
591/// to go through an intermediate buffer when interacting with code that wants a
592/// raw_ostream.
593class RawEmitterOstream : public raw_ostream {
594public:
595 explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) {
596 SetUnbuffered();
597 }
598
599private:
600 void write_impl(const char *ptr, size_t size) override {
601 emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size},
602 "raw emitter");
603 }
604 uint64_t current_pos() const override { return emitter.size(); }
605
606 /// The section being emitted to.
607 EncodingEmitter &emitter;
608};
609} // namespace
610
611void EncodingEmitter::writeTo(raw_ostream &os) const {
612 // Reserve space in the ostream for the encoded contents.
613 os.reserveExtraSpace(size());
614
615 for (auto &prevResult : prevResultList)
616 os.write((const char *)prevResult.data(), prevResult.size());
617 os.write((const char *)currentResult.data(), currentResult.size());
618}
619
620void EncodingEmitter::emitMultiByteVarInt(uint64_t value, StringLiteral desc) {
621 // Compute the number of bytes needed to encode the value. Each byte can hold
622 // up to 7-bits of data. We only check up to the number of bits we can encode
623 // in the first byte (8).
624 uint64_t it = value >> 7;
625 for (size_t numBytes = 2; numBytes < 9; ++numBytes) {
626 if (LLVM_LIKELY(it >>= 7) == 0) {
627 uint64_t encodedValue = (value << 1) | 0x1;
628 encodedValue <<= (numBytes - 1);
629 llvm::support::ulittle64_t encodedValueLE(encodedValue);
630 emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes}, desc);
631 return;
632 }
633 }
634
635 // If the value is too large to encode in a single byte, emit a special all
636 // zero marker byte and splat the value directly.
637 emitByte(0, desc);
638 llvm::support::ulittle64_t valueLE(value);
639 emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)}, desc);
640}
641
642//===----------------------------------------------------------------------===//
643// Bytecode Writer
644//===----------------------------------------------------------------------===//
645
646namespace {
647class BytecodeWriter {
648public:
649 BytecodeWriter(Operation *op, const BytecodeWriterConfig &config)
650 : numberingState(op, config), config(config.getImpl()),
651 propertiesSection(numberingState, stringSection, config.getImpl()) {}
652
653 /// Write the bytecode for the given root operation.
654 LogicalResult write(Operation *rootOp, raw_ostream &os);
655
656private:
657 //===--------------------------------------------------------------------===//
658 // Dialects
659
660 void writeDialectSection(EncodingEmitter &emitter);
661
662 //===--------------------------------------------------------------------===//
663 // Attributes and Types
664
665 void writeAttrTypeSection(EncodingEmitter &emitter);
666
667 //===--------------------------------------------------------------------===//
668 // Operations
669
670 LogicalResult writeBlock(EncodingEmitter &emitter, Block *block);
671 LogicalResult writeOp(EncodingEmitter &emitter, Operation *op);
672 LogicalResult writeRegion(EncodingEmitter &emitter, Region *region);
673 LogicalResult writeIRSection(EncodingEmitter &emitter, Operation *op);
674
675 LogicalResult writeRegions(EncodingEmitter &emitter,
676 MutableArrayRef<Region> regions) {
677 return success(llvm::all_of(regions, [&](Region &region) {
678 return succeeded(writeRegion(emitter, &region));
679 }));
680 }
681
682 //===--------------------------------------------------------------------===//
683 // Resources
684
685 void writeResourceSection(Operation *op, EncodingEmitter &emitter);
686
687 //===--------------------------------------------------------------------===//
688 // Strings
689
690 void writeStringSection(EncodingEmitter &emitter);
691
692 //===--------------------------------------------------------------------===//
693 // Properties
694
695 void writePropertiesSection(EncodingEmitter &emitter);
696
697 //===--------------------------------------------------------------------===//
698 // Helpers
699
700 void writeUseListOrders(EncodingEmitter &emitter, uint8_t &opEncodingMask,
701 ValueRange range);
702
703 //===--------------------------------------------------------------------===//
704 // Fields
705
706 /// The builder used for the string section.
707 StringSectionBuilder stringSection;
708
709 /// The IR numbering state generated for the root operation.
710 IRNumberingState numberingState;
711
712 /// Configuration dictating bytecode emission.
713 const BytecodeWriterConfig::Impl &config;
714
715 /// Storage for the properties section
716 PropertiesSectionBuilder propertiesSection;
717};
718} // namespace
719
720LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
721 EncodingEmitter emitter;
722
723 // Emit the bytecode file header. This is how we identify the output as a
724 // bytecode file.
725 emitter.emitString("ML\xefR", "bytecode header");
726
727 // Emit the bytecode version.
728 if (config.bytecodeVersion < bytecode::kMinSupportedVersion ||
729 config.bytecodeVersion > bytecode::kVersion)
730 return rootOp->emitError()
731 << "unsupported version requested " << config.bytecodeVersion
732 << ", must be in range ["
733 << static_cast<int64_t>(bytecode::kMinSupportedVersion) << ", "
734 << static_cast<int64_t>(bytecode::kVersion) << ']';
735 emitter.emitVarInt(config.bytecodeVersion, "bytecode version");
736
737 // Emit the producer.
738 emitter.emitNulTerminatedString(config.producer, "bytecode producer");
739
740 // Emit the dialect section.
741 writeDialectSection(emitter);
742
743 // Emit the attributes and types section.
744 writeAttrTypeSection(emitter);
745
746 // Emit the IR section.
747 if (failed(writeIRSection(emitter, rootOp)))
748 return failure();
749
750 // Emit the resources section.
751 writeResourceSection(rootOp, emitter);
752
753 // Emit the string section.
754 writeStringSection(emitter);
755
756 // Emit the properties section.
757 if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding)
758 writePropertiesSection(emitter);
759 else if (!propertiesSection.empty())
760 return rootOp->emitError(
761 "unexpected properties emitted incompatible with bytecode <5");
762
763 // Write the generated bytecode to the provided output stream.
764 emitter.writeTo(os);
765
766 return success();
767}
768
769//===----------------------------------------------------------------------===//
770// Dialects
771//===----------------------------------------------------------------------===//
772
773/// Write the given entries in contiguous groups with the same parent dialect.
774/// Each dialect sub-group is encoded with the parent dialect and number of
775/// elements, followed by the encoding for the entries. The given callback is
776/// invoked to encode each individual entry.
777template <typename EntriesT, typename EntryCallbackT>
778static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
779 EntryCallbackT &&callback) {
780 for (auto it = entries.begin(), e = entries.end(); it != e;) {
781 auto groupStart = it++;
782
783 // Find the end of the group that shares the same parent dialect.
784 DialectNumbering *currentDialect = groupStart->dialect;
785 it = std::find_if(it, e, [&](const auto &entry) {
786 return entry.dialect != currentDialect;
787 });
788
789 // Emit the dialect and number of elements.
790 emitter.emitVarInt(currentDialect->number, "dialect number");
791 emitter.emitVarInt(std::distance(groupStart, it), "dialect offset");
792
793 // Emit the entries within the group.
794 for (auto &entry : llvm::make_range(groupStart, it))
795 callback(entry);
796 }
797}
798
799void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
800 EncodingEmitter dialectEmitter;
801
802 // Emit the referenced dialects.
803 auto dialects = numberingState.getDialects();
804 dialectEmitter.emitVarInt(llvm::size(dialects), "dialects count");
805 for (DialectNumbering &dialect : dialects) {
806 // Write the string section and get the ID.
807 size_t nameID = stringSection.insert(dialect.name);
808
809 if (config.bytecodeVersion < bytecode::kDialectVersioning) {
810 dialectEmitter.emitVarInt(nameID, "dialect name ID");
811 continue;
812 }
813
814 // Try writing the version to the versionEmitter.
815 EncodingEmitter versionEmitter;
816 if (dialect.interface) {
817 // The writer used when emitting using a custom bytecode encoding.
818 DialectWriter versionWriter(config.bytecodeVersion, versionEmitter,
819 numberingState, stringSection,
820 config.dialectVersionMap);
821 dialect.interface->writeVersion(versionWriter);
822 }
823
824 // If the version emitter is empty, version is not available. We can encode
825 // this in the dialect ID, so if there is no version, we don't write the
826 // section.
827 size_t versionAvailable = versionEmitter.size() > 0;
828 dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable,
829 "dialect version");
830 if (versionAvailable)
831 dialectEmitter.emitSection(bytecode::Section::kDialectVersions,
832 std::move(versionEmitter));
833 }
834
835 if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation)
836 dialectEmitter.emitVarInt(size(numberingState.getOpNames()),
837 "op names count");
838
839 // Emit the referenced operation names grouped by dialect.
840 auto emitOpName = [&](OpNameNumbering &name) {
841 size_t stringId = stringSection.insert(name.name.stripDialect());
842 if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding)
843 dialectEmitter.emitVarInt(stringId, "dialect op name");
844 else
845 dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered(),
846 "dialect op name");
847 };
848 writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName);
849
850 emitter.emitSection(bytecode::Section::kDialect, std::move(dialectEmitter));
851}
852
853//===----------------------------------------------------------------------===//
854// Attributes and Types
855//===----------------------------------------------------------------------===//
856
857void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
858 EncodingEmitter attrTypeEmitter;
859 EncodingEmitter offsetEmitter;
860 offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()),
861 "attributes count");
862 offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()),
863 "types count");
864
865 // A functor used to emit an attribute or type entry.
866 uint64_t prevOffset = 0;
867 auto emitAttrOrType = [&](auto &entry) {
868 auto entryValue = entry.getValue();
869
870 auto emitAttrOrTypeRawImpl = [&]() -> void {
871 RawEmitterOstream(attrTypeEmitter) << entryValue;
872 attrTypeEmitter.emitByte(0, "attr/type separator");
873 };
874 auto emitAttrOrTypeImpl = [&]() -> bool {
875 // TODO: We don't currently support custom encoded mutable types and
876 // attributes.
877 if (entryValue.template hasTrait<TypeTrait::IsMutable>() ||
878 entryValue.template hasTrait<AttributeTrait::IsMutable>()) {
879 emitAttrOrTypeRawImpl();
880 return false;
881 }
882
883 DialectWriter dialectWriter(config.bytecodeVersion, attrTypeEmitter,
884 numberingState, stringSection,
885 config.dialectVersionMap);
886 if constexpr (std::is_same_v<std::decay_t<decltype(entryValue)>, Type>) {
887 for (const auto &callback : config.typeWriterCallbacks) {
888 if (succeeded(callback->write(entryValue, dialectWriter)))
889 return true;
890 }
891 if (const BytecodeDialectInterface *interface =
892 entry.dialect->interface) {
893 if (succeeded(interface->writeType(entryValue, dialectWriter)))
894 return true;
895 }
896 } else {
897 for (const auto &callback : config.attributeWriterCallbacks) {
898 if (succeeded(callback->write(entryValue, dialectWriter)))
899 return true;
900 }
901 if (const BytecodeDialectInterface *interface =
902 entry.dialect->interface) {
903 if (succeeded(interface->writeAttribute(entryValue, dialectWriter)))
904 return true;
905 }
906 }
907
908 // If the entry was not emitted using a callback or a dialect interface,
909 // emit it using the textual format.
910 emitAttrOrTypeRawImpl();
911 return false;
912 };
913
914 bool hasCustomEncoding = emitAttrOrTypeImpl();
915
916 // Record the offset of this entry.
917 uint64_t curOffset = attrTypeEmitter.size();
918 offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding,
919 "attr/type offset");
920 prevOffset = curOffset;
921 };
922
923 // Emit the attribute and type entries for each dialect.
924 writeDialectGrouping(offsetEmitter, numberingState.getAttributes(),
925 emitAttrOrType);
926 writeDialectGrouping(offsetEmitter, numberingState.getTypes(),
927 emitAttrOrType);
928
929 // Emit the sections to the stream.
930 emitter.emitSection(bytecode::Section::kAttrTypeOffset,
931 std::move(offsetEmitter));
932 emitter.emitSection(bytecode::Section::kAttrType, std::move(attrTypeEmitter));
933}
934
935//===----------------------------------------------------------------------===//
936// Operations
937//===----------------------------------------------------------------------===//
938
939LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter,
940 Block *block) {
942 bool hasArgs = !args.empty();
943
944 // Emit the number of operations in this block, and if it has arguments. We
945 // use the low bit of the operation count to indicate if the block has
946 // arguments.
947 unsigned numOps = numberingState.getOperationCount(block);
948 emitter.emitVarIntWithFlag(numOps, hasArgs, "block num ops");
949
950 // Emit the arguments of the block.
951 if (hasArgs) {
952 emitter.emitVarInt(args.size(), "block args count");
953 for (BlockArgument arg : args) {
954 Location argLoc = arg.getLoc();
955 if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) {
956 emitter.emitVarIntWithFlag(numberingState.getNumber(arg.getType()),
957 !isa<UnknownLoc>(argLoc), "block arg type");
958 if (!isa<UnknownLoc>(argLoc))
959 emitter.emitVarInt(numberingState.getNumber(argLoc),
960 "block arg location");
961 } else {
962 emitter.emitVarInt(numberingState.getNumber(arg.getType()),
963 "block arg type");
964 emitter.emitVarInt(numberingState.getNumber(argLoc),
965 "block arg location");
966 }
967 }
968 if (config.bytecodeVersion >= bytecode::kUseListOrdering) {
969 uint64_t maskOffset = emitter.size();
970 uint8_t encodingMask = 0;
971 emitter.emitByte(0, "use-list separator");
972 writeUseListOrders(emitter, encodingMask, args);
973 if (encodingMask)
974 emitter.patchByte(maskOffset, encodingMask, "block patch encoding");
975 }
976 }
977
978 // Emit the operations within the block.
979 for (Operation &op : *block)
980 if (failed(writeOp(emitter, &op)))
981 return failure();
982 return success();
983}
984
985LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
986 emitter.emitVarInt(numberingState.getNumber(op->getName()), "op name ID");
987
988 // Emit a mask for the operation components. We need to fill this in later
989 // (when we actually know what needs to be emitted), so emit a placeholder for
990 // now.
991 uint64_t maskOffset = emitter.size();
992 uint8_t opEncodingMask = 0;
993 emitter.emitByte(0, "op separator");
994
995 // Emit the location for this operation.
996 emitter.emitVarInt(numberingState.getNumber(op->getLoc()), "op location");
997
998 // Emit the attributes of this operation.
999 DictionaryAttr attrs = op->getDiscardableAttrDictionary();
1000 // Allow deployment to version <kNativePropertiesEncoding by merging inherent
1001 // attribute with the discardable ones. We should fail if there are any
1002 // conflicts. When properties are not used by the op, also store everything as
1003 // attributes.
1004 if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding ||
1005 !op->getPropertiesStorage()) {
1006 attrs = op->getAttrDictionary();
1007 }
1008 if (!attrs.empty()) {
1009 opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
1010 emitter.emitVarInt(numberingState.getNumber(attrs), "op attrs count");
1011 }
1012
1013 // Emit the properties of this operation, for now we still support deployment
1014 // to version <kNativePropertiesEncoding.
1015 if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding) {
1016 std::optional<ssize_t> propertiesId = propertiesSection.emit(op);
1017 if (propertiesId.has_value()) {
1019 emitter.emitVarInt(*propertiesId, "op properties ID");
1020 }
1021 }
1022
1023 // Emit the result types of the operation.
1024 if (unsigned numResults = op->getNumResults()) {
1025 opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
1026 emitter.emitVarInt(numResults, "op results count");
1027 for (Type type : op->getResultTypes())
1028 emitter.emitVarInt(numberingState.getNumber(type), "op result type");
1029 }
1030
1031 // Emit the operands of the operation.
1032 if (unsigned numOperands = op->getNumOperands()) {
1034 emitter.emitVarInt(numOperands, "op operands count");
1035 for (Value operand : op->getOperands())
1036 emitter.emitVarInt(numberingState.getNumber(operand), "op operand types");
1037 }
1038
1039 // Emit the successors of the operation.
1040 if (unsigned numSuccessors = op->getNumSuccessors()) {
1042 emitter.emitVarInt(numSuccessors, "op successors count");
1043 for (Block *successor : op->getSuccessors())
1044 emitter.emitVarInt(numberingState.getNumber(successor), "op successor");
1045 }
1046
1047 // Emit the use-list orders to bytecode, so we can reconstruct the same order
1048 // at parsing.
1049 if (config.bytecodeVersion >= bytecode::kUseListOrdering)
1050 writeUseListOrders(emitter, opEncodingMask, ValueRange(op->getResults()));
1051
1052 // Check for regions.
1053 unsigned numRegions = op->getNumRegions();
1054 if (numRegions)
1056
1057 // Update the mask for the operation.
1058 emitter.patchByte(maskOffset, opEncodingMask, "op encoding mask");
1059
1060 // With the mask emitted, we can now emit the regions of the operation. We do
1061 // this after mask emission to avoid offset complications that may arise by
1062 // emitting the regions first (e.g. if the regions are huge, backpatching the
1063 // op encoding mask is more annoying).
1064 if (numRegions) {
1065 bool isIsolatedFromAbove = numberingState.isIsolatedFromAbove(op);
1066 emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove,
1067 "op regions count");
1068
1069 // If the region is not isolated from above, or we are emitting bytecode
1070 // targeting version <kLazyLoading, we don't use a section.
1071 if (isIsolatedFromAbove &&
1072 config.bytecodeVersion >= bytecode::kLazyLoading) {
1073 EncodingEmitter regionEmitter;
1074 if (failed(writeRegions(regionEmitter, op->getRegions())))
1075 return failure();
1076 emitter.emitSection(bytecode::Section::kIR, std::move(regionEmitter));
1077
1078 } else if (failed(writeRegions(emitter, op->getRegions()))) {
1079 return failure();
1080 }
1081 }
1082 return success();
1083}
1084
1085void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter,
1086 uint8_t &opEncodingMask,
1087 ValueRange range) {
1088 // Loop over the results and store the use-list order per result index.
1090 for (auto item : llvm::enumerate(range)) {
1091 auto value = item.value();
1092 // No need to store a custom use-list order if the result does not have
1093 // multiple uses.
1094 if (value.use_empty() || value.hasOneUse())
1095 continue;
1096
1097 // For each result, assemble the list of pairs (use-list-index,
1098 // global-value-index). While doing so, detect if the global-value-index is
1099 // already ordered with respect to the use-list-index.
1100 bool alreadyOrdered = true;
1101 auto &firstUse = *value.use_begin();
1102 uint64_t prevID = bytecode::getUseID(
1103 firstUse, numberingState.getNumber(firstUse.getOwner()));
1104 llvm::SmallVector<std::pair<unsigned, uint64_t>> useListPairs(
1105 {{0, prevID}});
1106
1107 for (auto use : llvm::drop_begin(llvm::enumerate(value.getUses()))) {
1108 uint64_t currentID = bytecode::getUseID(
1109 use.value(), numberingState.getNumber(use.value().getOwner()));
1110 // The use-list order achieved when building the IR at parsing always
1111 // pushes new uses on front. Hence, if the order by unique ID is
1112 // monotonically decreasing, a roundtrip to bytecode preserves such order.
1113 alreadyOrdered &= (prevID > currentID);
1114 useListPairs.push_back({use.index(), currentID});
1115 prevID = currentID;
1116 }
1117
1118 // Do not emit if the order is already sorted.
1119 if (alreadyOrdered)
1120 continue;
1121
1122 // Sort the use indices by the unique ID indices in descending order.
1123 std::sort(
1124 useListPairs.begin(), useListPairs.end(),
1125 [](auto elem1, auto elem2) { return elem1.second > elem2.second; });
1126
1127 map.try_emplace(item.index(), llvm::map_range(useListPairs, [](auto elem) {
1128 return elem.first;
1129 }));
1130 }
1131
1132 if (map.empty())
1133 return;
1134
1136 // Emit the number of results that have a custom use-list order if the number
1137 // of results is greater than one.
1138 if (range.size() != 1) {
1139 emitter.emitVarInt(map.size(), "custom use-list size");
1140 }
1141
1142 for (const auto &item : map) {
1143 auto resultIdx = item.getFirst();
1144 auto useListOrder = item.getSecond();
1145
1146 // Compute the number of uses that are actually shuffled. If those are less
1147 // than half of the total uses, encoding the index pair `(src, dst)` is more
1148 // space efficient.
1149 size_t shuffledElements =
1150 llvm::count_if(llvm::enumerate(useListOrder),
1151 [](auto item) { return item.index() != item.value(); });
1152 bool indexPairEncoding = shuffledElements < (useListOrder.size() / 2);
1153
1154 // For single result, we don't need to store the result index.
1155 if (range.size() != 1)
1156 emitter.emitVarInt(resultIdx, "use-list result index");
1157
1158 if (indexPairEncoding) {
1159 emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding,
1160 "use-list index pair size");
1161 for (auto pair : llvm::enumerate(useListOrder)) {
1162 if (pair.index() != pair.value()) {
1163 emitter.emitVarInt(pair.value(), "use-list index pair first");
1164 emitter.emitVarInt(pair.index(), "use-list index pair second");
1165 }
1166 }
1167 } else {
1168 emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding,
1169 "use-list size");
1170 for (const auto &index : useListOrder)
1171 emitter.emitVarInt(index, "use-list order");
1172 }
1173 }
1174}
1175
1176LogicalResult BytecodeWriter::writeRegion(EncodingEmitter &emitter,
1177 Region *region) {
1178 // If the region is empty, we only need to emit the number of blocks (which is
1179 // zero).
1180 if (region->empty()) {
1181 emitter.emitVarInt(/*numBlocks*/ 0, "region block count empty");
1182 return success();
1183 }
1184
1185 // Emit the number of blocks and values within the region.
1186 unsigned numBlocks, numValues;
1187 std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region);
1188 emitter.emitVarInt(numBlocks, "region block count");
1189 emitter.emitVarInt(numValues, "region value count");
1190
1191 // Emit the blocks within the region.
1192 for (Block &block : *region)
1193 if (failed(writeBlock(emitter, &block)))
1194 return failure();
1195 return success();
1196}
1197
1198LogicalResult BytecodeWriter::writeIRSection(EncodingEmitter &emitter,
1199 Operation *op) {
1200 EncodingEmitter irEmitter;
1201
1202 // Write the IR section the same way as a block with no arguments. Note that
1203 // the low-bit of the operation count for a block is used to indicate if the
1204 // block has arguments, which in this case is always false.
1205 irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false, "ir section");
1206
1207 // Emit the operations.
1208 if (failed(writeOp(irEmitter, op)))
1209 return failure();
1210
1211 emitter.emitSection(bytecode::Section::kIR, std::move(irEmitter));
1212 return success();
1213}
1214
1215//===----------------------------------------------------------------------===//
1216// Resources
1217//===----------------------------------------------------------------------===//
1218
1219namespace {
1220/// This class represents a resource builder implementation for the MLIR
1221/// bytecode format.
1222class ResourceBuilder : public AsmResourceBuilder {
1223public:
1224 using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>;
1225
1226 ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection,
1227 PostProcessFn postProcessFn, bool shouldElideData)
1228 : emitter(emitter), stringSection(stringSection),
1229 postProcessFn(postProcessFn), shouldElideData(shouldElideData) {}
1230 ~ResourceBuilder() override = default;
1231
1232 void buildBlob(StringRef key, ArrayRef<char> data,
1233 uint32_t dataAlignment) final {
1234 if (!shouldElideData)
1235 emitter.emitOwnedBlobAndAlignment(data, dataAlignment, "resource blob");
1236 postProcessFn(key, AsmResourceEntryKind::Blob);
1237 }
1238 void buildBool(StringRef key, bool data) final {
1239 if (!shouldElideData)
1240 emitter.emitByte(data, "resource bool");
1241 postProcessFn(key, AsmResourceEntryKind::Bool);
1242 }
1243 void buildString(StringRef key, StringRef data) final {
1244 if (!shouldElideData)
1245 emitter.emitVarInt(stringSection.insert(data), "resource string");
1246 postProcessFn(key, AsmResourceEntryKind::String);
1247 }
1248
1249private:
1250 EncodingEmitter &emitter;
1251 StringSectionBuilder &stringSection;
1252 PostProcessFn postProcessFn;
1253 bool shouldElideData = false;
1254};
1255} // namespace
1256
1257void BytecodeWriter::writeResourceSection(Operation *op,
1258 EncodingEmitter &emitter) {
1259 EncodingEmitter resourceEmitter;
1260 EncodingEmitter resourceOffsetEmitter;
1261 uint64_t prevOffset = 0;
1263 curResourceEntries;
1264
1265 // Functor used to process the offset for a resource of `kind` defined by
1266 // 'key'.
1267 auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) {
1268 uint64_t curOffset = resourceEmitter.size();
1269 curResourceEntries.emplace_back(key, kind, curOffset - prevOffset);
1270 prevOffset = curOffset;
1271 };
1272
1273 // Functor used to emit a resource group defined by 'key'.
1274 auto emitResourceGroup = [&](uint64_t key) {
1275 resourceOffsetEmitter.emitVarInt(key, "resource group key");
1276 resourceOffsetEmitter.emitVarInt(curResourceEntries.size(),
1277 "resource group size");
1278 for (auto [key, kind, size] : curResourceEntries) {
1279 resourceOffsetEmitter.emitVarInt(stringSection.insert(key),
1280 "resource key");
1281 resourceOffsetEmitter.emitVarInt(size, "resource size");
1282 resourceOffsetEmitter.emitByte(kind, "resource kind");
1283 }
1284 };
1285
1286 // Builder used to emit resources.
1287 ResourceBuilder entryBuilder(resourceEmitter, stringSection,
1288 appendResourceOffset,
1289 config.shouldElideResourceData);
1290
1291 // Emit the external resource entries.
1292 resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size(),
1293 "external resource printer count");
1294 for (const auto &printer : config.externalResourcePrinters) {
1295 curResourceEntries.clear();
1296 printer->buildResources(op, entryBuilder);
1297 emitResourceGroup(stringSection.insert(printer->getName()));
1298 }
1299
1300 // Emit the dialect resource entries.
1301 for (DialectNumbering &dialect : numberingState.getDialects()) {
1302 if (!dialect.asmInterface)
1303 continue;
1304 curResourceEntries.clear();
1305 dialect.asmInterface->buildResources(op, dialect.resources, entryBuilder);
1306
1307 // Emit the declaration resources for this dialect, these didn't get emitted
1308 // by the interface. These resources don't have data attached, so just use a
1309 // "blob" kind as a placeholder.
1310 for (const auto &resource : dialect.resourceMap)
1311 if (resource.second->isDeclaration)
1312 appendResourceOffset(resource.first, AsmResourceEntryKind::Blob);
1313
1314 // Emit the resource group for this dialect.
1315 if (!curResourceEntries.empty())
1316 emitResourceGroup(dialect.number);
1317 }
1318
1319 // If we didn't emit any resource groups, elide the resource sections.
1320 if (resourceOffsetEmitter.size() == 0)
1321 return;
1322
1323 emitter.emitSection(bytecode::Section::kResourceOffset,
1324 std::move(resourceOffsetEmitter));
1325 emitter.emitSection(bytecode::Section::kResource, std::move(resourceEmitter));
1326}
1327
1328//===----------------------------------------------------------------------===//
1329// Strings
1330//===----------------------------------------------------------------------===//
1331
1332void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) {
1333 EncodingEmitter stringEmitter;
1334 stringSection.write(stringEmitter);
1335 emitter.emitSection(bytecode::Section::kString, std::move(stringEmitter));
1336}
1337
1338//===----------------------------------------------------------------------===//
1339// Properties
1340//===----------------------------------------------------------------------===//
1341
1342void BytecodeWriter::writePropertiesSection(EncodingEmitter &emitter) {
1343 EncodingEmitter propertiesEmitter;
1344 propertiesSection.write(propertiesEmitter);
1345 emitter.emitSection(bytecode::Section::kProperties,
1346 std::move(propertiesEmitter));
1347}
1348
1349//===----------------------------------------------------------------------===//
1350// Entry Points
1351//===----------------------------------------------------------------------===//
1352
1355 BytecodeWriter writer(op, config);
1356 return writer.write(op, os);
1357}
return success()
static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries, EntryCallbackT &&callback)
Write the given entries in contiguous groups with the same parent dialect.
*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing should be inserted(the insertion happens right before the *insertion point). Since `begin` can itself be invalidated due to the memref *rewriting done from this method
static LogicalResult emit(SolverOp solver, const SMTEmissionOptions &options, mlir::raw_indented_ostream &stream)
Emit the SMT operations in the given 'solver' to the 'stream'.
This class is used to build resource entries for use by the printer.
Definition AsmState.h:247
A class to interact with the attributes and types printer when emitting MLIR bytecode.
This class represents an argument of a Block.
Definition Value.h:309
Block represents an ordered list of Operations.
Definition Block.h:33
BlockArgListType getArguments()
Definition Block.h:87
This class contains the configuration used for the bytecode writer.
llvm::StringMap< std::unique_ptr< DialectVersion > > & getDialectVersionMap() const
A map containing the dialect versions to emit.
void setElideResourceDataFlag(bool shouldElideResourceData=true)
Set a boolean flag to skip emission of resources into the bytecode file.
BytecodeWriterConfig(StringRef producer="MLIR" LLVM_VERSION_STRING)
producer is an optional string that can be used to identify the producer of the bytecode when reading...
void attachFallbackResourcePrinter(FallbackAsmResourceMap &map)
Attach resource printers to the AsmState for the fallback resources in the given map.
void attachTypeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Type > > callback)
int64_t getDesiredBytecodeVersion() const
Get the set desired bytecode version to emit.
void setDialectVersion(std::unique_ptr< DialectVersion > dialectVersion) const
Set a given dialect version to emit on the map.
void attachAttributeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > callback)
Attach a custom bytecode printer callback to the configuration for the emission of custom type/attrib...
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > getTypeWriterCallbacks() const
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > getAttributeWriterCallbacks() const
Retrieve the callbacks.
void setDesiredBytecodeVersion(int64_t bytecodeVersion)
Set the desired bytecode version to emit.
void attachResourcePrinter(std::unique_ptr< AsmResourcePrinter > printer)
Attach the given resource printer to the writer configuration.
This class defines a virtual interface for writing to a bytecode stream, providing hooks into the byt...
A fallback map containing external resources not explicitly handled by another parser/printer.
Definition AsmState.h:421
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
DictionaryAttr getAttrDictionary()
Return all of the attributes on this operation as a DictionaryAttr.
unsigned getNumSuccessors()
Definition Operation.h:706
bool isRegistered()
Returns true if this operation has a registered operation description, otherwise false.
Definition Operation.h:129
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition Operation.h:674
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:223
unsigned getNumOperands()
Definition Operation.h:346
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
OperationName getName()
The name of an operation is the key identifier for it.
Definition Operation.h:119
DictionaryAttr getDiscardableAttrDictionary()
Return all of the discardable attributes on this operation as a DictionaryAttr.
Definition Operation.h:501
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition Operation.h:677
result_type_range getResultTypes()
Definition Operation.h:428
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:378
SuccessorRange getSuccessors()
Definition Operation.h:703
result_range getResults()
Definition Operation.h:415
int getPropertiesStorageSize() const
Returns the properties storage size.
Definition Operation.h:896
OpaqueProperties getPropertiesStorage()
Returns the properties storage.
Definition Operation.h:900
unsigned getNumResults()
Return the number of results held by this operation.
Definition Operation.h:404
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
bool empty()
Definition Region.h:60
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
@ kAttrType
This section contains the attributes and types referenced within an IR module.
Definition Encoding.h:73
@ kAttrTypeOffset
This section contains the offsets for the attribute and types within the AttrType section.
Definition Encoding.h:77
@ kIR
This section contains the list of operations serialized into the bytecode, and their nested regions/o...
Definition Encoding.h:81
@ kResource
This section contains the resources of the bytecode.
Definition Encoding.h:84
@ kResourceOffset
This section contains the offsets of resources within the Resource section.
Definition Encoding.h:88
@ kDialect
This section contains the dialects referenced within an IR module.
Definition Encoding.h:69
@ kString
This section contains strings referenced within the bytecode.
Definition Encoding.h:66
@ kDialectVersions
This section contains the versions of each dialect.
Definition Encoding.h:91
@ kProperties
This section contains the properties for the operations.
Definition Encoding.h:94
static uint64_t getUseID(OperandT &val, unsigned ownerID)
Get the unique ID of a value use.
Definition Encoding.h:127
@ kUseListOrdering
Use-list ordering started to be encoded in version 3.
Definition Encoding.h:38
@ kAlignmentByte
An arbitrary value used to fill alignment padding.
Definition Encoding.h:56
@ kVersion
The current bytecode version.
Definition Encoding.h:53
@ kLazyLoading
Support for lazy-loading of isolated region was added in version 2.
Definition Encoding.h:35
@ kDialectVersioning
Dialects versioning was added in version 1.
Definition Encoding.h:32
@ kElideUnknownBlockArgLocation
Avoid recording unknown locations on block arguments (compression) started in version 4.
Definition Encoding.h:42
@ kNativePropertiesEncoding
Support for encoding properties natively in bytecode instead of merged with the discardable attribute...
Definition Encoding.h:46
@ kMinSupportedVersion
The minimum supported version of the bytecode.
Definition Encoding.h:29
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:561
Include the generated interface declarations.
const FrozenRewritePatternSet GreedyRewriteConfig config
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
Definition LLVM.h:126
AsmResourceEntryKind
This enum represents the different kinds of resource values.
Definition AsmState.h:280
@ Blob
A blob of data with an accompanying alignment.
Definition AsmState.h:282
LogicalResult writeBytecodeToFile(Operation *op, raw_ostream &os, const BytecodeWriterConfig &config={})
Write the bytecode for the given operation to the provided output stream.
llvm::function_ref< Fn > function_ref
Definition LLVM.h:152
StringRef producer
The producer of the bytecode.
llvm::StringMap< std::unique_ptr< DialectVersion > > dialectVersionMap
A map containing dialect version information for each dialect to emit.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > attributeWriterCallbacks
Printer callbacks used to emit custom type and attribute encodings.
SmallVector< std::unique_ptr< AsmResourcePrinter > > externalResourcePrinters
A collection of non-dialect resource printers.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > typeWriterCallbacks
int64_t bytecodeVersion
Version to use when writing.
bool shouldElideResourceData
A flag specifying whether to elide emission of resources into the bytecode file.
This class represents a numbering entry for a Dialect.
unsigned number
The number assigned to the dialect.