MLIR 23.0.0git
BytecodeWriter.cpp
Go to the documentation of this file.
1//===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "IRNumbering.h"
14#include "mlir/IR/Attributes.h"
15#include "mlir/IR/Diagnostics.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/CachedHashString.h"
19#include "llvm/ADT/MapVector.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/Support/Debug.h"
22#include "llvm/Support/DebugLog.h"
23#include "llvm/Support/Endian.h"
24#include "llvm/Support/raw_ostream.h"
25#include <optional>
26
27#define DEBUG_TYPE "mlir-bytecode-writer"
28
29using namespace mlir;
30using namespace mlir::bytecode::detail;
31
32//===----------------------------------------------------------------------===//
33// BytecodeWriterConfig
34//===----------------------------------------------------------------------===//
35
37 Impl(StringRef producer) : producer(producer) {}
38
39 /// Version to use when writing.
40 /// Note: This only differs from kVersion if a specific version is set.
42
43 /// A flag specifying whether to elide emission of resources into the bytecode
44 /// file.
46
47 /// A map containing dialect version information for each dialect to emit.
48 llvm::StringMap<std::unique_ptr<DialectVersion>> dialectVersionMap;
49
50 /// The producer of the bytecode.
51 StringRef producer;
52
53 /// Printer callbacks used to emit custom type and attribute encodings.
58
59 /// A collection of non-dialect resource printers.
61};
62
64 : impl(std::make_unique<Impl>(producer)) {}
72
74
77 return impl->attributeWriterCallbacks;
78}
79
82 return impl->typeWriterCallbacks;
83}
84
86 std::unique_ptr<AttrTypeBytecodeWriter<Attribute>> callback) {
87 impl->attributeWriterCallbacks.emplace_back(std::move(callback));
88}
89
91 std::unique_ptr<AttrTypeBytecodeWriter<Type>> callback) {
92 impl->typeWriterCallbacks.emplace_back(std::move(callback));
93}
94
96 std::unique_ptr<AsmResourcePrinter> printer) {
97 impl->externalResourcePrinters.emplace_back(std::move(printer));
98}
99
101 bool shouldElideResourceData) {
102 impl->shouldElideResourceData = shouldElideResourceData;
103}
104
106 impl->bytecodeVersion = bytecodeVersion;
107}
108
110 return impl->bytecodeVersion;
111}
112
113llvm::StringMap<std::unique_ptr<DialectVersion>> &
115 return impl->dialectVersionMap;
116}
117
119 llvm::StringRef dialectName,
120 std::unique_ptr<DialectVersion> dialectVersion) const {
121 assert(!impl->dialectVersionMap.contains(dialectName) &&
122 "cannot override a previously set dialect version");
123 impl->dialectVersionMap.insert({dialectName, std::move(dialectVersion)});
124}
125
126//===----------------------------------------------------------------------===//
127// EncodingEmitter
128//===----------------------------------------------------------------------===//
129
130namespace {
131/// This class functions as the underlying encoding emitter for the bytecode
132/// writer. This class is a bit different compared to other types of encoders;
133/// it does not use a single buffer, but instead may contain several buffers
134/// (some owned by the writer, and some not) that get concatted during the final
135/// emission.
136class EncodingEmitter {
137public:
138 EncodingEmitter() = default;
139 EncodingEmitter(const EncodingEmitter &) = delete;
140 EncodingEmitter &operator=(const EncodingEmitter &) = delete;
141
142 /// Write the current contents to the provided stream.
143 void writeTo(raw_ostream &os) const;
144
145 /// Return the current size of the encoded buffer.
146 size_t size() const { return prevResultSize + currentResult.size(); }
147
148 //===--------------------------------------------------------------------===//
149 // Emission
150 //===--------------------------------------------------------------------===//
151
152 /// Backpatch a byte in the result buffer at the given offset.
153 void patchByte(uint64_t offset, uint8_t value, StringLiteral desc) {
154 LDBG() << "patchByte(" << offset << ',' << uint64_t(value) << ")\t" << desc;
155 assert(offset < size() && offset >= prevResultSize &&
156 "cannot patch previously emitted data");
157 currentResult[offset - prevResultSize] = value;
158 }
159
160 /// Emit the provided blob of data, which is owned by the caller and is
161 /// guaranteed to not die before the end of the bytecode process.
162 void emitOwnedBlob(ArrayRef<uint8_t> data, StringLiteral desc) {
163 LDBG() << "emitOwnedBlob(" << data.size() << "b)\t" << desc;
164 // Push the current buffer before adding the provided data.
165 appendResult(std::move(currentResult));
166 appendOwnedResult(data);
167 }
168
169 /// Emit the provided blob of data that has the given alignment, which is
170 /// owned by the caller and is guaranteed to not die before the end of the
171 /// bytecode process. The alignment value is also encoded, making it available
172 /// on load.
173 void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment,
174 StringLiteral desc) {
175 emitVarInt(alignment, desc);
176 emitVarInt(data.size(), desc);
177
178 alignTo(alignment);
179 emitOwnedBlob(data, desc);
180 }
181 void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment,
182 StringLiteral desc) {
183 ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
184 data.size());
185 emitOwnedBlobAndAlignment(castedData, alignment, desc);
186 }
187
188 /// Align the emitter to the given alignment.
189 void alignTo(unsigned alignment) {
190 if (alignment < 2)
191 return;
192 assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment");
193
194 // Check to see if we need to emit any padding bytes to meet the desired
195 // alignment.
196 size_t curOffset = size();
197 size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset;
198 while (paddingSize--)
199 emitByte(bytecode::kAlignmentByte, "alignment byte");
200
201 // Keep track of the maximum required alignment.
202 requiredAlignment = std::max(requiredAlignment, alignment);
203 }
204
205 //===--------------------------------------------------------------------===//
206 // Integer Emission
207
208 /// Emit a single byte.
209 template <typename T>
210 void emitByte(T byte, StringLiteral desc) {
211 LDBG() << "emitByte(" << uint64_t(byte) << ")\t" << desc;
212 currentResult.push_back(static_cast<uint8_t>(byte));
213 }
214
215 /// Emit a range of bytes.
216 void emitBytes(ArrayRef<uint8_t> bytes, StringLiteral desc) {
217 LDBG() << "emitBytes(" << bytes.size() << "b)\t" << desc;
218 llvm::append_range(currentResult, bytes);
219 }
220
221 /// Emit a variable length integer. The first encoded byte contains a prefix
222 /// in the low bits indicating the encoded length of the value. This length
223 /// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits
224 /// indicate the number of _additional_ bytes (not including the prefix byte).
225 /// All remaining bits in the first byte, along with all of the bits in
226 /// additional bytes, provide the value of the integer encoded in
227 /// little-endian order.
228 void emitVarInt(uint64_t value, StringLiteral desc) {
229 LDBG() << "emitVarInt(" << value << ")\t" << desc;
230
231 // In the most common case, the value can be represented in a single byte.
232 // Given how hot this case is, explicitly handle that here.
233 if ((value >> 7) == 0)
234 return emitByte((value << 1) | 0x1, desc);
235 emitMultiByteVarInt(value, desc);
236 }
237
238 /// Emit a signed variable length integer. Signed varints are encoded using
239 /// a varint with zigzag encoding, meaning that we use the low bit of the
240 /// value to indicate the sign of the value. This allows for more efficient
241 /// encoding of negative values by limiting the number of active bits
242 void emitSignedVarInt(uint64_t value, StringLiteral desc) {
243 emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63), desc);
244 }
245
246 /// Emit a variable length integer whose low bit is used to encode the
247 /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
248 void emitVarIntWithFlag(uint64_t value, bool flag, StringLiteral desc) {
249 emitVarInt((value << 1) | (flag ? 1 : 0), desc);
250 }
251
252 //===--------------------------------------------------------------------===//
253 // String Emission
254
255 /// Emit the given string as a nul terminated string.
256 void emitNulTerminatedString(StringRef str, StringLiteral desc) {
257 emitString(str, desc);
258 emitByte(0, "null terminator");
259 }
260
261 /// Emit the given string without a nul terminator.
262 void emitString(StringRef str, StringLiteral desc) {
263 emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()},
264 desc);
265 }
266
267 //===--------------------------------------------------------------------===//
268 // Section Emission
269
270 /// Emit a nested section of the given code, whose contents are encoded in the
271 /// provided emitter.
272 void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) {
273 // Emit the section code and length. The high bit of the code is used to
274 // indicate whether the section alignment is present, so save an offset to
275 // it.
276 uint64_t codeOffset = currentResult.size();
277 emitByte(code, "section code");
278 emitVarInt(emitter.size(), "section size");
279
280 // Integrate the alignment of the section into this emitter if necessary.
281 unsigned emitterAlign = emitter.requiredAlignment;
282 if (emitterAlign > 1) {
283 if (size() & (emitterAlign - 1)) {
284 emitVarInt(emitterAlign, "section alignment");
285 alignTo(emitterAlign);
286
287 // Indicate that we needed to align the section, the high bit of the
288 // code field is used for this.
289 currentResult[codeOffset] |= 0b10000000;
290 } else {
291 // Otherwise, if we happen to be at a compatible offset, we just
292 // remember that we need this alignment.
293 requiredAlignment = std::max(requiredAlignment, emitterAlign);
294 }
295 }
296
297 // Push our current buffer and then merge the provided section body into
298 // ours.
299 appendResult(std::move(currentResult));
300 for (std::vector<uint8_t> &result : emitter.prevResultStorage)
301 prevResultStorage.push_back(std::move(result));
302 llvm::append_range(prevResultList, emitter.prevResultList);
303 prevResultSize += emitter.prevResultSize;
304 appendResult(std::move(emitter.currentResult));
305 }
306
307private:
308 /// Emit the given value using a variable width encoding. This method is a
309 /// fallback when the number of bytes needed to encode the value is greater
310 /// than 1. We mark it noinline here so that the single byte hot path isn't
311 /// pessimized.
312 LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value,
313 StringLiteral desc);
314
315 /// Append a new result buffer to the current contents.
316 void appendResult(std::vector<uint8_t> &&result) {
317 if (result.empty())
318 return;
319 prevResultStorage.emplace_back(std::move(result));
320 appendOwnedResult(prevResultStorage.back());
321 }
322 void appendOwnedResult(ArrayRef<uint8_t> result) {
323 if (result.empty())
324 return;
325 prevResultSize += result.size();
326 prevResultList.emplace_back(result);
327 }
328
329 /// The result of the emitter currently being built. We refrain from building
330 /// a single buffer to simplify emitting sections, large data, and more. The
331 /// result is thus represented using multiple distinct buffers, some of which
332 /// we own (via prevResultStorage), and some of which are just pointers into
333 /// externally owned buffers.
334 std::vector<uint8_t> currentResult;
335 std::vector<ArrayRef<uint8_t>> prevResultList;
336 std::vector<std::vector<uint8_t>> prevResultStorage;
337
338 /// An up-to-date total size of all of the buffers within `prevResultList`.
339 /// This enables O(1) size checks of the current encoding.
340 size_t prevResultSize = 0;
341
342 /// The highest required alignment for the start of this section.
343 unsigned requiredAlignment = 1;
344};
345
346//===----------------------------------------------------------------------===//
347// StringSectionBuilder
348//===----------------------------------------------------------------------===//
349
350namespace {
351/// This class is used to simplify the process of emitting the string section.
352class StringSectionBuilder {
353public:
354 /// Add the given string to the string section, and return the index of the
355 /// string within the section.
356 size_t insert(StringRef str) {
357 auto it = strings.insert({llvm::CachedHashStringRef(str), strings.size()});
358 return it.first->second;
359 }
360
361 /// Write the current set of strings to the given emitter.
362 void write(EncodingEmitter &emitter) {
363 emitter.emitVarInt(strings.size(), "string section size");
364
365 // Emit the sizes in reverse order, so that we don't need to backpatch an
366 // offset to the string data or have a separate section.
367 for (const auto &it : llvm::reverse(strings))
368 emitter.emitVarInt(it.first.size() + 1, "string size");
369 // Emit the string data itself.
370 for (const auto &it : strings)
371 emitter.emitNulTerminatedString(it.first.val(), "string");
372 }
373
374private:
375 /// A set of strings referenced within the bytecode. The value of the map is
376 /// unused.
377 llvm::MapVector<llvm::CachedHashStringRef, size_t> strings;
378};
379} // namespace
380
381class DialectWriter : public DialectBytecodeWriter {
382 using DialectVersionMapT = llvm::StringMap<std::unique_ptr<DialectVersion>>;
383
384public:
385 DialectWriter(int64_t bytecodeVersion, EncodingEmitter &emitter,
386 IRNumberingState &numberingState,
387 StringSectionBuilder &stringSection,
388 const DialectVersionMapT &dialectVersionMap)
389 : bytecodeVersion(bytecodeVersion), emitter(emitter),
390 numberingState(numberingState), stringSection(stringSection),
391 dialectVersionMap(dialectVersionMap) {}
392
393 //===--------------------------------------------------------------------===//
394 // IR
395 //===--------------------------------------------------------------------===//
396
397 void writeAttribute(Attribute attr) override {
398 emitter.emitVarInt(numberingState.getNumber(attr), "dialect attr");
399 }
400 void writeOptionalAttribute(Attribute attr) override {
401 if (!attr) {
402 emitter.emitVarInt(0, "dialect optional attr none");
403 return;
404 }
405 emitter.emitVarIntWithFlag(numberingState.getNumber(attr), true,
406 "dialect optional attr");
407 }
408
409 void writeType(Type type) override {
410 emitter.emitVarInt(numberingState.getNumber(type), "dialect type");
411 }
412
413 void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
414 emitter.emitVarInt(numberingState.getNumber(resource), "dialect resource");
415 }
416
417 //===--------------------------------------------------------------------===//
418 // Primitives
419 //===--------------------------------------------------------------------===//
420
421 void writeVarInt(uint64_t value) override {
422 emitter.emitVarInt(value, "dialect writer");
423 }
424
425 void writeSignedVarInt(int64_t value) override {
426 emitter.emitSignedVarInt(value, "dialect writer");
427 }
428
429 void writeAPIntWithKnownWidth(const APInt &value) override {
430 size_t bitWidth = value.getBitWidth();
431
432 // If the value is a single byte, just emit it directly without going
433 // through a varint.
434 if (bitWidth <= 8)
435 return emitter.emitByte(value.getLimitedValue(), "dialect APInt");
436
437 // If the value fits within a single varint, emit it directly.
438 if (bitWidth <= 64)
439 return emitter.emitSignedVarInt(value.getLimitedValue(), "dialect APInt");
440
441 // Otherwise, we need to encode a variable number of active words. We use
442 // active words instead of the number of total words under the observation
443 // that smaller values will be more common.
444 unsigned numActiveWords = value.getActiveWords();
445 emitter.emitVarInt(numActiveWords, "dialect APInt word count");
446
447 const uint64_t *rawValueData = value.getRawData();
448 for (unsigned i = 0; i < numActiveWords; ++i)
449 emitter.emitSignedVarInt(rawValueData[i], "dialect APInt word");
450 }
451
452 void writeAPFloatWithKnownSemantics(const APFloat &value) override {
453 writeAPIntWithKnownWidth(value.bitcastToAPInt());
454 }
455
456 void writeOwnedString(StringRef str) override {
457 emitter.emitVarInt(stringSection.insert(str), "dialect string");
458 }
459
460 void writeOwnedBlob(ArrayRef<char> blob) override {
461 emitter.emitVarInt(blob.size(), "dialect blob");
462 emitter.emitOwnedBlob(
463 ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(blob.data()),
464 blob.size()),
465 "dialect blob");
466 }
467
468 void writeUnownedBlob(ArrayRef<char> blob) override {
469 emitter.emitVarInt(blob.size(), "dialect blob");
470 emitter.emitBytes(
471 ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(blob.data()),
472 blob.size()),
473 "dialect blob");
474 }
475
476 void writeOwnedBool(bool value) override {
477 emitter.emitByte(value, "dialect bool");
478 }
479
480 int64_t getBytecodeVersion() const override { return bytecodeVersion; }
481
482 FailureOr<const DialectVersion *>
483 getDialectVersion(StringRef dialectName) const override {
484 auto dialectEntry = dialectVersionMap.find(dialectName);
485 if (dialectEntry == dialectVersionMap.end())
486 return failure();
487 return dialectEntry->getValue().get();
488 }
489
490private:
491 int64_t bytecodeVersion;
492 EncodingEmitter &emitter;
493 IRNumberingState &numberingState;
494 StringSectionBuilder &stringSection;
495 const DialectVersionMapT &dialectVersionMap;
496};
497
498namespace {
499class PropertiesSectionBuilder {
500public:
501 PropertiesSectionBuilder(IRNumberingState &numberingState,
502 StringSectionBuilder &stringSection,
503 const BytecodeWriterConfig::Impl &config)
504 : numberingState(numberingState), stringSection(stringSection),
505 config(config) {}
506
507 /// Emit the op properties in the properties section and return the index of
508 /// the properties within the section. Return -1 if no properties was emitted.
509 std::optional<ssize_t> emit(Operation *op) {
510 EncodingEmitter propertiesEmitter;
511 if (!op->getPropertiesStorageSize())
512 return std::nullopt;
513 if (!op->isRegistered()) {
514 // Unregistered op are storing properties as an optional attribute.
515 Attribute prop = *op->getPropertiesStorage().as<Attribute *>();
516 if (!prop)
517 return std::nullopt;
518 EncodingEmitter sizeEmitter;
519 sizeEmitter.emitVarInt(numberingState.getNumber(prop), "properties size");
520 scratch.clear();
521 llvm::raw_svector_ostream os(scratch);
522 sizeEmitter.writeTo(os);
523 return emit(scratch);
524 }
525
526 EncodingEmitter emitter;
527 DialectWriter propertiesWriter(config.bytecodeVersion, emitter,
528 numberingState, stringSection,
529 config.dialectVersionMap);
530 auto iface = cast<BytecodeOpInterface>(op);
531 iface.writeProperties(propertiesWriter);
532 scratch.clear();
533 llvm::raw_svector_ostream os(scratch);
534 emitter.writeTo(os);
535 return emit(scratch);
536 }
537
538 /// Write the current set of properties to the given emitter.
539 void write(EncodingEmitter &emitter) {
540 emitter.emitVarInt(propertiesStorage.size(), "properties size");
541 if (propertiesStorage.empty())
542 return;
543 for (const auto &storage : propertiesStorage) {
544 if (storage.empty()) {
545 emitter.emitBytes(ArrayRef<uint8_t>(), "empty properties");
546 continue;
547 }
548 emitter.emitBytes(ArrayRef(reinterpret_cast<const uint8_t *>(&storage[0]),
549 storage.size()),
550 "property");
551 }
552 }
553
554 /// Returns true if the section is empty.
555 bool empty() { return propertiesStorage.empty(); }
556
557private:
558 /// Emit raw data and returns the offset in the internal buffer.
559 /// Data are deduplicated and will be copied in the internal buffer only if
560 /// they don't exist there already.
561 ssize_t emit(ArrayRef<char> rawProperties) {
562 // Populate a scratch buffer with the properties size.
563 SmallVector<char> sizeScratch;
564 {
565 EncodingEmitter sizeEmitter;
566 sizeEmitter.emitVarInt(rawProperties.size(), "properties");
567 llvm::raw_svector_ostream os(sizeScratch);
568 sizeEmitter.writeTo(os);
569 }
570 // Append a new storage to the table now.
571 size_t index = propertiesStorage.size();
572 propertiesStorage.emplace_back();
573 std::vector<char> &newStorage = propertiesStorage.back();
574 size_t propertiesSize = sizeScratch.size() + rawProperties.size();
575 newStorage.reserve(propertiesSize);
576 llvm::append_range(newStorage, sizeScratch);
577 llvm::append_range(newStorage, rawProperties);
578
579 // Try to de-duplicate the new serialized properties.
580 // If the properties is a duplicate, pop it back from the storage.
581 auto inserted = propertiesUniquing.insert(
582 std::make_pair(ArrayRef<char>(newStorage), index));
583 if (!inserted.second)
584 propertiesStorage.pop_back();
585 return inserted.first->getSecond();
586 }
587
588 /// Storage for properties.
589 std::vector<std::vector<char>> propertiesStorage;
590 SmallVector<char> scratch;
591 DenseMap<ArrayRef<char>, int64_t> propertiesUniquing;
592 IRNumberingState &numberingState;
593 StringSectionBuilder &stringSection;
594 const BytecodeWriterConfig::Impl &config;
595};
596} // namespace
597
598/// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need
599/// to go through an intermediate buffer when interacting with code that wants a
600/// raw_ostream.
601class RawEmitterOstream : public raw_ostream {
602public:
603 explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) {
604 SetUnbuffered();
605 }
606
607private:
608 void write_impl(const char *ptr, size_t size) override {
609 emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size},
610 "raw emitter");
611 }
612 uint64_t current_pos() const override { return emitter.size(); }
613
614 /// The section being emitted to.
615 EncodingEmitter &emitter;
616};
617} // namespace
618
619void EncodingEmitter::writeTo(raw_ostream &os) const {
620 // Reserve space in the ostream for the encoded contents.
621 os.reserveExtraSpace(size());
622
623 for (auto &prevResult : prevResultList)
624 os.write((const char *)prevResult.data(), prevResult.size());
625 os.write((const char *)currentResult.data(), currentResult.size());
626}
627
628void EncodingEmitter::emitMultiByteVarInt(uint64_t value, StringLiteral desc) {
629 // Compute the number of bytes needed to encode the value. Each byte can hold
630 // up to 7-bits of data. We only check up to the number of bits we can encode
631 // in the first byte (8).
632 uint64_t it = value >> 7;
633 for (size_t numBytes = 2; numBytes < 9; ++numBytes) {
634 if (LLVM_LIKELY(it >>= 7) == 0) {
635 uint64_t encodedValue = (value << 1) | 0x1;
636 encodedValue <<= (numBytes - 1);
637 llvm::support::ulittle64_t encodedValueLE(encodedValue);
638 emitBytes({reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes}, desc);
639 return;
640 }
641 }
642
643 // If the value is too large to encode in a single byte, emit a special all
644 // zero marker byte and splat the value directly.
645 emitByte(0, desc);
646 llvm::support::ulittle64_t valueLE(value);
647 emitBytes({reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)}, desc);
648}
649
650//===----------------------------------------------------------------------===//
651// Bytecode Writer
652//===----------------------------------------------------------------------===//
653
654namespace {
655class BytecodeWriter {
656public:
657 BytecodeWriter(Operation *op, const BytecodeWriterConfig &config)
658 : numberingState(op, config), config(config.getImpl()),
659 propertiesSection(numberingState, stringSection, config.getImpl()) {}
660
661 /// Write the bytecode for the given root operation.
662 LogicalResult write(Operation *rootOp, raw_ostream &os);
663
664private:
665 //===--------------------------------------------------------------------===//
666 // Dialects
667
668 void writeDialectSection(EncodingEmitter &emitter);
669
670 //===--------------------------------------------------------------------===//
671 // Attributes and Types
672
673 void writeAttrTypeSection(EncodingEmitter &emitter);
674
675 //===--------------------------------------------------------------------===//
676 // Operations
677
678 LogicalResult writeBlock(EncodingEmitter &emitter, Block *block);
679 LogicalResult writeOp(EncodingEmitter &emitter, Operation *op);
680 LogicalResult writeRegion(EncodingEmitter &emitter, Region *region);
681 LogicalResult writeIRSection(EncodingEmitter &emitter, Operation *op);
682
683 LogicalResult writeRegions(EncodingEmitter &emitter,
684 MutableArrayRef<Region> regions) {
685 return success(llvm::all_of(regions, [&](Region &region) {
686 return succeeded(writeRegion(emitter, &region));
687 }));
688 }
689
690 //===--------------------------------------------------------------------===//
691 // Resources
692
693 void writeResourceSection(Operation *op, EncodingEmitter &emitter);
694
695 //===--------------------------------------------------------------------===//
696 // Strings
697
698 void writeStringSection(EncodingEmitter &emitter);
699
700 //===--------------------------------------------------------------------===//
701 // Properties
702
703 void writePropertiesSection(EncodingEmitter &emitter);
704
705 //===--------------------------------------------------------------------===//
706 // Helpers
707
708 void writeUseListOrders(EncodingEmitter &emitter, uint8_t &opEncodingMask,
709 ValueRange range);
710
711 //===--------------------------------------------------------------------===//
712 // Fields
713
714 /// The builder used for the string section.
715 StringSectionBuilder stringSection;
716
717 /// The IR numbering state generated for the root operation.
718 IRNumberingState numberingState;
719
720 /// Configuration dictating bytecode emission.
721 const BytecodeWriterConfig::Impl &config;
722
723 /// Storage for the properties section
724 PropertiesSectionBuilder propertiesSection;
725};
726} // namespace
727
728LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
729 EncodingEmitter emitter;
730
731 // Emit the bytecode file header. This is how we identify the output as a
732 // bytecode file.
733 emitter.emitString("ML\xefR", "bytecode header");
734
735 // Emit the bytecode version.
736 if (config.bytecodeVersion < bytecode::kMinSupportedVersion ||
737 config.bytecodeVersion > bytecode::kVersion)
738 return rootOp->emitError()
739 << "unsupported version requested " << config.bytecodeVersion
740 << ", must be in range ["
741 << static_cast<int64_t>(bytecode::kMinSupportedVersion) << ", "
742 << static_cast<int64_t>(bytecode::kVersion) << ']';
743 emitter.emitVarInt(config.bytecodeVersion, "bytecode version");
744
745 // Emit the producer.
746 emitter.emitNulTerminatedString(config.producer, "bytecode producer");
747
748 // Emit the dialect section.
749 writeDialectSection(emitter);
750
751 // Emit the attributes and types section.
752 writeAttrTypeSection(emitter);
753
754 // Emit the IR section.
755 if (failed(writeIRSection(emitter, rootOp)))
756 return failure();
757
758 // Emit the resources section.
759 writeResourceSection(rootOp, emitter);
760
761 // Emit the string section.
762 writeStringSection(emitter);
763
764 // Emit the properties section.
765 if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding)
766 writePropertiesSection(emitter);
767 else if (!propertiesSection.empty())
768 return rootOp->emitError(
769 "unexpected properties emitted incompatible with bytecode <5");
770
771 // Write the generated bytecode to the provided output stream.
772 emitter.writeTo(os);
773
774 return success();
775}
776
777//===----------------------------------------------------------------------===//
778// Dialects
779//===----------------------------------------------------------------------===//
780
781/// Write the given entries in contiguous groups with the same parent dialect.
782/// Each dialect sub-group is encoded with the parent dialect and number of
783/// elements, followed by the encoding for the entries. The given callback is
784/// invoked to encode each individual entry.
785template <typename EntriesT, typename EntryCallbackT>
786static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
787 EntryCallbackT &&callback) {
788 for (auto it = entries.begin(), e = entries.end(); it != e;) {
789 auto groupStart = it++;
790
791 // Find the end of the group that shares the same parent dialect.
792 DialectNumbering *currentDialect = groupStart->dialect;
793 it = std::find_if(it, e, [&](const auto &entry) {
794 return entry.dialect != currentDialect;
795 });
796
797 // Emit the dialect and number of elements.
798 emitter.emitVarInt(currentDialect->number, "dialect number");
799 emitter.emitVarInt(std::distance(groupStart, it), "dialect offset");
800
801 // Emit the entries within the group.
802 for (auto &entry : llvm::make_range(groupStart, it))
803 callback(entry);
804 }
805}
806
807void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
808 EncodingEmitter dialectEmitter;
809
810 // Emit the referenced dialects.
811 auto dialects = numberingState.getDialects();
812 dialectEmitter.emitVarInt(llvm::size(dialects), "dialects count");
813 for (DialectNumbering &dialect : dialects) {
814 // Write the string section and get the ID.
815 size_t nameID = stringSection.insert(dialect.name);
816
817 if (config.bytecodeVersion < bytecode::kDialectVersioning) {
818 dialectEmitter.emitVarInt(nameID, "dialect name ID");
819 continue;
820 }
821
822 // Try writing the version to the versionEmitter.
823 EncodingEmitter versionEmitter;
824 if (dialect.interface) {
825 // The writer used when emitting using a custom bytecode encoding.
826 DialectWriter versionWriter(config.bytecodeVersion, versionEmitter,
827 numberingState, stringSection,
828 config.dialectVersionMap);
829 dialect.interface->writeVersion(versionWriter);
830 }
831
832 // If the version emitter is empty, version is not available. We can encode
833 // this in the dialect ID, so if there is no version, we don't write the
834 // section.
835 size_t versionAvailable = versionEmitter.size() > 0;
836 dialectEmitter.emitVarIntWithFlag(nameID, versionAvailable,
837 "dialect version");
838 if (versionAvailable)
839 dialectEmitter.emitSection(bytecode::Section::kDialectVersions,
840 std::move(versionEmitter));
841 }
842
843 if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation)
844 dialectEmitter.emitVarInt(size(numberingState.getOpNames()),
845 "op names count");
846
847 // Emit the referenced operation names grouped by dialect.
848 auto emitOpName = [&](OpNameNumbering &name) {
849 size_t stringId = stringSection.insert(name.name.stripDialect());
850 if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding)
851 dialectEmitter.emitVarInt(stringId, "dialect op name");
852 else
853 dialectEmitter.emitVarIntWithFlag(stringId, name.name.isRegistered(),
854 "dialect op name");
855 };
856 writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName);
857
858 emitter.emitSection(bytecode::Section::kDialect, std::move(dialectEmitter));
859}
860
861//===----------------------------------------------------------------------===//
862// Attributes and Types
863//===----------------------------------------------------------------------===//
864
865void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
866 EncodingEmitter attrTypeEmitter;
867 EncodingEmitter offsetEmitter;
868 offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()),
869 "attributes count");
870 offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()),
871 "types count");
872
873 // A functor used to emit an attribute or type entry.
874 uint64_t prevOffset = 0;
875 auto emitAttrOrType = [&](auto &entry) {
876 auto entryValue = entry.getValue();
877
878 auto emitAttrOrTypeRawImpl = [&]() -> void {
879 RawEmitterOstream(attrTypeEmitter) << entryValue;
880 attrTypeEmitter.emitByte(0, "attr/type separator");
881 };
882 auto emitAttrOrTypeImpl = [&]() -> bool {
883 // TODO: We don't currently support custom encoded mutable types and
884 // attributes.
885 if (entryValue.template hasTrait<TypeTrait::IsMutable>() ||
886 entryValue.template hasTrait<AttributeTrait::IsMutable>()) {
887 emitAttrOrTypeRawImpl();
888 return false;
889 }
890
891 DialectWriter dialectWriter(config.bytecodeVersion, attrTypeEmitter,
892 numberingState, stringSection,
893 config.dialectVersionMap);
894 if constexpr (std::is_same_v<std::decay_t<decltype(entryValue)>, Type>) {
895 for (const auto &callback : config.typeWriterCallbacks) {
896 if (succeeded(callback->write(entryValue, dialectWriter)))
897 return true;
898 }
899 if (const BytecodeDialectInterface *interface =
900 entry.dialect->interface) {
901 if (succeeded(interface->writeType(entryValue, dialectWriter)))
902 return true;
903 }
904 } else {
905 for (const auto &callback : config.attributeWriterCallbacks) {
906 if (succeeded(callback->write(entryValue, dialectWriter)))
907 return true;
908 }
909 if (const BytecodeDialectInterface *interface =
910 entry.dialect->interface) {
911 if (succeeded(interface->writeAttribute(entryValue, dialectWriter)))
912 return true;
913 }
914 }
915
916 // If the entry was not emitted using a callback or a dialect interface,
917 // emit it using the textual format.
918 emitAttrOrTypeRawImpl();
919 return false;
920 };
921
922 bool hasCustomEncoding = emitAttrOrTypeImpl();
923
924 // Record the offset of this entry.
925 uint64_t curOffset = attrTypeEmitter.size();
926 offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding,
927 "attr/type offset");
928 prevOffset = curOffset;
929 };
930
931 // Emit the attribute and type entries for each dialect.
932 writeDialectGrouping(offsetEmitter, numberingState.getAttributes(),
933 emitAttrOrType);
934 writeDialectGrouping(offsetEmitter, numberingState.getTypes(),
935 emitAttrOrType);
936
937 // Emit the sections to the stream.
938 emitter.emitSection(bytecode::Section::kAttrTypeOffset,
939 std::move(offsetEmitter));
940 emitter.emitSection(bytecode::Section::kAttrType, std::move(attrTypeEmitter));
941}
942
943//===----------------------------------------------------------------------===//
944// Operations
945//===----------------------------------------------------------------------===//
946
947LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter,
948 Block *block) {
950 bool hasArgs = !args.empty();
951
952 // Emit the number of operations in this block, and if it has arguments. We
953 // use the low bit of the operation count to indicate if the block has
954 // arguments.
955 unsigned numOps = numberingState.getOperationCount(block);
956 emitter.emitVarIntWithFlag(numOps, hasArgs, "block num ops");
957
958 // Emit the arguments of the block.
959 if (hasArgs) {
960 emitter.emitVarInt(args.size(), "block args count");
961 for (BlockArgument arg : args) {
962 Location argLoc = arg.getLoc();
963 if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) {
964 emitter.emitVarIntWithFlag(numberingState.getNumber(arg.getType()),
965 !isa<UnknownLoc>(argLoc), "block arg type");
966 if (!isa<UnknownLoc>(argLoc))
967 emitter.emitVarInt(numberingState.getNumber(argLoc),
968 "block arg location");
969 } else {
970 emitter.emitVarInt(numberingState.getNumber(arg.getType()),
971 "block arg type");
972 emitter.emitVarInt(numberingState.getNumber(argLoc),
973 "block arg location");
974 }
975 }
976 if (config.bytecodeVersion >= bytecode::kUseListOrdering) {
977 uint64_t maskOffset = emitter.size();
978 uint8_t encodingMask = 0;
979 emitter.emitByte(0, "use-list separator");
980 writeUseListOrders(emitter, encodingMask, args);
981 if (encodingMask)
982 emitter.patchByte(maskOffset, encodingMask, "block patch encoding");
983 }
984 }
985
986 // Emit the operations within the block.
987 for (Operation &op : *block)
988 if (failed(writeOp(emitter, &op)))
989 return failure();
990 return success();
991}
992
993LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
994 emitter.emitVarInt(numberingState.getNumber(op->getName()), "op name ID");
995
996 // Emit a mask for the operation components. We need to fill this in later
997 // (when we actually know what needs to be emitted), so emit a placeholder for
998 // now.
999 uint64_t maskOffset = emitter.size();
1000 uint8_t opEncodingMask = 0;
1001 emitter.emitByte(0, "op separator");
1002
1003 // Emit the location for this operation.
1004 emitter.emitVarInt(numberingState.getNumber(op->getLoc()), "op location");
1005
1006 // Emit the attributes of this operation.
1007 DictionaryAttr attrs = op->getDiscardableAttrDictionary();
1008 // Allow deployment to version <kNativePropertiesEncoding by merging inherent
1009 // attribute with the discardable ones. We should fail if there are any
1010 // conflicts. When properties are not used by the op, also store everything as
1011 // attributes.
1012 if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding ||
1013 !op->getPropertiesStorage()) {
1014 attrs = op->getAttrDictionary();
1015 }
1016 if (!attrs.empty()) {
1017 opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
1018 emitter.emitVarInt(numberingState.getNumber(attrs), "op attrs count");
1019 }
1020
1021 // Emit the properties of this operation, for now we still support deployment
1022 // to version <kNativePropertiesEncoding.
1023 if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding) {
1024 std::optional<ssize_t> propertiesId = propertiesSection.emit(op);
1025 if (propertiesId.has_value()) {
1027 emitter.emitVarInt(*propertiesId, "op properties ID");
1028 }
1029 }
1030
1031 // Emit the result types of the operation.
1032 if (unsigned numResults = op->getNumResults()) {
1033 opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
1034 emitter.emitVarInt(numResults, "op results count");
1035 for (Type type : op->getResultTypes())
1036 emitter.emitVarInt(numberingState.getNumber(type), "op result type");
1037 }
1038
1039 // Emit the operands of the operation.
1040 if (unsigned numOperands = op->getNumOperands()) {
1042 emitter.emitVarInt(numOperands, "op operands count");
1043 for (Value operand : op->getOperands())
1044 emitter.emitVarInt(numberingState.getNumber(operand), "op operand types");
1045 }
1046
1047 // Emit the successors of the operation.
1048 if (unsigned numSuccessors = op->getNumSuccessors()) {
1050 emitter.emitVarInt(numSuccessors, "op successors count");
1051 for (Block *successor : op->getSuccessors())
1052 emitter.emitVarInt(numberingState.getNumber(successor), "op successor");
1053 }
1054
1055 // Emit the use-list orders to bytecode, so we can reconstruct the same order
1056 // at parsing.
1057 if (config.bytecodeVersion >= bytecode::kUseListOrdering)
1058 writeUseListOrders(emitter, opEncodingMask, ValueRange(op->getResults()));
1059
1060 // Check for regions.
1061 unsigned numRegions = op->getNumRegions();
1062 if (numRegions)
1064
1065 // Update the mask for the operation.
1066 emitter.patchByte(maskOffset, opEncodingMask, "op encoding mask");
1067
1068 // With the mask emitted, we can now emit the regions of the operation. We do
1069 // this after mask emission to avoid offset complications that may arise by
1070 // emitting the regions first (e.g. if the regions are huge, backpatching the
1071 // op encoding mask is more annoying).
1072 if (numRegions) {
1073 bool isIsolatedFromAbove = numberingState.isIsolatedFromAbove(op);
1074 emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove,
1075 "op regions count");
1076
1077 // If the region is not isolated from above, or we are emitting bytecode
1078 // targeting version <kLazyLoading, we don't use a section.
1079 if (isIsolatedFromAbove &&
1080 config.bytecodeVersion >= bytecode::kLazyLoading) {
1081 EncodingEmitter regionEmitter;
1082 if (failed(writeRegions(regionEmitter, op->getRegions())))
1083 return failure();
1084 emitter.emitSection(bytecode::Section::kIR, std::move(regionEmitter));
1085
1086 } else if (failed(writeRegions(emitter, op->getRegions()))) {
1087 return failure();
1088 }
1089 }
1090 return success();
1091}
1092
1093void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter,
1094 uint8_t &opEncodingMask,
1095 ValueRange range) {
1096 // Loop over the results and store the use-list order per result index.
1098 for (auto item : llvm::enumerate(range)) {
1099 auto value = item.value();
1100 // No need to store a custom use-list order if the result does not have
1101 // multiple uses.
1102 if (value.use_empty() || value.hasOneUse())
1103 continue;
1104
1105 // For each result, assemble the list of pairs (use-list-index,
1106 // global-value-index). While doing so, detect if the global-value-index is
1107 // already ordered with respect to the use-list-index.
1108 bool alreadyOrdered = true;
1109 auto &firstUse = *value.use_begin();
1110 uint64_t prevID = bytecode::getUseID(
1111 firstUse, numberingState.getNumber(firstUse.getOwner()));
1112 llvm::SmallVector<std::pair<unsigned, uint64_t>> useListPairs(
1113 {{0, prevID}});
1114
1115 for (auto use : llvm::drop_begin(llvm::enumerate(value.getUses()))) {
1116 uint64_t currentID = bytecode::getUseID(
1117 use.value(), numberingState.getNumber(use.value().getOwner()));
1118 // The use-list order achieved when building the IR at parsing always
1119 // pushes new uses on front. Hence, if the order by unique ID is
1120 // monotonically decreasing, a roundtrip to bytecode preserves such order.
1121 alreadyOrdered &= (prevID > currentID);
1122 useListPairs.push_back({use.index(), currentID});
1123 prevID = currentID;
1124 }
1125
1126 // Do not emit if the order is already sorted.
1127 if (alreadyOrdered)
1128 continue;
1129
1130 // Sort the use indices by the unique ID indices in descending order.
1131 std::sort(
1132 useListPairs.begin(), useListPairs.end(),
1133 [](auto elem1, auto elem2) { return elem1.second > elem2.second; });
1134
1135 map.try_emplace(item.index(), llvm::map_range(useListPairs, [](auto elem) {
1136 return elem.first;
1137 }));
1138 }
1139
1140 if (map.empty())
1141 return;
1142
1144 // Emit the number of results that have a custom use-list order if the number
1145 // of results is greater than one.
1146 if (range.size() != 1) {
1147 emitter.emitVarInt(map.size(), "custom use-list size");
1148 }
1149
1150 for (const auto &item : map) {
1151 auto resultIdx = item.getFirst();
1152 auto useListOrder = item.getSecond();
1153
1154 // Compute the number of uses that are actually shuffled. If those are less
1155 // than half of the total uses, encoding the index pair `(src, dst)` is more
1156 // space efficient.
1157 size_t shuffledElements =
1158 llvm::count_if(llvm::enumerate(useListOrder),
1159 [](auto item) { return item.index() != item.value(); });
1160 bool indexPairEncoding = shuffledElements < (useListOrder.size() / 2);
1161
1162 // For single result, we don't need to store the result index.
1163 if (range.size() != 1)
1164 emitter.emitVarInt(resultIdx, "use-list result index");
1165
1166 if (indexPairEncoding) {
1167 emitter.emitVarIntWithFlag(shuffledElements * 2, indexPairEncoding,
1168 "use-list index pair size");
1169 for (auto pair : llvm::enumerate(useListOrder)) {
1170 if (pair.index() != pair.value()) {
1171 emitter.emitVarInt(pair.value(), "use-list index pair first");
1172 emitter.emitVarInt(pair.index(), "use-list index pair second");
1173 }
1174 }
1175 } else {
1176 emitter.emitVarIntWithFlag(useListOrder.size(), indexPairEncoding,
1177 "use-list size");
1178 for (const auto &index : useListOrder)
1179 emitter.emitVarInt(index, "use-list order");
1180 }
1181 }
1182}
1183
1184LogicalResult BytecodeWriter::writeRegion(EncodingEmitter &emitter,
1185 Region *region) {
1186 // If the region is empty, we only need to emit the number of blocks (which is
1187 // zero).
1188 if (region->empty()) {
1189 emitter.emitVarInt(/*numBlocks*/ 0, "region block count empty");
1190 return success();
1191 }
1192
1193 // Emit the number of blocks and values within the region.
1194 unsigned numBlocks, numValues;
1195 std::tie(numBlocks, numValues) = numberingState.getBlockValueCount(region);
1196 emitter.emitVarInt(numBlocks, "region block count");
1197 emitter.emitVarInt(numValues, "region value count");
1198
1199 // Emit the blocks within the region.
1200 for (Block &block : *region)
1201 if (failed(writeBlock(emitter, &block)))
1202 return failure();
1203 return success();
1204}
1205
1206LogicalResult BytecodeWriter::writeIRSection(EncodingEmitter &emitter,
1207 Operation *op) {
1208 EncodingEmitter irEmitter;
1209
1210 // Write the IR section the same way as a block with no arguments. Note that
1211 // the low-bit of the operation count for a block is used to indicate if the
1212 // block has arguments, which in this case is always false.
1213 irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false, "ir section");
1214
1215 // Emit the operations.
1216 if (failed(writeOp(irEmitter, op)))
1217 return failure();
1218
1219 emitter.emitSection(bytecode::Section::kIR, std::move(irEmitter));
1220 return success();
1221}
1222
1223//===----------------------------------------------------------------------===//
1224// Resources
1225//===----------------------------------------------------------------------===//
1226
1227namespace {
1228/// This class represents a resource builder implementation for the MLIR
1229/// bytecode format.
1230class ResourceBuilder : public AsmResourceBuilder {
1231public:
1232 using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>;
1233
1234 ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection,
1235 PostProcessFn postProcessFn, bool shouldElideData)
1236 : emitter(emitter), stringSection(stringSection),
1237 postProcessFn(postProcessFn), shouldElideData(shouldElideData) {}
1238 ~ResourceBuilder() override = default;
1239
1240 void buildBlob(StringRef key, ArrayRef<char> data,
1241 uint32_t dataAlignment) final {
1242 if (!shouldElideData)
1243 emitter.emitOwnedBlobAndAlignment(data, dataAlignment, "resource blob");
1244 postProcessFn(key, AsmResourceEntryKind::Blob);
1245 }
1246 void buildBool(StringRef key, bool data) final {
1247 if (!shouldElideData)
1248 emitter.emitByte(data, "resource bool");
1249 postProcessFn(key, AsmResourceEntryKind::Bool);
1250 }
1251 void buildString(StringRef key, StringRef data) final {
1252 if (!shouldElideData)
1253 emitter.emitVarInt(stringSection.insert(data), "resource string");
1254 postProcessFn(key, AsmResourceEntryKind::String);
1255 }
1256
1257private:
1258 EncodingEmitter &emitter;
1259 StringSectionBuilder &stringSection;
1260 PostProcessFn postProcessFn;
1261 bool shouldElideData = false;
1262};
1263} // namespace
1264
1265void BytecodeWriter::writeResourceSection(Operation *op,
1266 EncodingEmitter &emitter) {
1267 EncodingEmitter resourceEmitter;
1268 EncodingEmitter resourceOffsetEmitter;
1269 uint64_t prevOffset = 0;
1271 curResourceEntries;
1272
1273 // Functor used to process the offset for a resource of `kind` defined by
1274 // 'key'.
1275 auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) {
1276 uint64_t curOffset = resourceEmitter.size();
1277 curResourceEntries.emplace_back(key, kind, curOffset - prevOffset);
1278 prevOffset = curOffset;
1279 };
1280
1281 // Functor used to emit a resource group defined by 'key'.
1282 auto emitResourceGroup = [&](uint64_t key) {
1283 resourceOffsetEmitter.emitVarInt(key, "resource group key");
1284 resourceOffsetEmitter.emitVarInt(curResourceEntries.size(),
1285 "resource group size");
1286 for (auto [key, kind, size] : curResourceEntries) {
1287 resourceOffsetEmitter.emitVarInt(stringSection.insert(key),
1288 "resource key");
1289 resourceOffsetEmitter.emitVarInt(size, "resource size");
1290 resourceOffsetEmitter.emitByte(kind, "resource kind");
1291 }
1292 };
1293
1294 // Builder used to emit resources.
1295 ResourceBuilder entryBuilder(resourceEmitter, stringSection,
1296 appendResourceOffset,
1297 config.shouldElideResourceData);
1298
1299 // Emit the external resource entries.
1300 resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size(),
1301 "external resource printer count");
1302 for (const auto &printer : config.externalResourcePrinters) {
1303 curResourceEntries.clear();
1304 printer->buildResources(op, entryBuilder);
1305 emitResourceGroup(stringSection.insert(printer->getName()));
1306 }
1307
1308 // Emit the dialect resource entries.
1309 for (DialectNumbering &dialect : numberingState.getDialects()) {
1310 if (!dialect.asmInterface)
1311 continue;
1312 curResourceEntries.clear();
1313 dialect.asmInterface->buildResources(op, dialect.resources, entryBuilder);
1314
1315 // Emit the declaration resources for this dialect, these didn't get emitted
1316 // by the interface. These resources don't have data attached, so just use a
1317 // "blob" kind as a placeholder.
1318 for (const auto &resource : dialect.resourceMap)
1319 if (resource.second->isDeclaration)
1320 appendResourceOffset(resource.first, AsmResourceEntryKind::Blob);
1321
1322 // Emit the resource group for this dialect.
1323 if (!curResourceEntries.empty())
1324 emitResourceGroup(dialect.number);
1325 }
1326
1327 // If we didn't emit any resource groups, elide the resource sections.
1328 if (resourceOffsetEmitter.size() == 0)
1329 return;
1330
1331 emitter.emitSection(bytecode::Section::kResourceOffset,
1332 std::move(resourceOffsetEmitter));
1333 emitter.emitSection(bytecode::Section::kResource, std::move(resourceEmitter));
1334}
1335
1336//===----------------------------------------------------------------------===//
1337// Strings
1338//===----------------------------------------------------------------------===//
1339
1340void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) {
1341 EncodingEmitter stringEmitter;
1342 stringSection.write(stringEmitter);
1343 emitter.emitSection(bytecode::Section::kString, std::move(stringEmitter));
1344}
1345
1346//===----------------------------------------------------------------------===//
1347// Properties
1348//===----------------------------------------------------------------------===//
1349
1350void BytecodeWriter::writePropertiesSection(EncodingEmitter &emitter) {
1351 EncodingEmitter propertiesEmitter;
1352 propertiesSection.write(propertiesEmitter);
1353 emitter.emitSection(bytecode::Section::kProperties,
1354 std::move(propertiesEmitter));
1355}
1356
1357//===----------------------------------------------------------------------===//
1358// Entry Points
1359//===----------------------------------------------------------------------===//
1360
1363 BytecodeWriter writer(op, config);
1364 return writer.write(op, os);
1365}
return success()
static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries, EntryCallbackT &&callback)
Write the given entries in contiguous groups with the same parent dialect.
if copies could not be generated due to yet unimplemented cases. copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock specify the insertion points where the incoming copies and outgoing copies should be inserted (the insertion happens right before the insertion point). Since `begin` can itself be invalidated due to the memref rewriting done from this method
static LogicalResult emit(SolverOp solver, const SMTEmissionOptions &options, mlir::raw_indented_ostream &stream)
Emit the SMT operations in the given 'solver' to the 'stream'.
This class is used to build resource entries for use by the printer.
Definition AsmState.h:247
A class to interact with the attributes and types printer when emitting MLIR bytecode.
This class represents an argument of a Block.
Definition Value.h:309
Block represents an ordered list of Operations.
Definition Block.h:33
BlockArgListType getArguments()
Definition Block.h:97
This class contains the configuration used for the bytecode writer.
llvm::StringMap< std::unique_ptr< DialectVersion > > & getDialectVersionMap() const
A map containing the dialect versions to emit.
void setElideResourceDataFlag(bool shouldElideResourceData=true)
Set a boolean flag to skip emission of resources into the bytecode file.
BytecodeWriterConfig(StringRef producer="MLIR" LLVM_VERSION_STRING)
producer is an optional string that can be used to identify the producer of the bytecode when reading...
void attachFallbackResourcePrinter(FallbackAsmResourceMap &map)
Attach resource printers to the AsmState for the fallback resources in the given map.
void attachTypeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Type > > callback)
int64_t getDesiredBytecodeVersion() const
Get the set desired bytecode version to emit.
void setDialectVersion(std::unique_ptr< DialectVersion > dialectVersion) const
Set a given dialect version to emit on the map.
void attachAttributeCallback(std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > callback)
Attach a custom bytecode printer callback to the configuration for the emission of custom type/attrib...
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > getTypeWriterCallbacks() const
ArrayRef< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > getAttributeWriterCallbacks() const
Retrieve the callbacks.
void setDesiredBytecodeVersion(int64_t bytecodeVersion)
Set the desired bytecode version to emit.
void attachResourcePrinter(std::unique_ptr< AsmResourcePrinter > printer)
Attach the given resource printer to the writer configuration.
This class defines a virtual interface for writing to a bytecode stream, providing hooks into the byt...
A fallback map containing external resources not explicitly handled by another parser/printer.
Definition AsmState.h:421
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
DictionaryAttr getAttrDictionary()
Return all of the attributes on this operation as a DictionaryAttr.
unsigned getNumSuccessors()
Definition Operation.h:706
bool isRegistered()
Returns true if this operation has a registered operation description, otherwise false.
Definition Operation.h:129
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition Operation.h:674
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:223
unsigned getNumOperands()
Definition Operation.h:346
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
OperationName getName()
The name of an operation is the key identifier for it.
Definition Operation.h:119
DictionaryAttr getDiscardableAttrDictionary()
Return all of the discardable attributes on this operation as a DictionaryAttr.
Definition Operation.h:501
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
Definition Operation.h:677
result_type_range getResultTypes()
Definition Operation.h:428
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition Operation.h:378
SuccessorRange getSuccessors()
Definition Operation.h:703
result_range getResults()
Definition Operation.h:415
int getPropertiesStorageSize() const
Returns the properties storage size.
Definition Operation.h:896
OpaqueProperties getPropertiesStorage()
Returns the properties storage.
Definition Operation.h:900
unsigned getNumResults()
Return the number of results held by this operation.
Definition Operation.h:404
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
bool empty()
Definition Region.h:60
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
@ kAttrType
This section contains the attributes and types referenced within an IR module.
Definition Encoding.h:73
@ kAttrTypeOffset
This section contains the offsets for the attribute and types within the AttrType section.
Definition Encoding.h:77
@ kIR
This section contains the list of operations serialized into the bytecode, and their nested regions/o...
Definition Encoding.h:81
@ kResource
This section contains the resources of the bytecode.
Definition Encoding.h:84
@ kResourceOffset
This section contains the offsets of resources within the Resource section.
Definition Encoding.h:88
@ kDialect
This section contains the dialects referenced within an IR module.
Definition Encoding.h:69
@ kString
This section contains strings referenced within the bytecode.
Definition Encoding.h:66
@ kDialectVersions
This section contains the versions of each dialect.
Definition Encoding.h:91
@ kProperties
This section contains the properties for the operations.
Definition Encoding.h:94
static uint64_t getUseID(OperandT &val, unsigned ownerID)
Get the unique ID of a value use.
Definition Encoding.h:127
@ kUseListOrdering
Use-list ordering started to be encoded in version 3.
Definition Encoding.h:38
@ kAlignmentByte
An arbitrary value used to fill alignment padding.
Definition Encoding.h:56
@ kVersion
The current bytecode version.
Definition Encoding.h:53
@ kLazyLoading
Support for lazy-loading of isolated region was added in version 2.
Definition Encoding.h:35
@ kDialectVersioning
Dialects versioning was added in version 1.
Definition Encoding.h:32
@ kElideUnknownBlockArgLocation
Avoid recording unknown locations on block arguments (compression) started in version 4.
Definition Encoding.h:42
@ kNativePropertiesEncoding
Support for encoding properties natively in bytecode instead of merged with the discardable attribute...
Definition Encoding.h:46
@ kMinSupportedVersion
The minimum supported version of the bytecode.
Definition Encoding.h:29
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:717
Include the generated interface declarations.
const FrozenRewritePatternSet GreedyRewriteConfig config
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
Definition LLVM.h:118
AsmResourceEntryKind
This enum represents the different kinds of resource values.
Definition AsmState.h:280
@ Blob
A blob of data with an accompanying alignment.
Definition AsmState.h:282
LogicalResult writeBytecodeToFile(Operation *op, raw_ostream &os, const BytecodeWriterConfig &config={})
Write the bytecode for the given operation to the provided output stream.
llvm::function_ref< Fn > function_ref
Definition LLVM.h:144
StringRef producer
The producer of the bytecode.
llvm::StringMap< std::unique_ptr< DialectVersion > > dialectVersionMap
A map containing dialect version information for each dialect to emit.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Attribute > > > attributeWriterCallbacks
Printer callbacks used to emit custom type and attribute encodings.
SmallVector< std::unique_ptr< AsmResourcePrinter > > externalResourcePrinters
A collection of non-dialect resource printers.
llvm::SmallVector< std::unique_ptr< AttrTypeBytecodeWriter< Type > > > typeWriterCallbacks
int64_t bytecodeVersion
Version to use when writing.
bool shouldElideResourceData
A flag specifying whether to elide emission of resources into the bytecode file.
This class represents a numbering entry for a Dialect.
unsigned number
The number assigned to the dialect.