MLIR 23.0.0git
XeGPUBlocking.cpp
Go to the documentation of this file.
1//===---- XeGPUBlocking.cpp ---- XeGPU Blocking Pass ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/Support/DebugLog.h"
26
27namespace mlir {
28namespace xegpu {
29#define GEN_PASS_DEF_XEGPUBLOCKING
30#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
31} // namespace xegpu
32} // namespace mlir
33
34#define DEBUG_TYPE "xegpu-blocking"
35
36using namespace mlir;
37
38namespace {
39
40//===------------------------------------------------------------------------===//
41// The XeGPUBlockingPass leverages the unroll patterns for XeGPU and Vector ops
42// to partition operations that process large shapes into multiple operations on
43// smaller shapes, as specified by the inst_data in the layout attribute. This
44// enables each resulting operation to be efficiently mapped to a hardware
45// instruction.
46//===------------------------------------------------------------------------===//
47
48class XeGPUBlockingPass final
49 : public xegpu::impl::XeGPUBlockingBase<XeGPUBlockingPass> {
50public:
51 void runOnOperation() override;
52
53private:
54 // Get the tile shape for a given OpOperand or OpResult by examining the
55 // corresponding layout attribute. If layout is not present or is not a
56 // subgroup level layout, it returns std::nullopt.
57 template <typename T,
58 typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
59 std::is_same_v<T, OpResult>>>
60 std::optional<SmallVector<int64_t>>
61 getTileShape(const T &operandOrResult) const;
62
63 // Get the tile shape for a given operation.
64 std::optional<SmallVector<int64_t>> getTileShape(Operation *op) const;
65
66 // Determine if the operation requires unrolling. Return false if all operands
67 // and results have tile shapes identical to their original types. Otherwise,
68 // return true.
69 bool needsUnroll(Operation *op) const;
70};
71} // namespace
72
73template <typename T, typename>
74std::optional<SmallVector<int64_t>>
75XeGPUBlockingPass::getTileShape(const T &operandOrResult) const {
76 Value value;
77 if constexpr (std::is_same_v<T, OpOperand>) {
78 value = operandOrResult.get();
79 } else {
80 value = (Value)operandOrResult;
81 }
82
83 xegpu::DistributeLayoutAttr layout =
84 xegpu::getDistributeLayoutAttr(operandOrResult);
85 if (layout && layout.isForSubgroup()) {
86 if (!layout.getEffectiveInstDataAsInt().empty()) {
87 SmallVector<int64_t> instData = layout.getEffectiveInstDataAsInt();
88 return instData;
89 }
90 if (auto type = dyn_cast<ShapedType>(value.getType()))
91 return llvm::to_vector(type.getShape());
92 }
93 LDBG() << "failed to getTileShape for: " << value;
94 return std::nullopt;
95}
96
// Computes the tile shape used to unroll `op` by dispatching on the kind of
// operation and delegating to the operand/result overload of getTileShape:
//  - CreateNdDescOp, LoadMatrixOp, LoadGatherOp: tile shape of result 0.
//  - PrefetchNdOp, LoadNdOp, PrefetchOp, StoreMatrixOp, StoreScatterOp: tile
//    shape of operand 0.
//  - StoreNdOp: tile shape of operand 1.
//  - ConvertLayoutOp: the larger (by element count) of the input and target
//    inst_data.
//  - DpasOp: [batch..., M, K, N]; DpasMxOp: [batch..., M, K, N, S].
//  - vector::MultiDimReductionOp: tile shape of operand 0.
//  - the listed vector ops (transpose, broadcast, ...): tile shape of result 0.
// Returns std::nullopt for any other op, or when a validation step fails.
//
// NOTE(review): this listing has dropped a few source lines (its embedded
// line numbers skip 129, 131 and 294); the affected spots are marked below.
97std::optional<SmallVector<int64_t>>
98XeGPUBlockingPass::getTileShape(Operation *op) const {
99 if (isa<xegpu::CreateNdDescOp, xegpu::LoadMatrixOp>(op))
100 return getTileShape(op->getOpResult(0));
101 if (isa<xegpu::PrefetchNdOp, xegpu::LoadNdOp, xegpu::PrefetchOp,
102 xegpu::StoreMatrixOp>(op))
103 return getTileShape(op->getOpOperand(0));
104 if (isa<xegpu::StoreNdOp>(op))
105 return getTileShape(op->getOpOperand(1));
106
107 if (isa<xegpu::LoadGatherOp>(op))
108 return getTileShape(op->getOpResult(0));
109
110 if (auto convertLayoutOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
 // NOTE(review): both layouts are dereferenced without a null check here,
 // while needsUnroll() null-checks the target layout — confirm both
 // attributes are always present on ConvertLayoutOp.
111 auto inputInstData =
112 convertLayoutOp.getInputLayout().getEffectiveInstDataAsInt();
113 auto targetInstData =
114 convertLayoutOp.getTargetLayout().getEffectiveInstDataAsInt();
115 // return the one with larger size
 // (size is compared as the product of the entries; ties pick the input.)
116 if (computeProduct(inputInstData) >= computeProduct(targetInstData))
117 return inputInstData;
118 else
119 return targetInstData;
120 }
121
122 if (isa<xegpu::StoreScatterOp>(op))
123 return getTileShape(op->getOpOperand(0));
124
125 // Helper lambda to validate and get A/B tiles
 // Returns the (A, B) tile pair, or std::nullopt when either tile is missing,
 // has rank < 2, or the batch/K consistency checks below fail.
126 auto validateABTiles = [&](Operation *op)
127 -> std::optional<std::pair<SmallVector<int64_t>, SmallVector<int64_t>>> {
 // NOTE(review): the initializers of aTile and bTile are missing from this
 // listing; presumably the tile shapes of operands 0 and 1 — TODO confirm
 // against the real source.
128 std::optional<SmallVector<int64_t>> aTile =
130 std::optional<SmallVector<int64_t>> bTile =
132
133 if (!aTile || aTile->size() < 2 || !bTile || bTile->size() < 2)
134 return std::nullopt;
135
136 // Both must have the same number of batch dimensions.
137 int64_t aBatchRank = aTile->size() - 2;
138 int64_t bBatchRank = bTile->size() - 2;
139 if (aBatchRank != bBatchRank)
140 return std::nullopt;
141
142 // Batch dimensions must match.
143 for (int64_t i = 0; i < aBatchRank; ++i) {
144 if ((*aTile)[i] != (*bTile)[i])
145 return std::nullopt;
146 }
147
148 // Semantic check for A and B: K dimension must match.
149 // A[..., M, K] x B[..., K, N]
 // K is the last entry of the A tile and the first non-batch entry of B.
150 if ((*aTile).back() != (*bTile)[bBatchRank])
151 return std::nullopt;
152
153 return std::make_pair(*aTile, *bTile);
154 };
155
156 // Helper lambda to validate C tile
 // Returns true when the op has no operand at cOperandIdx, or when that
 // operand's tile equals the batch dims of A followed by [M, N].
157 auto validateCTile = [&](Operation *op, unsigned cOperandIdx,
158 const SmallVector<int64_t> &aTile,
159 const SmallVector<int64_t> &bTile) -> bool {
160 if (op->getNumOperands() <= cOperandIdx)
161 return true;
162
163 std::optional<SmallVector<int64_t>> cTile =
164 getTileShape(op->getOpOperand(cOperandIdx));
165 if (!cTile)
166 return false;
167 // Expected C tile: batch dims from A + [M, N]
168 int64_t aBatchRank = aTile.size() - 2;
169 SmallVector<int64_t> expectedCTile(aTile.begin(),
170 aTile.begin() + aBatchRank);
171 expectedCTile.push_back(aTile[aBatchRank]); // M from A
172 expectedCTile.push_back(bTile.back()); // N from B
173 if (!llvm::equal(*cTile, expectedCTile))
174 return false;
175 return true;
176 };
177
178 // Helper lambda to validate scale A tile for DpasMxOp
 // Returns the last entry of the scale_a tile on success, std::nullopt when
 // the tile is missing, has rank < 2, or its M entry disagrees with A.
179 auto validateScaleATile =
180 [&](Operation *op, unsigned scaleAOperandIdx,
181 const SmallVector<int64_t> &aTile) -> std::optional<int64_t> {
182 std::optional<SmallVector<int64_t>> aScaleTile =
183 getTileShape(op->getOpOperand(scaleAOperandIdx));
184
185 if (!aScaleTile || aScaleTile->size() < 2)
186 return std::nullopt;
187
188 // Validate scale_a tile: [batch..., M_tile, K_scale]
189 // M dimension (second-to-last) must match A's M dimension
190 int64_t scaleRank = aScaleTile->size();
191 int64_t aBatchRank = aTile.size() - 2;
192 if ((*aScaleTile)[scaleRank - 2] != aTile[aBatchRank])
193 return std::nullopt;
194
195 // Return the K scale factor (last dim)
196 return aScaleTile->back();
197 };
198
199 // Helper lambda to validate scale B tile for DpasMxOp
 // Returns the second-to-last entry of the scale_b tile on success,
 // std::nullopt when the tile is missing, has rank < 2, or its N entry
 // disagrees with B.
200 auto validateScaleBTile =
201 [&](Operation *op, unsigned scaleBOperandIdx,
202 const SmallVector<int64_t> &bTile) -> std::optional<int64_t> {
203 std::optional<SmallVector<int64_t>> bScaleTile =
204 getTileShape(op->getOpOperand(scaleBOperandIdx));
205
206 if (!bScaleTile || bScaleTile->size() < 2)
207 return std::nullopt;
208
209 // Validate scale_b tile: [batch..., K_scale, N_tile]
210 // N dimension (last) must match B's N dimension (last)
211 if (bScaleTile->back() != bTile.back())
212 return std::nullopt;
213
214 // Return the K scale factor (second-to-last dim)
215 int64_t scaleRank = bScaleTile->size();
216 return (*bScaleTile)[scaleRank - 2];
217 };
218
219 if (isa<xegpu::DpasOp>(op)) {
220 auto abTiles = validateABTiles(op);
221 if (!abTiles)
222 return std::nullopt;
223
224 auto [aTile, bTile] = *abTiles;
225
226 // Semantic check for C.
 // Operand index 2 is checked; validateCTile accepts ops without it.
227 if (!validateCTile(op, 2, aTile, bTile))
228 return std::nullopt;
229
230 // Return [batch..., M, K, N] as the target shape for unrolling.
231 int64_t aBatchRank = aTile.size() - 2;
232 SmallVector<int64_t> tileShape(aTile.begin(), aTile.begin() + aBatchRank);
233 tileShape.push_back(aTile[aBatchRank]); // M
234 tileShape.push_back(aTile[aBatchRank + 1]); // K
235 tileShape.push_back(bTile.back()); // N
236 return tileShape;
237 }
238
239 if (auto dpasMxOp = dyn_cast<xegpu::DpasMxOp>(op)) {
240 auto abTiles = validateABTiles(op);
241 if (!abTiles)
242 return std::nullopt;
243
244 auto [aTile, bTile] = *abTiles;
245
246 // Validate C tile if present using op-specific accessor
247 if (dpasMxOp.getAcc()) {
248 unsigned accOperandIdx = 2; // acc is the 3rd operand
249 if (!validateCTile(op, accOperandIdx, aTile, bTile))
250 return std::nullopt;
251 }
252
253 // Validate scale tiles if present using op-specific accessors
 // kScaleFactor stays 1 when neither scale operand is present.
254 int64_t kScaleFactor = 1;
255 std::optional<int64_t> scaleAFactor;
256 std::optional<int64_t> scaleBFactor;
257
258 if (dpasMxOp.getScaleA()) {
 // Operand index of scale_a shifts by one when acc is present.
259 unsigned scaleAOperandIdx = 2 + (dpasMxOp.getAcc() ? 1 : 0);
260 scaleAFactor = validateScaleATile(op, scaleAOperandIdx, aTile);
261 if (!scaleAFactor)
262 return std::nullopt;
263 }
264
265 if (dpasMxOp.getScaleB()) {
 // Operand index of scale_b shifts by one for each of acc and scale_a
 // that is present.
266 unsigned scaleBOperandIdx =
267 2 + (dpasMxOp.getAcc() ? 1 : 0) + (dpasMxOp.getScaleA() ? 1 : 0);
268 scaleBFactor = validateScaleBTile(op, scaleBOperandIdx, bTile);
269 if (!scaleBFactor)
270 return std::nullopt;
271 }
272
273 // If both scales are present, their K dimensions must match
274 if (scaleAFactor && scaleBFactor) {
275 if (*scaleAFactor != *scaleBFactor)
276 return std::nullopt;
277 kScaleFactor = *scaleAFactor;
278 } else if (scaleAFactor) {
279 kScaleFactor = *scaleAFactor;
280 } else if (scaleBFactor) {
281 kScaleFactor = *scaleBFactor;
282 }
283
284 // Return [batch..., M, K, N, S] as the target shape for unrolling.
285 int64_t aBatchRank = aTile.size() - 2;
286 SmallVector<int64_t> tileShape(aTile.begin(), aTile.begin() + aBatchRank);
287 tileShape.push_back(aTile[aBatchRank]); // M
288 tileShape.push_back(aTile[aBatchRank + 1]); // K
289 tileShape.push_back(bTile.back()); // N
290 tileShape.push_back(kScaleFactor); // S
291 return tileShape;
292 }
293
 // NOTE(review): the condition guarding the next return is missing from this
 // listing. As shown, the return would be unconditional and make the code
 // below unreachable — TODO restore the guard from the real source.
295 return getTileShape(op->getOpResult(0));
296
 // Reductions are tiled by their source operand rather than their result.
297 if (isa<vector::MultiDimReductionOp>(op))
298 return getTileShape(op->getOpOperand(0));
299
300 if (isa<vector::TransposeOp, vector::BroadcastOp, vector::StepOp,
301 vector::ShapeCastOp, vector::ConstantMaskOp, vector::CreateMaskOp,
302 vector::BitCastOp, vector::InterleaveOp, vector::DeinterleaveOp>(op))
303 return getTileShape(op->getOpResult(0));
304
 // No tile shape is defined for any other op.
305 return std::nullopt;
306}
307
308bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
309 // skip the op if any of its operands or results has workgroup level layouts
310 bool hasWgLayoutOperands =
311 llvm::any_of(op->getOpOperands(), [](OpOperand &opr) {
312 xegpu::DistributeLayoutAttr layout =
313 xegpu::getDistributeLayoutAttr(opr);
314 return layout && layout.isForWorkgroup();
315 });
316 bool hasWgLayoutResults =
317 llvm::any_of(op->getOpResults(), [](OpResult result) {
318 xegpu::DistributeLayoutAttr layout =
319 xegpu::getDistributeLayoutAttr(result);
320 return layout && layout.isForWorkgroup();
321 });
322 if (hasWgLayoutOperands || hasWgLayoutResults) {
323 LDBG() << "skip unrolling for op with workgroup level layout: " << *op;
324 return false;
325 }
326
327 auto isUnrollable = [](Value value, ArrayRef<int64_t> tileShape) {
328 Type valTy = value.getType();
329 if (auto tdescTy = dyn_cast<xegpu::TensorDescType>(valTy)) {
330 xegpu::DistributeLayoutAttr layout = tdescTy.getLayoutAttr();
331 return layout && !layout.getEffectiveInstDataAsInt().empty();
332 }
333 auto shapedType = dyn_cast<ShapedType>(valTy);
334 return shapedType && !llvm::equal(tileShape, shapedType.getShape());
335 };
336
337 bool hasUnrollableOperands =
338 llvm::any_of(op->getOpOperands(), [&](OpOperand &opr) {
339 std::optional<SmallVector<int64_t>> tileShape = getTileShape(opr);
340 return tileShape.has_value() && isUnrollable(opr.get(), *tileShape);
341 });
342 bool hasUnrollableResults =
343 llvm::any_of(op->getOpResults(), [&](OpResult result) {
344 std::optional<SmallVector<int64_t>> tileShape = getTileShape(result);
345 return tileShape.has_value() && isUnrollable(result, *tileShape);
346 });
347 // ConvertLayoutOp must be processed to drop the inst_data in the layout
348 bool isConvertLayoutWithInstData = false;
349 if (auto convertLayoutOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
350 auto targettLayout = convertLayoutOp.getTargetLayout();
351 if (targettLayout && !targettLayout.getEffectiveInstDataAsInt().empty()) {
352 isConvertLayoutWithInstData = true;
353 }
354 }
355 return hasUnrollableOperands || hasUnrollableResults ||
356 isConvertLayoutWithInstData;
357}
358
// Pass entry point. As far as this listing shows, it proceeds in phases:
//  1. An up-front check that fails the pass (its condition is missing here).
//  2. A partial dialect conversion of SCF structural ops using a TypeConverter
//     that maps TensorDescType / VectorType to their per-tile types (1:N).
//  3. Greedy application of the XeGPU and Vector unroll patterns, filtered by
//     needsUnroll() and sized by getTileShape().
//  4. A walk removing the temporary per-operand/per-result layout attributes.
//  5. A final empty-pattern greedy run that only performs folding.
//
// NOTE(review): this listing has dropped several source lines (its embedded
// line numbers skip 363, 470, 476, 513, 544 and 551); the affected spots are
// marked below.
359void XeGPUBlockingPass::runOnOperation() {
360 MLIRContext *ctx = &getContext();
361 Operation *op = getOperation();
362
 // NOTE(review): the `if (...) {` opening this failure branch is missing from
 // the listing — TODO restore the condition from the real source.
364 signalPassFailure();
365 return;
366 }
367
 // Returns (tile shape, number of tiles) for `shape` under `layout`. Without
 // inst_data the tile is the whole shape and the count is 1.
368 auto getTileShapeAndCount = [](llvm::ArrayRef<int64_t> shape,
369 xegpu::DistributeLayoutAttr layout) {
370 int count = 1;
371 SmallVector<int64_t> tileShape(shape);
372 if (layout && !layout.getEffectiveInstDataAsInt().empty()) {
373 tileShape = layout.getEffectiveInstDataAsInt();
 // NOTE(review): integer division; the result is truncated if the shape
 // is not a multiple of the tile, and only count >= 1 is asserted.
374 count = computeProduct(shape) / computeProduct(tileShape);
375 }
376 assert(count >= 1 && "count must be at least 1");
377 return std::make_pair(tileShape, count);
378 };
379
380 // Perform context-aware type conversion for SCF structural ops.
381 // Inspects Values to find inst_data layout information for 1:N conversion.
 // Record the casts present before this pass adds any of its own.
382 llvm::SmallSetVector<UnrealizedConversionCastOp, 8> existingCasts;
383 op->walk(
384 [&](UnrealizedConversionCastOp castOp) { existingCasts.insert(castOp); });
385
386 {
387 TypeConverter converter;
 // Identity conversion for every type not handled by a conversion below.
388 converter.addConversion([](Type type) -> Type { return type; });
389
390 // TensorDescType 1:N converter (type-based, layout is in the type).
 // Fails for workgroup-level layouts; otherwise emits `count` copies of the
 // tile-shaped descriptor type with inst_data dropped from its layout.
391 converter.addConversion(
392 [&](xegpu::TensorDescType type,
393 SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
394 Type elemTy = type.getElementType();
395 ArrayRef<int64_t> shape = type.getShape();
396
397 xegpu::DistributeLayoutAttr layout = type.getLayoutAttr();
398 if (layout && layout.isForWorkgroup())
399 return failure();
400
401 int count;
402 SmallVector<int64_t> subShape;
403 std::tie(subShape, count) = getTileShapeAndCount(shape, layout);
404
405 if (layout)
406 layout = layout.dropInstData();
407
408 auto newTy = xegpu::TensorDescType::get(
409 type.getContext(), subShape, elemTy, type.getEncoding(), layout);
410 result.append(count, newTy);
411 return success();
412 });
413
414 // Context-aware VectorType conversion based on inst_data (1:1
415 // shape-changing or 1:N).
416 auto getSubShapeAndCount = [&](VectorType vecTy,
417 xegpu::DistributeLayoutAttr layout)
418 -> std::pair<SmallVector<int64_t>, int> {
419 return getTileShapeAndCount(vecTy.getShape(), layout);
420 };
421 auto loopArgTypes =
422 xegpu::precomputeLoopBlockArgTypes(op, getSubShapeAndCount);
423 xegpu::addVectorTypeConversion(converter, getSubShapeAndCount,
424 std::move(loopArgTypes));
425
426 // Loop-carried types are now in the converter's map, so the transient
427 // per-position layout attrs on SCF loop ops are no longer needed. Strip
428 // them before converting: the SCF converters copy old attrs onto the new
429 // op (ConvertForOpTypes::setAttrs), and after 1:N result expansion a stale
430 // `layout_result_N` lands on the wrong (renumbered) result, corrupting the
431 // count invariant and leaving the loop illegal.
432 op->walk([](Operation *loopOp) {
433 if (!isa<scf::ForOp, scf::WhileOp, scf::ConditionOp>(loopOp))
434 return;
 // Collect names first, then remove, so the attribute list is not
 // modified while it is being iterated.
435 SmallVector<StringRef> toRemove;
436 for (const NamedAttribute &attr : loopOp->getAttrs()) {
437 StringRef name = attr.getName().strref();
438 if (name.starts_with("layout_operand_") ||
439 name.starts_with("layout_result_"))
440 toRemove.push_back(name);
441 }
442 for (StringRef name : toRemove)
443 loopOp->removeAttr(name);
444 });
445
446 // Source (N:1) and target (1:1) materializations using
447 // UnrealizedConversionCastOp.
448 auto materializeCast = [](OpBuilder &builder, Type type, ValueRange inputs,
449 Location loc) -> Value {
450 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
451 .getResult(0);
452 };
453 converter.addSourceMaterialization(materializeCast);
454 converter.addTargetMaterialization(materializeCast);
455 // Blocking runs SCF conversion separately (not combined with XeGPU
456 // patterns), so it also needs a 1:N target materialization.
457 converter.addTargetMaterialization(
458 [](mlir::OpBuilder &builder, mlir::TypeRange types,
459 mlir::ValueRange inputs, mlir::Location loc) -> SmallVector<Value> {
460 auto castOp =
461 UnrealizedConversionCastOp::create(builder, loc, types, inputs);
462 return SmallVector<Value>(castOp.getResults());
463 });
464
465 ConversionTarget target(*ctx);
466 target.addLegalOp<UnrealizedConversionCastOp>();
 // Every op not configured otherwise is treated as legal.
467 target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
468
469 RewritePatternSet scfPatterns(ctx);
 // NOTE(review): the start of the call that populates scfPatterns is missing
 // from the listing; only its trailing `target);` argument survives.
471 target);
472 if (failed(applyPartialConversion(op, target, std::move(scfPatterns))))
473 return signalPassFailure();
474
475 // Fold cancelling cast chains and erase dead casts.
 // NOTE(review): the statement performing this cleanup is missing from the
 // listing.
477 }
478
 // Configure unrolling: which ops to unroll, with which native (tile) shape,
 // and which types an unrolled value is split into.
479 xegpu::UnrollOptions options;
480 options.setFilterConstraint(
481 [&](Operation *op) -> LogicalResult { return success(needsUnroll(op)); });
482
483 options.setNativeShapeFn([&](Operation *op) { return getTileShape(op); });
484
485 options.setUnrolledTypesFn([&](ShapedType type, ArrayRef<int64_t> tileShape) {
486 Type elemTy = type.getElementType();
487
488 if (auto tdescTy = dyn_cast<xegpu::TensorDescType>(type)) {
489
490 Attribute encoding = tdescTy.getEncoding();
491
 // NOTE(review): getLayoutAttr() is used without a null check here, while
 // the TensorDescType converter above guards with `if (layout)` — confirm
 // a layout is always present on descriptors reaching this callback.
492 xegpu::TensorDescType newTy =
493 xegpu::TensorDescType::get(ctx, tileShape, elemTy, encoding,
494 tdescTy.getLayoutAttr().dropInstData());
495 // Compute the product of batch (higher) dimensions.
 // All dimensions except the trailing two are counted; the count is 1 for
 // shapes of rank <= 2.
496 ArrayRef<int64_t> shape = type.getShape();
497 int64_t batchCount =
498 shape.size() > 2 ? computeProduct(shape.drop_back(2)) : 1;
499 return SmallVector<Type>(batchCount, newTy);
500 }
 // Non-descriptor types become vectors of the tile shape, one per tile.
501 Type newTy = VectorType::get(tileShape, elemTy);
502
503 std::optional<SmallVector<int64_t>> ratio =
504 computeShapeRatio(type.getShape(), tileShape);
505 assert(ratio && "The shape of the type must be a multiple of tileShape.");
506 return SmallVector<Type>(computeProduct(*ratio), newTy);
507 });
508
509 RewritePatternSet patterns(ctx);
510 vector::UnrollVectorOptions vectorOptions;
 // Vector unrolling reuses the same native-shape callback as XeGPU unrolling.
511 vectorOptions.setNativeShapeFn(options.nativeShape);
512
 // NOTE(review): one statement is missing from the listing here (presumably
 // the one registering the XeGPU unroll patterns — TODO confirm).
514 vector::populateVectorUnrollPatterns(patterns, vectorOptions);
515
516 // Note: The pattern driver does op folding as well and clean up.
517 // But intermediate insert/extract strided slice ops with
518 // unrealized conversion cast ops in the middle does not get
519 // cleaned up in this step. One more round of folding is needed
520 // after the walk to resolve those unrealized conversion cast ops.
 // The driver's result is explicitly discarded.
521 (void)applyPatternsGreedily(op, std::move(patterns));
522
523 op->walk([](Operation *op) {
524 // Remove the layout attributes cached per operands.
525 for (OpOperand &opr : op->getOpOperands()) {
526 std::string name = xegpu::getTemporaryLayoutName(opr);
527 if (op->hasAttrOfType<xegpu::DistributeLayoutAttr>(name))
528 op->removeAttr(name);
529 }
530
531 // Update the layout attributes per result.
 // The temporary attribute is always removed; for ops that are not
 // loop-like the layout is re-attached with inst_data dropped.
532 for (OpResult result : op->getOpResults()) {
533 std::string name = xegpu::getTemporaryLayoutName(result);
534 if (auto layout = op->getAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
535 op->removeAttr(name);
536 if (!isa<LoopLikeOpInterface>(op))
537 xegpu::setDistributeLayoutAttr(result, layout.dropInstData());
538 }
539 }
540
541 // Drop left-over inst_data if the unroll pattern was not applied,
542 // e.g. when inst_data already matches the shape.
 // NOTE(review): the initializer of newAttrs is missing from the listing.
543 SmallVector<NamedAttribute> newAttrs =
545 op->setAttrs(newAttrs);
546 });
547
548 // Resolve UnrealizedConversionCastOps generated by SCF structural type
549 // conversion and by XeGPU/Vector unrolling (cancelling cast chains and
550 // unpaired pack/unpack casts).
 // NOTE(review): the statement performing this resolution is missing from the
 // listing.
552
553 // One more round of folding to clean up the intermediate
554 // insert/extract strided slice ops.
 // An empty pattern set is used, so the driver only folds.
555 RewritePatternSet emptyPatterns(ctx);
556 (void)applyPatternsGreedily(op, std::move(emptyPatterns));
557}
return success()
b getContext())
static std::array< int64_t, 2 > getTileShape(ArrayRef< int64_t > operandShape, Type elementType, int64_t lineSizeBits)
Returns the number of 8 x [128|256|512] bit tiles that compose the given operand shape.
Definition MMAUtils.cpp:37
static llvm::ManagedStatic< PassManagerOptions > options
Operation is the basic unit of execution within MLIR.
Definition Operation.h:87
OpResult getOpResult(unsigned idx)
Definition Operation.h:446
AttrClass getAttrOfType(StringAttr name)
Definition Operation.h:575
bool hasAttrOfType(NameT &&name)
Definition Operation.h:600
void setAttrs(DictionaryAttr newAttrs)
Set the attributes from a dictionary on this operation.
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition Operation.h:537
MutableArrayRef< OpOperand > getOpOperands()
Definition Operation.h:408
unsigned getNumOperands()
Definition Operation.h:371
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition Operation.h:822
result_range getOpResults()
Definition Operation.h:445
Attribute removeAttr(StringAttr name)
Remove the attribute with the specified name if it exists.
Definition Operation.h:625
OpOperand & getOpOperand(unsigned idx)
Definition Operation.h:413
unsigned getNumResults()
Return the number of results held by this operation.
Definition Operation.h:429
Type getType() const
Return the type of this value.
Definition Value.h:105
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:717
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
void populateXeGPUUnrollPatterns(RewritePatternSet &patterns, const UnrollOptions &options)
Collect a set of patterns to unroll xegpu operations to a smaller shapes.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
SmallVector< NamedAttribute > dropInstDataOnAttrs(ArrayRef< NamedAttribute > attrs)
Updates the NamedAttribute sequence by dropping inst-data information from any DistributeLayoutAttr f...
bool recoverTemporaryLayouts(Operation *rootOp)
Attach layout attributes to all vector-type operands of operations within the given operation's neste...
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
DenseMap< Value, SmallVector< Type > > precomputeLoopBlockArgTypes(Operation *topLevelOp, SubShapeAndCountFn getSubShapeAndCount)
Pre-computes distributed VectorType mappings for every value carried through an SCF loop under topLev...
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
void addVectorTypeConversion(TypeConverter &converter, SubShapeAndCountFn getSubShapeAndCount, DenseMap< Value, SmallVector< Type > > loopArgTypes)
Adds a context-aware VectorType conversion to converter (1:1 shape-changing or 1:N,...
void cleanupUnrealizedConversionCasts(Operation *root, const llvm::SmallSetVector< UnrealizedConversionCastOp, 8 > &existingCasts)
Cleans up UnrealizedConversionCastOps inserted during SCF structural type conversion and/or XeGPU unr...
Include the generated interface declarations.
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
int64_t computeProduct(ArrayRef< int64_t > basis)
Self-explicit.
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
UnrollVectorOptions & setNativeShapeFn(NativeShapeFnType fn)