//===---- XeGPUUtils.cpp - MLIR Utilities for XeGPUOps ------------------===//
//
// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utility methods for working with the XeGPU dialect.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/ValueRange.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormatVariadic.h"
#include <cstdint>
#include <numeric>

using namespace mlir;

/// Convert ArrayRef<ValueRange> into SmallVector<Value>.
SmallVector<Value> xegpu::flattenValues(ArrayRef<ValueRange> values) {
  SmallVector<Value> result;
  for (const auto &vals : values)
    llvm::append_range(result, vals);
  return result;
}

FailureOr<VectorType>
mlir::xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) {
  auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
  // This only works for subgroup-level layouts, which carry only lane_layout
  // and lane_data and are used to distribute SIMD code into SIMT code.
  if (!layout || !layout.isForSubgroup())
    return failure();

  SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
  SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
  auto tdescShape = tdescTy.getShape();
  auto elementType = tdescTy.getElementType();

  // Compute sgSize by multiplying the elements of laneLayout,
  // e.g., for a 2D layout, sgSize = laneLayout[0] * laneLayout[1];
  // for a 1D layout, sgSize = laneLayout[0].
  int64_t sgSize = llvm::product_of(laneLayout);

  // Case 1: regular loads/stores.
  auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>();
  if (scatterAttr) {
    auto chunkSize = scatterAttr.getChunkSize().getInt();
    // Verify that the first dimension of the tensor descriptor shape is
    // distributable.
    assert(tdescShape[0] == laneLayout[0] &&
           "tensor descriptor shape is not distributable");
    return VectorType::get({chunkSize}, elementType);
  }

  // Case 2: block loads/stores.
  // Check if the tensor descriptor shape is distributable.
  int64_t tensorSize = 1;
  for (auto [tdescDim, laneDim, laneDataDim] :
       llvm::zip_equal(tdescShape, laneLayout, laneData)) {
    assert((tdescDim % (laneDim * laneDataDim) == 0) &&
           "tensor descriptor shape is not distributable");
    tensorSize *= tdescDim;
  }
  // tensorSize must be adjusted for array_length.
  tensorSize *= tdescTy.getArrayLength();

  return VectorType::get({tensorSize / sgSize}, elementType);
}

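// Illustrative example (not from the original source): for a block tensor
// descriptor of shape 8x16xf16 with lane_layout = [1, 16], lane_data = [1, 1],
// and array_length = 2, sgSize = 1 * 16 = 16 and tensorSize = 8 * 16 * 2 =
// 256, so each lane receives vector<16xf16> (256 / 16 = 16 elements).
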
FailureOr<VectorType>
mlir::xegpu::getDistributedVectorType(VectorType originalType,
                                      xegpu::LayoutAttr layout) {
  int64_t rank = originalType.getRank();
  // Distributed vector type is only supported for 1D, 2D and 3D vectors.
  if (rank < 1 || rank > 3)
    return failure();
  ArrayRef<int64_t> shape = originalType.getShape();
  // arrayLength is 1 for 1D and 2D vectors, and equal to the first dimension
  // of the 3D vector.
  int arrayLength = 1;
  if (rank == 3) {
    arrayLength = shape[0];
    shape = shape.drop_front();
  }
  auto helperTdescTy = xegpu::TensorDescType::get(
      shape, originalType.getElementType(), arrayLength,
      /*boundary_check=*/true,
      /*memory_space=*/xegpu::MemorySpace::Global, layout);
  return xegpu::getDistributedVectorType(helperTdescTy);
}

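// Illustrative example (not from the original source): vector<2x8x16xf16> is
// treated as an array_length = 2 block of shape 8x16, so it distributes
// exactly like the helper descriptor built above would.
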
FailureOr<VectorType>
xegpu::getDistVecTypeBasedOnLaneLayout(xegpu::DistributeLayoutAttr layout,
                                       VectorType originalType) {
  if (!layout)
    return failure();
  assert((isa<xegpu::LayoutAttr>(layout) || isa<xegpu::SliceAttr>(layout)) &&
         "Expecting a valid layout.");
  SmallVector<int64_t> effectiveLaneLayout =
      layout.getEffectiveLaneLayoutAsInt();
  assert(static_cast<size_t>(originalType.getRank()) >=
             effectiveLaneLayout.size() &&
         "Rank of the original vector type should be greater or equal to the "
         "size of the lane layout to distribute the vector type.");
  SmallVector<int64_t> distributedShape(originalType.getShape());
  // Only distribute the last `laneLayout.size()` dimensions. The remaining
  // dimensions are not distributed.
  unsigned distributionStart =
      originalType.getRank() - effectiveLaneLayout.size();
  for (auto [i, dim] : llvm::enumerate(originalType.getShape())) {
    if (i < distributionStart)
      continue;
    // Check if the dimension can be distributed evenly.
    if (dim % effectiveLaneLayout[i - distributionStart] != 0)
      return failure();
    distributedShape[i] = dim / effectiveLaneLayout[i - distributionStart];
  }
  return VectorType::get(distributedShape, originalType.getElementType());
}

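// Illustrative example (not from the original source): distributing
// vector<4x32x16xf32> with an effective lane layout of [2, 16] divides only
// the trailing two dimensions, yielding vector<4x16x1xf32> per lane; a shape
// such as vector<4x31x16xf32> fails because 31 % 2 != 0.
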
std::string xegpu::getTemporaryLayoutName(const OpOperand &operand) {
  const StringRef prefix("layout_operand_");
  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
  return llvm::formatv("{0}{1}", prefix, idx).str();
}

std::string xegpu::getTemporaryLayoutName(const OpResult &result) {
  const StringRef prefix = "layout_result_";
  return llvm::formatv("{0}{1}", prefix, result.getResultNumber()).str();
}

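// Illustrative example (not from the original source): operand 1 of an op is
// tagged with the discardable attribute name "layout_operand_1"; result 0 is
// tagged with "layout_result_0".
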
xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
  if (!value)
    return nullptr;

  if (auto tdescTy =
          dyn_cast_if_present<xegpu::TensorDescType>(value.getType()))
    return tdescTy.getLayoutAttr();

  if (auto result = dyn_cast<OpResult>(value)) {
    Operation *defOp = result.getDefiningOp();
    assert(defOp && "result must have a defining op");

    if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
      auto layout = anchorOp.getAnchorLayout();
      return layout;
    }

    std::string layoutName = getTemporaryLayoutName(result);
    if (defOp->hasAttr(layoutName)) {
      auto layout =
          defOp->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
      return layout;
    }
  }

  if (auto arg = dyn_cast<BlockArgument>(value)) {
    auto *parentOp = arg.getOwner()->getParentOp();
    if (auto loop = dyn_cast_if_present<LoopLikeOpInterface>(parentOp)) {
      OpOperand *tiedInit = loop.getTiedLoopInit(arg);
      if (tiedInit)
        return getDistributeLayoutAttr(tiedInit->get());
    }
  }

  return nullptr;
}

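// Illustrative note (not from the original source): the lookup above resolves
// a value's layout in priority order: (1) the layout carried by a
// TensorDescType, (2) the anchor layout of the defining op, (3) a temporary
// "layout_result_N" attribute on the defining op, and (4) for loop region
// iter_args, the layout of the tied init value, resolved recursively.
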
xegpu::DistributeLayoutAttr
xegpu::getDistributeLayoutAttr(const OpOperand &opr) {
  Operation *op = opr.getOwner();
  unsigned idx = const_cast<OpOperand &>(opr).getOperandNumber();

  if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(op)) {
    if (auto dpasOp = dyn_cast<xegpu::DpasOp>(op)) {
      if (idx == 0) {
        return dpasOp.getLayoutAAttr();
      } else if (idx == 1) {
        return dpasOp.getLayoutBAttr();
      } else if (idx == 2) {
        return dpasOp.getLayoutCdAttr();
      }
    }
    if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
      return convertOp.getInputLayoutAttr();
    }
    auto layout = anchorOp.getAnchorLayout();

    if (idx == 0)
      return layout;

    // For store operations (StoreScatterOp, StoreNdOp, StoreMatrixOp),
    // the layout is valid for the first two operands: value and memref/tdesc.
    // For other operations, the layout applies to the first operand only.
    if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
            op) &&
        (idx < 2))
      return layout;
  }

  std::string layoutName = xegpu::getTemporaryLayoutName(opr);
  if (op->hasAttr(layoutName)) {
    auto layout = op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
    return layout;
  }

  return nullptr;
}

// Returns the permanent layout attribute for the given result if it's
// available on the defining op. Otherwise returns the provided layout.
xegpu::DistributeLayoutAttr
maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
                         const OpResult &result, mlir::Operation *owner,
                         const std::string &name) {
  xegpu::DistributeLayoutAttr candidate = layout;

  if (auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
    if (auto perm = loadOp.getLayoutAttr())
      candidate = perm;
  }

  return candidate;
}

// Returns the permanent layout attribute for the given operand if it's
// available on the owning op. Otherwise returns the provided layout.
xegpu::DistributeLayoutAttr
maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
                         const OpOperand &operand, mlir::Operation *owner,
                         const std::string &name) {
  xegpu::DistributeLayoutAttr candidate = layout;
  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();

  if (auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
    if (idx == 0) {
      if (auto perm = storeOp.getLayoutAttr())
        candidate = perm;
    }
  }

  return candidate;
}

// TODO-LayoutRefactor: Remove this function after replacing its uses
// with setTemporaryLayout or setAnchorLayout.
void xegpu::setDistributeLayoutAttr(
    const mlir::OpResult &result,
    const mlir::xegpu::DistributeLayoutAttr layout) {
  Operation *owner = result.getOwner();

  if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
    if (anchorOp.getAnchorLayout() == layout)
      return;
    anchorOp.setAnchorLayout(layout);
    return;
  }

  std::string name = xegpu::getTemporaryLayoutName(result);
  if (owner->hasAttrOfType<DistributeLayoutAttr>(name)) {
    return;
  }
  if (layout) {
    owner->setAttr(name, layout);
  }
}

// TODO-LayoutRefactor: Remove this function after replacing its uses
// with setTemporaryLayout or setAnchorLayout.
void xegpu::setDistributeLayoutAttr(const mlir::OpOperand &operand,
                                    const DistributeLayoutAttr layout) {
  Operation *owner = operand.getOwner();
  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();

  if (!layout) {
    return;
  }
  if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
    if (auto dpasOp = dyn_cast<xegpu::DpasOp>(owner)) {
      if (idx == 0) {
        return dpasOp.setLayoutAAttr(layout);
      } else if (idx == 1) {
        return dpasOp.setLayoutBAttr(layout);
      } else if (idx == 2) {
        return dpasOp.setLayoutCdAttr(layout);
      }
    }
    if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(owner)) {
      return convertOp.setInputLayoutAttr(layout);
    }

    // For store operations (StoreScatterOp, StoreNdOp, StoreMatrixOp),
    // the layout is valid for the first two operands: value and memref/tdesc.
    // For other operations, the layout applies to the first operand only.
    if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
            owner)) {
      if (idx < 2) {
        anchorOp.setAnchorLayout(layout);
      }
    } else {
      if (idx == 0) {
        anchorOp.setAnchorLayout(layout);
      }
    }
  }

  std::string name = xegpu::getTemporaryLayoutName(operand);
  if (owner->hasAttrOfType<DistributeLayoutAttr>(name)) {
    return;
  }
  if (layout) {
    owner->setAttr(name, layout);
  }
}

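// Illustrative note (not from the original source): for xegpu.dpas, operand 0
// maps to layout_a, operand 1 to layout_b, and operand 2 to layout_cd; for
// the store ops listed above the anchor layout covers both the stored value
// (operand 0) and the destination (operand 1), and any remaining operands
// fall back to temporary "layout_operand_N" attributes.
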
template <typename T, typename>
xegpu::DistributeLayoutAttr
xegpu::getTemporaryLayout(const T &operandOrResult) {
  Operation *op = operandOrResult.getOwner();

  std::string layoutName = xegpu::getTemporaryLayoutName(operandOrResult);
  if (op->hasAttr(layoutName)) {
    auto layout = op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
    return layout;
  }

  return nullptr;
}

template xegpu::DistributeLayoutAttr
xegpu::getTemporaryLayout<mlir::OpResult>(const mlir::OpResult &result);
template xegpu::DistributeLayoutAttr
xegpu::getTemporaryLayout<mlir::OpOperand>(const mlir::OpOperand &operand);

template <typename T, typename>
void xegpu::setTemporaryLayout(const T &operandOrResult,
                               const xegpu::DistributeLayoutAttr layout) {
  Operation *owner = operandOrResult.getOwner();
  std::string name = xegpu::getTemporaryLayoutName(operandOrResult);
  if (owner->hasAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
    return;
  }
  if (layout) {
    owner->setAttr(name, layout);
  }
}

template void xegpu::setTemporaryLayout<mlir::OpResult>(
    const mlir::OpResult &result,
    const mlir::xegpu::DistributeLayoutAttr layout);

template void xegpu::setTemporaryLayout<mlir::OpOperand>(
    const mlir::OpOperand &operand,
    const mlir::xegpu::DistributeLayoutAttr layout);

void xegpu::recoverTemporaryLayoutsDeprecated(Operation *op) {
  op->walk([&](Operation *nestOp) {
    for (OpOperand &opr : nestOp->getOpOperands()) {
      auto layout = getDistributeLayoutAttr(opr.get());
      setDistributeLayoutAttr(opr, layout);
    }

    for (OpResult result : nestOp->getOpResults()) {
      auto layout = getDistributeLayoutAttr(result);
      setDistributeLayoutAttr(result, layout);
    }
  });
}

/// Attach layout attributes to all vector-type operands of operations within
/// the given operation's region. Emits a warning for any vector operand that
/// lacks a layout attribute.
bool xegpu::recoverTemporaryLayouts(Operation *rootOp) {
  auto result = rootOp->walk([&](Operation *op) {
    for (OpOperand &operand : op->getOpOperands()) {
      // Layouts are needed for vector types only.
      if (!isa<VectorType>(operand.get().getType()))
        continue;
      auto layout = xegpu::getDistributeLayoutAttr(operand.get());
      if (!layout) {
        op->emitWarning("Could not find layout attribute for operand ")
            << operand.getOperandNumber() << " of operation " << op->getName();
        continue;
      }
      xegpu::setDistributeLayoutAttr(operand, layout);
    }
    return WalkResult::advance();
  });
  return !result.wasInterrupted();
}

template <typename T, typename>
void xegpu::removeLayoutAttr(const T &operandOrResult) {
  Operation *owner = operandOrResult.getOwner();
  std::string name = xegpu::getTemporaryLayoutName(operandOrResult);
  if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
    owner->removeAttr(name);
}

SmallVector<NamedAttribute>
xegpu::dropSgLayoutAndDataOnAttrs(ArrayRef<NamedAttribute> attrs) {
  SmallVector<NamedAttribute> out;
  out.reserve(attrs.size());

  for (auto attr : attrs) {
    if (auto dist = dyn_cast<xegpu::DistributeLayoutAttr>(attr.getValue())) {
      auto newLayout = dist.dropSgLayoutAndData();
      if (newLayout)
        out.emplace_back(attr.getName(), newLayout);
    } else {
      out.push_back(attr);
    }
  }

  return out;
}

SmallVector<NamedAttribute>
xegpu::dropInstDataOnAttrs(ArrayRef<NamedAttribute> attrs) {
  SmallVector<NamedAttribute> out;
  out.reserve(attrs.size());

  for (auto attr : attrs) {
    if (auto dist = dyn_cast<xegpu::DistributeLayoutAttr>(attr.getValue())) {
      auto newLayout = dist.dropInstData();
      if (newLayout)
        out.emplace_back(attr.getName(), newLayout);
    } else {
      out.push_back(attr);
    }
  }

  return out;
}

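// Illustrative example (not from the original source, assuming the usual
// #xegpu.layout syntax): for #xegpu.layout<sg_layout = [4, 8],
// sg_data = [16, 16], inst_data = [8, 16], lane_layout = [1, 16],
// lane_data = [1, 1]>, dropSgLayoutAndDataOnAttrs keeps only the inst/lane
// fields, while dropInstDataOnAttrs removes just inst_data; an attribute
// whose layout becomes empty is dropped from the result entirely.
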
// Explicit instantiation for OpResult.
template void
xegpu::removeLayoutAttr<mlir::OpResult>(const mlir::OpResult &result);

// Explicit instantiation for OpOperand.
template void
xegpu::removeLayoutAttr<mlir::OpOperand>(const mlir::OpOperand &operand);

void xegpu::removeLayoutAttrs(Operation *op) {
  op->walk([&](Operation *nestOp) {
    for (OpOperand &opr : nestOp->getOpOperands())
      removeLayoutAttr(opr);
    for (OpResult result : nestOp->getOpResults())
      removeLayoutAttr(result);
    if (nestOp->hasAttrOfType<DistributeLayoutAttr>("layout"))
      nestOp->removeAttr("layout");
    if (nestOp->hasAttrOfType<DistributeLayoutAttr>("layout_a"))
      nestOp->removeAttr("layout_a");
    if (nestOp->hasAttrOfType<DistributeLayoutAttr>("layout_b"))
      nestOp->removeAttr("layout_b");
    if (nestOp->hasAttrOfType<DistributeLayoutAttr>("layout_cd"))
      nestOp->removeAttr("layout_cd");
  });
}

/// Extract a set of small vectors from a value with a given shape using
/// vector.extract_strided_slice.
SmallVector<Value>
xegpu::extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc,
                                        Value value, ArrayRef<int64_t> shape) {
  auto vecTy = dyn_cast<VectorType>(value.getType());
  if (!vecTy)
    return {value};

  ArrayRef<int64_t> srcShape = vecTy.getShape();
  if (!computeShapeRatio(srcShape, shape))
    return {value};

  int64_t srcShapeRank = srcShape.size();
  int64_t targetShapeRank = shape.size();

  // Right-align the target shape with the source shape by padding the leading
  // dimensions with 1.
  SmallVector<int64_t> adjustedTargetShape(srcShape.size());
  int64_t rankDiff = srcShapeRank - targetShapeRank;
  std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
            1);
  llvm::copy(shape, adjustedTargetShape.begin() + rankDiff);

  SmallVector<Value> result;
  for (SmallVector<int64_t> offsets :
       StaticTileOffsetRange(srcShape, adjustedTargetShape)) {
    SmallVector<int64_t> staticStrides(offsets.size(), 1);
    Value slice = vector::ExtractStridedSliceOp::create(
        builder, loc, value, offsets, adjustedTargetShape, staticStrides);

    // Reshape to remove leading unit dims if needed.
    if (srcShapeRank > targetShapeRank) {
      auto targetTy = VectorType::get(shape, vecTy.getElementType());
      slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
    }
    result.push_back(slice);
  }

  return result;
}

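// Illustrative example (not from the original source): extracting shape
// [8, 16] from a vector<2x8x16xf32> right-aligns the target to [1, 8, 16],
// produces two strided slices of type vector<1x8x16xf32>, and shape-casts
// each of them to vector<8x16xf32>.
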
/// Create a vector of the given shape from a set of equally shaped tile
/// values using vector.insert_strided_slice.
Value xegpu::createVectorWithShapeFromValues(OpBuilder &builder, Location loc,
                                             ValueRange values,
                                             ArrayRef<int64_t> shape) {
  VectorType inputTy = dyn_cast<VectorType>(values[0].getType());
  assert(llvm::all_of(values.getTypes(),
                      [&](Type type) { return type == inputTy; }) &&
         "values must be of the same VectorType");

  Type elemTy = inputTy.getElementType();
  ArrayRef<int64_t> tileShape = inputTy.getShape();

  VectorType resultTy = VectorType::get(shape, elemTy);
  auto zeroAttr = builder.getZeroAttr(elemTy);
  Value result = arith::ConstantOp::create(
      builder, loc, resultTy, DenseElementsAttr::get(resultTy, zeroAttr));

  for (auto [src, offsets] :
       llvm::zip_equal(values, StaticTileOffsetRange(shape, tileShape))) {
    SmallVector<int64_t> staticStrides(tileShape.size(), 1);
    result = vector::InsertStridedSliceOp::create(builder, loc, src, result,
                                                  offsets, staticStrides);
  }
  return result;
}

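// Illustrative example (not from the original source): four vector<8x16xf32>
// tiles assembled with shape = [16, 32] are inserted at offsets [0, 0],
// [0, 16], [8, 0], and [8, 16] of a zero-initialized vector<16x32xf32>.
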
void xegpu::doSCFStructuralTypeConversionWithTensorType(
    Operation *op, TypeConverter converter) {
  MLIRContext *context = op->getContext();

  auto materializeCast = [](OpBuilder &builder, Type type, ValueRange inputs,
                            Location loc) -> Value {
    return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
        .getResult(0);
  };

  { // Convert VectorType to RankedTensorType for SCF structural ops.
    TypeConverter converter;
    converter.addConversion([](Type type) -> Type { return type; });
    converter.addConversion([](VectorType type) -> Type {
      return RankedTensorType::get(type.getShape(), type.getElementType());
    });
    converter.addSourceMaterialization(materializeCast);
    converter.addTargetMaterialization(materializeCast);

    mlir::ConversionTarget target(*context);
    target.addLegalOp<UnrealizedConversionCastOp>();

    RewritePatternSet patterns(context);
    scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
                                                         target);
    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
  }

  { // Propagate the layout attribute to RankedTensorType by checking
    // UnrealizedConversionCastOps for VectorType-to-RankedTensorType casts.
    op->walk([](UnrealizedConversionCastOp castOp) {
      if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
        return WalkResult::skip();

      Value input = castOp.getInputs()[0];
      Value result = castOp.getResults()[0];
      auto inputTy = dyn_cast<VectorType>(input.getType());
      auto resultTy = dyn_cast<RankedTensorType>(result.getType());

      // Only look at ops casting from VectorType to RankedTensorType.
      if (!inputTy || !resultTy)
        return WalkResult::skip();

      xegpu::DistributeLayoutAttr layout =
          xegpu::getDistributeLayoutAttr(input);
      if (!layout)
        return WalkResult::skip();

      RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
      result.setType(newTy);

      // Update the arguments if the user is a LoopLike op.
      for (OpOperand &use : result.getUses()) {
        if (auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
          BlockArgument arg = loop.getTiedLoopRegionIterArg(&use);
          arg.setType(newTy);
        }
        // WhileOp has two regions; the BlockArguments of the after region
        // are not exposed by LoopLikeOpInterface.
        if (auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
          unsigned idx = use.getOperandNumber();
          BlockArgument arg = whileOp.getAfterArguments()[idx];
          arg.setType(newTy);
        }
      }
      return WalkResult::advance();
    });

    // Use yieldOp as an anchor to update the result types of its parent op.
    op->walk([](scf::YieldOp yieldOp) {
      Operation *parentOp = yieldOp->getParentOp();
      for (OpResult r : parentOp->getOpResults()) {
        unsigned idx = r.getResultNumber();
        Type resultTy = r.getType();
        Type yieldTy = yieldOp.getResults()[idx].getType();
        if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
          r.setType(yieldTy);
      }
    });
  }

  { // Perform the conversion from RankedTensorType to VectorType based on the
    // DistributeLayoutAttr.

    // Handle the UnrealizedConversionCastOp introduced by the first step.
    // For VectorType -> RankedTensorType, simply forward the inputs.
    // For RankedTensorType -> VectorType, update the inputs with the
    // ones from the adaptor.
    class UnrealizedConversionCastOpPattern
        : public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
      using OpConversionPattern<
          mlir::UnrealizedConversionCastOp>::OpConversionPattern;

      mlir::LogicalResult
      matchAndRewrite(mlir::UnrealizedConversionCastOp op,
                      OneToNOpAdaptor adaptor,
                      ConversionPatternRewriter &rewriter) const override {
        auto inputs = op.getOperands();
        auto outputs = op.getOutputs();

        if (inputs.size() != 1 || outputs.size() != 1)
          return failure();

        auto inputTy = inputs[0].getType();
        auto outputTy = outputs[0].getType();

        if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
          rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
          return success();
        }

        if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
          SmallVector<Value> values = xegpu::flattenValues(adaptor.getInputs());
          auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
                                                          outputTy, values);
          rewriter.replaceOp(op, newOp);
          return success();
        }
        return failure();
      }
    };

    converter.addSourceMaterialization(materializeCast);
    converter.addTargetMaterialization([&](OpBuilder &builder, TypeRange type,
                                           ValueRange inputs, Location loc) {
      return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
          .getResults();
    });

    mlir::ConversionTarget target(*context);
    target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
        [](UnrealizedConversionCastOp op) {
          auto isTensorTy = [](Type type) {
            return isa<RankedTensorType>(type);
          };
          return llvm::none_of(op->getOperandTypes(), isTensorTy) &&
                 llvm::none_of(op->getResultTypes(), isTensorTy);
        });

    RewritePatternSet patterns(context);
    patterns.insert<UnrealizedConversionCastOpPattern>(context);
    scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
                                                         target);
    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
  }
}

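// Illustrative note (not from the original source): taken together, the three
// steps round-trip, e.g., an scf.for with a vector<8x16xf32> iter_arg through
// tensor<8x16xf32, #layout> so that the layout survives the structural type
// conversion, and then lower the tensor back to the distributed vector type
// supplied by the caller's TypeConverter.
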
std::optional<std::string> xegpu::getChipStr(Operation *op) {
  auto gpuModuleOp = op->getParentOfType<gpu::GPUModuleOp>();

  if (!gpuModuleOp)
    return std::nullopt;

  auto targetAttrs = gpuModuleOp.getTargets();
  if (targetAttrs) {
    for (auto &attr : *targetAttrs) {
      auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
      if (xevmAttr)
        return xevmAttr.getChip().str();
    }
  }

  return std::nullopt;
}

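// Illustrative example (not from the original source): for an op nested in
//   gpu.module @m [#xevm.target<chip = "pvc">] { ... }
// getChipStr returns "pvc"; without an enclosing gpu.module carrying an XeVM
// target attribute it returns std::nullopt.
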
/// Generates element-wise addition ops of two arrays with the same length.
SmallVector<OpFoldResult> xegpu::addElementwise(OpBuilder &builder,
                                                Location loc,
                                                ArrayRef<OpFoldResult> lhs,
                                                ArrayRef<OpFoldResult> rhs) {
  assert(lhs.size() == rhs.size() && "lhs and rhs must have the same size");
  SmallVector<OpFoldResult> results;
  for (auto [l, r] : llvm::zip_equal(lhs, rhs)) {
    auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
    auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
    results.push_back(builder.createOrFold<arith::AddIOp>(loc, lval, rval));
  }
  return results;
}

/// Generates element-wise addition ops of two arrays with automatic alignment.
/// When the input arrays have different sizes, the shorter array is
/// right-aligned with the longer array, and the unmatched leading elements from
/// the longer array are preserved unchanged. This is commonly used for offset
/// computation where higher-dimensional offsets need to be added to
/// lower-dimensional adjustments.
///
/// Example:
///   lhs = [l1, l2, l3], rhs = [r1, r2]
///   Result: [l1, l2+r1, l3+r2]
SmallVector<OpFoldResult>
xegpu::addWithRightAligned(OpBuilder &builder, Location loc,
                           ArrayRef<OpFoldResult> lhs,
                           ArrayRef<OpFoldResult> rhs) {
  // Ensure that a is the longer of the two arrays.
  ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
  ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
  SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
  a = a.slice(a.size() - b.size());
  results.append(addElementwise(builder, loc, a, b));
  return results;
}

template <typename T>
int xegpu::getLargestDivisor(T dim, ArrayRef<T> candidates,
                             ArrayRef<T> candidateMultiples) {
  static_assert(std::is_integral<T>::value, "T must be an integer type");
  int largest = -1;
  SmallVector<T> multiples = {1};
  if (!candidateMultiples.empty())
    multiples =
        SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
  for (T candidate : candidates) {
    for (T multiple : multiples) {
      int value = static_cast<int>(candidate * multiple);
      if (value != 0 && dim % value == 0 && value > largest)
        largest = value;
    }
  }
  return largest;
}

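// Illustrative example (not from the original source): with dim = 24,
// candidates = {4, 6, 8}, and candidateMultiples = {1, 2}, the products are
// 4, 8, 6, 12, 8, and 16; of these, 4, 6, 8, and 12 divide 24, so the
// function returns 12. If no product divides dim, -1 is returned.
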
/// Explicit instantiations.
template int xegpu::getLargestDivisor<int>(int dim, ArrayRef<int> candidates,
                                           ArrayRef<int> candidateMultiples);
template int
xegpu::getLargestDivisor<unsigned>(unsigned dim, ArrayRef<unsigned> candidates,
                                   ArrayRef<unsigned> candidateMultiples);

bool xegpu::requirePacked(const xegpu::LayoutAttr layout) {
  if (!layout)
    return false;
  auto laneData = layout.getEffectiveLaneDataAsInt();
  if (laneData.size() != 2)
    return false;
  return laneData[0] != 1;
}

bool xegpu::requireTranspose(const xegpu::LayoutAttr layout,
                             const xegpu::uArch::uArch *uArch) {
  // Return false for unsupported targets.
  // TODO: Add more support or move to target info.
  if (!uArch->getName().equals_insensitive("pvc") &&
      !uArch->getName().equals_insensitive("bmg"))
    return false;
  if (!layout)
    return false;
  auto laneLayout = layout.getEffectiveLaneLayoutAsInt();
  if (laneLayout.size() != 2)
    return false;
  return laneLayout[0] == uArch->getSubgroupSize() && laneLayout[1] == 1;
}