XeGPUUtils.cpp
//===---- XeGPUUtils.cpp - MLIR Utilities for XeGPUOps ------------------===//
//
// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utility methods for working with the XeGPU dialect.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/ValueRange.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/Support/FormatVariadic.h"
#include <cstdint>
#include <numeric>

using namespace mlir;

/// Convert ArrayRef<ValueRange> into SmallVector<Value>.
SmallVector<Value> xegpu::flattenValues(ArrayRef<ValueRange> values) {
  SmallVector<Value> result;
  for (const auto &vals : values)
    llvm::append_range(result, vals);
  return result;
}

FailureOr<VectorType>
mlir::xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) {
  auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
  // This only works for a subgroup-level layout, which carries just
  // lane_layout and lane_data and is used to distribute SIMD code into SIMT
  // code.
  if (!layout || !layout.isForSubgroup())
    return failure();

  SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
  SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
  auto tdescShape = tdescTy.getShape();
  auto elementType = tdescTy.getElementType();

  // Compute sgSize by multiplying the elements of laneLayout,
  // e.g., for a 2D layout, sgSize = laneLayout[0] * laneLayout[1];
  // for a 1D layout, sgSize = laneLayout[0].
  int64_t sgSize = llvm::product_of(laneLayout);

  // Case 1: regular loads/stores.
  auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>();
  if (scatterAttr) {
    auto chunkSize = scatterAttr.getChunkSize().getInt();
    // Verify that the first dimension of the tensor descriptor shape is
    // distributable.
    assert(tdescShape[0] == laneLayout[0] &&
           "tensor descriptor shape is not distributable");
    return VectorType::get({chunkSize}, elementType);
  }

  // Case 2: block loads/stores.
  // Check if the tensor descriptor shape is distributable.
  int64_t tensorSize = 1;
  for (auto [tdescDim, laneDim, laneDataDim] :
       llvm::zip_equal(tdescShape, laneLayout, laneData)) {
    assert((tdescDim % (laneDim * laneDataDim) == 0) &&
           "tensor descriptor shape is not distributable");
    tensorSize *= tdescDim;
  }
  // tensorSize must be adjusted for array_length.
  tensorSize *= tdescTy.getArrayLength();

  return VectorType::get({tensorSize / sgSize}, elementType);
}
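
// Illustrative example (not part of the original source): for a block tensor
// descriptor of shape 16x16xf16 with lane_layout = [1, 16],
// lane_data = [1, 1], and array_length = 1, sgSize = 1 * 16 = 16 and
// tensorSize = 16 * 16 = 256, so each lane receives vector<16xf16>
// (256 / 16 elements).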

FailureOr<VectorType>
mlir::xegpu::getDistributedVectorType(VectorType originalType,
                                      xegpu::LayoutAttr layout) {
  int64_t rank = originalType.getRank();
  // Distributed vector type is only supported for 1D, 2D and 3D vectors.
  if (rank < 1 || rank > 3)
    return failure();
  ArrayRef<int64_t> shape = originalType.getShape();
  // arrayLength is 1 for 1D and 2D vectors, and equal to the first dimension
  // of the 3D vector.
  int arrayLength = 1;
  if (rank == 3) {
    arrayLength = shape[0];
    shape = shape.drop_front();
  }
  auto helperTdescTy = xegpu::TensorDescType::get(
      shape, originalType.getElementType(), arrayLength,
      /*boundary_check=*/true,
      /*memory_space=*/xegpu::MemorySpace::Global, layout);
  return xegpu::getDistributedVectorType(helperTdescTy);
}
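
// Illustrative example (not part of the original source): a
// vector<2x16x16xf16> is interpreted as a 16x16 block with array_length = 2;
// with lane_layout = [1, 16] and lane_data = [1, 1], the distributed type is
// vector<32xf16> (2 * 16 * 16 / 16 elements per lane).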

std::string xegpu::getTemporaryLayoutName(const OpOperand &operand) {
  const StringRef prefix("layout_operand_");
  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
  return llvm::formatv("{0}{1}", prefix, idx).str();
}

std::string xegpu::getTemporaryLayoutName(const OpResult &result) {
  const StringRef prefix = "layout_result_";
  return llvm::formatv("{0}{1}", prefix, result.getResultNumber()).str();
}
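
// For example, the operand at index 2 of an op maps to the attribute name
// "layout_operand_2", and result 0 maps to "layout_result_0"; these names
// are used below to stash layouts as discardable attributes on the owner op.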

xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
  if (!value)
    return nullptr;

  if (auto tdescTy =
          dyn_cast_if_present<xegpu::TensorDescType>(value.getType()))
    return tdescTy.getLayoutAttr();

  if (auto result = dyn_cast<OpResult>(value)) {
    Operation *defOp = result.getDefiningOp();
    assert(defOp && "result must have a defining op");

    if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp))
      return anchorOp.getAnchorLayout();

    std::string layoutName = getTemporaryLayoutName(result);
    if (defOp->hasAttr(layoutName))
      return defOp->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
  }

  if (auto arg = dyn_cast<BlockArgument>(value)) {
    auto *parentOp = arg.getOwner()->getParentOp();
    if (auto loop = dyn_cast<LoopLikeOpInterface>(parentOp)) {
      OpOperand *tiedInit = loop.getTiedLoopInit(arg);
      if (tiedInit)
        return getDistributeLayoutAttr(tiedInit->get());
    }
  }

  return nullptr;
}

xegpu::DistributeLayoutAttr
xegpu::getDistributeLayoutAttr(const OpOperand &opr) {
  Operation *op = opr.getOwner();
  unsigned idx = const_cast<OpOperand &>(opr).getOperandNumber();

  if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(op)) {
    if (auto dpasOp = dyn_cast<xegpu::DpasOp>(op)) {
      if (idx == 0)
        return dpasOp.getLayoutAAttr();
      if (idx == 1)
        return dpasOp.getLayoutBAttr();
      if (idx == 2)
        return dpasOp.getLayoutCdAttr();
    }
    if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(op))
      return convertOp.getInputLayoutAttr();

    auto layout = anchorOp.getAnchorLayout();

    if (idx == 0)
      return layout;

    // For store operations (StoreScatterOp, StoreNdOp, StoreMatrixOp), the
    // layout is valid for the first two operands: value and memref/tdesc.
    // For other operations, the layout applies to the first operand only.
    if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
            op) &&
        (idx < 2))
      return layout;
  }

  std::string layoutName = xegpu::getTemporaryLayoutName(opr);
  if (op->hasAttr(layoutName))
    return op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);

  return getDistributeLayoutAttr(opr.get());
}
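
// Note (a summary of the lookup above, not original source text): an
// operand's layout is resolved in this order: (1) anchor layouts on ops
// implementing AnchorLayoutInterface, with special handling for DpasOp
// operands A/B/C and the ConvertLayoutOp input; (2) a temporary
// "layout_operand_<i>" attribute on the owning op; (3) the layout attached
// to the value feeding the operand.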

// Returns the permanent layout attribute for the given result if it's
// available on the defining op. Otherwise returns the provided layout.
xegpu::DistributeLayoutAttr
maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
                         const OpResult &result, mlir::Operation *owner,
                         const std::string &name) {
  xegpu::DistributeLayoutAttr candidate = layout;

  if (auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
    if (auto perm = loadOp.getLayoutAttr())
      candidate = perm;
  }

  return candidate;
}

// Returns the permanent layout attribute for the given operand if it's
// available on the owning op. Otherwise returns the provided layout.
xegpu::DistributeLayoutAttr
maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
                         const OpOperand &operand, mlir::Operation *owner,
                         const std::string &name) {
  xegpu::DistributeLayoutAttr candidate = layout;
  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();

  if (auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
    if (idx == 0) {
      if (auto perm = storeOp.getLayoutAttr())
        candidate = perm;
    }
  }

  return candidate;
}

// TODO-LayoutRefactor: Remove this function after replacing its uses
// with setTemporaryLayout or setAnchorLayout.
void xegpu::setDistributeLayoutAttr(
    const mlir::OpResult &result,
    const mlir::xegpu::DistributeLayoutAttr layout) {
  Operation *owner = result.getOwner();

  if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
    if (anchorOp.getAnchorLayout() == layout)
      return;
    anchorOp.setAnchorLayout(layout);
    return;
  }

  std::string name = xegpu::getTemporaryLayoutName(result);
  if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
    return;
  if (layout)
    owner->setAttr(name, layout);
}

// TODO-LayoutRefactor: Remove this function after replacing its uses
// with setTemporaryLayout or setAnchorLayout.
void xegpu::setDistributeLayoutAttr(const mlir::OpOperand &operand,
                                    const DistributeLayoutAttr layout) {
  Operation *owner = operand.getOwner();
  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();

  if (!layout)
    return;

  if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
    if (auto dpasOp = dyn_cast<xegpu::DpasOp>(owner)) {
      if (idx == 0)
        return dpasOp.setLayoutAAttr(layout);
      if (idx == 1)
        return dpasOp.setLayoutBAttr(layout);
      if (idx == 2)
        return dpasOp.setLayoutCdAttr(layout);
    }
    if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(owner))
      return convertOp.setInputLayoutAttr(layout);

    // For store operations (StoreScatterOp, StoreNdOp, StoreMatrixOp), the
    // layout is valid for the first two operands: value and memref/tdesc.
    // For other operations, the layout applies to the first operand only.
    if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
            owner)) {
      if (idx < 2)
        anchorOp.setAnchorLayout(layout);
    } else {
      if (idx == 0)
        anchorOp.setAnchorLayout(layout);
    }
  }

  std::string name = xegpu::getTemporaryLayoutName(operand);
  if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
    return;
  if (layout)
    owner->setAttr(name, layout);
}

template <typename T, typename>
xegpu::DistributeLayoutAttr
xegpu::getTemporaryLayout(const T &operandOrResult) {
  Operation *op = operandOrResult.getOwner();

  std::string layoutName = xegpu::getTemporaryLayoutName(operandOrResult);
  if (op->hasAttr(layoutName))
    return op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);

  return nullptr;
}

template xegpu::DistributeLayoutAttr
xegpu::getTemporaryLayout<mlir::OpResult>(const mlir::OpResult &);
template xegpu::DistributeLayoutAttr
xegpu::getTemporaryLayout<mlir::OpOperand>(const mlir::OpOperand &);

template <typename T, typename>
void xegpu::setTemporaryLayout(const T &operandOrResult,
                               const xegpu::DistributeLayoutAttr layout) {
  Operation *owner = operandOrResult.getOwner();
  std::string name = xegpu::getTemporaryLayoutName(operandOrResult);
  if (owner->hasAttrOfType<xegpu::DistributeLayoutAttr>(name))
    return;
  if (layout)
    owner->setAttr(name, layout);
}

template void xegpu::setTemporaryLayout<mlir::OpResult>(
    const mlir::OpResult &result,
    const mlir::xegpu::DistributeLayoutAttr layout);

template void xegpu::setTemporaryLayout<mlir::OpOperand>(
    const mlir::OpOperand &operand,
    const mlir::xegpu::DistributeLayoutAttr layout);

void xegpu::recoverTemporaryLayoutsDeprecated(Operation *op) {
  op->walk([&](Operation *nestOp) {
    for (OpOperand &opr : nestOp->getOpOperands()) {
      auto layout = getDistributeLayoutAttr(opr.get());
      setDistributeLayoutAttr(opr, layout);
    }

    for (OpResult result : nestOp->getOpResults()) {
      auto layout = getDistributeLayoutAttr(result);
      setDistributeLayoutAttr(result, layout);
    }
  });
}

/// Attach layout attributes to all vector-type operands of operations within
/// the given operation's region. Reports an error if any vector operand lacks
/// a layout attribute.
bool xegpu::recoverTemporaryLayouts(Operation *rootOp) {
  auto result = rootOp->walk([&](Operation *op) {
    for (OpOperand &operand : op->getOpOperands()) {
      // Layouts are needed for vector types only.
      if (!isa<VectorType>(operand.get().getType()))
        continue;
      auto layout = xegpu::getDistributeLayoutAttr(operand.get());
      if (!layout) {
        op->emitError("Could not find layout attribute for operand ")
            << operand.getOperandNumber() << " of operation " << op->getName();
        return WalkResult::interrupt();
      }
      xegpu::setDistributeLayoutAttr(operand, layout);
    }
    return WalkResult::advance();
  });
  return !result.wasInterrupted();
}

template <typename T, typename>
void xegpu::removeLayoutAttr(const T &operandOrResult) {
  Operation *owner = operandOrResult.getOwner();
  std::string name = xegpu::getTemporaryLayoutName(operandOrResult);
  if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
    owner->removeAttr(name);
}

// Explicit instantiation for OpResult
template void
xegpu::removeLayoutAttr<mlir::OpResult>(const mlir::OpResult &);

// Explicit instantiation for OpOperand
template void
xegpu::removeLayoutAttr<mlir::OpOperand>(const mlir::OpOperand &);

void xegpu::removeLayoutAttrs(Operation *op) {
  op->walk([&](Operation *nestOp) {
    for (OpOperand &opr : nestOp->getOpOperands())
      removeLayoutAttr(opr);
    for (OpResult result : nestOp->getOpResults())
      removeLayoutAttr(result);
    if (nestOp->hasAttrOfType<DistributeLayoutAttr>("layout"))
      nestOp->removeAttr("layout");
    if (nestOp->hasAttrOfType<DistributeLayoutAttr>("layout_a"))
      nestOp->removeAttr("layout_a");
    if (nestOp->hasAttrOfType<DistributeLayoutAttr>("layout_b"))
      nestOp->removeAttr("layout_b");
    if (nestOp->hasAttrOfType<DistributeLayoutAttr>("layout_cd"))
      nestOp->removeAttr("layout_cd");
  });
}

SmallVector<Value>
xegpu::extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc,
                                        Value value, ArrayRef<int64_t> shape) {
  auto vecTy = dyn_cast<VectorType>(value.getType());
  if (!vecTy)
    return {value};

  ArrayRef<int64_t> srcShape = vecTy.getShape();
  if (!computeShapeRatio(srcShape, shape))
    return {value};

  int64_t srcShapeRank = srcShape.size();
  int64_t targetShapeRank = shape.size();

  SmallVector<int64_t> adjustedTargetShape(srcShape.size());
  int64_t rankDiff = srcShapeRank - targetShapeRank;
  std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
            1);
  llvm::copy(shape, adjustedTargetShape.begin() + rankDiff);

  SmallVector<Value> result;
  for (SmallVector<int64_t> offsets :
       StaticTileOffsetRange(srcShape, adjustedTargetShape)) {
    SmallVector<int64_t> staticStrides(offsets.size(), 1);
    Value slice = vector::ExtractStridedSliceOp::create(
        builder, loc, value, offsets, adjustedTargetShape, staticStrides);

    // Reshape to remove leading unit dims if needed.
    if (srcShapeRank > targetShapeRank) {
      auto targetTy = VectorType::get(shape, vecTy.getElementType());
      slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
    }
    result.push_back(slice);
  }

  return result;
}
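
// Illustrative usage (hypothetical values, not from the source): slicing a
// vector<4x8xf32> with shape = [2, 8] produces two vector<2x8xf32> slices
// via vector.extract_strided_slice, taken at offsets [0, 0] and [2, 0].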

Value xegpu::createVectorWithShapeFromValues(OpBuilder &builder, Location loc,
                                             ValueRange values,
                                             ArrayRef<int64_t> shape) {
  VectorType inputTy = dyn_cast<VectorType>(values[0].getType());
  assert(llvm::all_of(values.getTypes(),
                      [&](Type type) { return type == inputTy; }) &&
         "values must be of the same VectorType");

  Type elemTy = inputTy.getElementType();
  ArrayRef<int64_t> tileShape = inputTy.getShape();

  VectorType resultTy = VectorType::get(shape, elemTy);
  auto zeroAttr = builder.getZeroAttr(elemTy);
  Value result = arith::ConstantOp::create(
      builder, loc, resultTy, DenseElementsAttr::get(resultTy, zeroAttr));

  for (auto [src, offsets] :
       llvm::zip_equal(values, StaticTileOffsetRange(shape, tileShape))) {
    SmallVector<int64_t> staticStrides(tileShape.size(), 1);
    result = vector::InsertStridedSliceOp::create(builder, loc, src, result,
                                                  offsets, staticStrides);
  }
  return result;
}
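
// Illustrative usage (hypothetical values, not from the source): the inverse
// of the extraction above; combining two vector<2x8xf32> values with
// shape = [4, 8] starts from a zero-initialized vector<4x8xf32> and inserts
// the tiles at offsets [0, 0] and [2, 0] via vector.insert_strided_slice.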

void xegpu::doSCFStructuralTypeConversionWithTensorType(
    Operation *op, TypeConverter converter) {
  MLIRContext *context = op->getContext();

  auto materializeCast = [](OpBuilder &builder, Type type, ValueRange inputs,
                            Location loc) -> Value {
    return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
        .getResult(0);
  };

  { // Convert VectorType to RankedTensorType for SCF structural ops.
    TypeConverter converter;
    converter.addConversion([](Type type) -> Type { return type; });
    converter.addConversion([](VectorType type) -> Type {
      return RankedTensorType::get(type.getShape(), type.getElementType());
    });
    converter.addSourceMaterialization(materializeCast);
    converter.addTargetMaterialization(materializeCast);

    mlir::ConversionTarget target(*context);
    target.addLegalOp<UnrealizedConversionCastOp>();

    RewritePatternSet patterns(context);
    scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
                                                         target);
    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
  }

  { // Propagate the layout attribute to RankedTensorType by checking
    // UnrealizedConversionCastOps that cast from VectorType to
    // RankedTensorType.
    op->walk([](UnrealizedConversionCastOp castOp) {
      if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
        return WalkResult::skip();

      Value input = castOp.getInputs()[0];
      Value result = castOp.getResults()[0];
      auto inputTy = dyn_cast<VectorType>(input.getType());
      auto resultTy = dyn_cast<RankedTensorType>(result.getType());

      // Only look at ops casting from VectorType to RankedTensorType.
      if (!inputTy || !resultTy)
        return WalkResult::skip();

      xegpu::DistributeLayoutAttr layout =
          xegpu::getDistributeLayoutAttr(input);
      if (!layout)
        return WalkResult::skip();

      RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
      result.setType(newTy);

      // Update the arguments if the user is a LoopLike op.
      for (OpOperand &use : result.getUses()) {
        if (auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
          BlockArgument arg = loop.getTiedLoopRegionIterArg(&use);
          arg.setType(newTy);
        }
        // WhileOp has two regions; the BlockArguments of the after region
        // are not exposed by LoopLikeOpInterface.
        if (auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
          unsigned idx = use.getOperandNumber();
          BlockArgument arg = whileOp.getAfterArguments()[idx];
          arg.setType(newTy);
        }
      }
      return WalkResult::advance();
    });

    // Use yieldOp as an anchor to update the result types of its parent op.
    op->walk([](scf::YieldOp yieldOp) {
      Operation *parentOp = yieldOp->getParentOp();
      for (OpResult r : parentOp->getOpResults()) {
        unsigned idx = r.getResultNumber();
        Type resultTy = r.getType();
        Type yieldTy = yieldOp.getResults()[idx].getType();
        if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
          r.setType(yieldTy);
      }
    });
  }

  { // Perform the conversion from RankedTensorType to VectorType based on
    // the DistributeLayoutAttr.

    // Handle the UnrealizedConversionCastOps introduced by the first step.
    // For VectorType -> RankedTensorType, it simply forwards the inputs.
    // For RankedTensorType -> VectorType, it updates the inputs with the
    // ones from the adaptor.
    class UnrealizedConversionCastOpPattern
        : public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
      using OpConversionPattern<
          mlir::UnrealizedConversionCastOp>::OpConversionPattern;

      mlir::LogicalResult
      matchAndRewrite(mlir::UnrealizedConversionCastOp op,
                      OneToNOpAdaptor adaptor,
                      ConversionPatternRewriter &rewriter) const override {
        auto inputs = op.getOperands();
        auto outputs = op.getOutputs();

        if (inputs.size() != 1 || outputs.size() != 1)
          return failure();

        auto inputTy = inputs[0].getType();
        auto outputTy = outputs[0].getType();

        if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
          rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
          return success();
        }

        if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
          SmallVector<Value> values = xegpu::flattenValues(adaptor.getInputs());
          auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
                                                          outputTy, values);
          rewriter.replaceOp(op, newOp);
          return success();
        }
        return failure();
      }
    };

    converter.addSourceMaterialization(materializeCast);
    converter.addTargetMaterialization([&](OpBuilder &builder, TypeRange type,
                                           ValueRange inputs, Location loc) {
      return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
          .getResults();
    });

    mlir::ConversionTarget target(*context);
    target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
        [](UnrealizedConversionCastOp op) {
          auto isTensorTy = [](Type type) {
            return isa<RankedTensorType>(type);
          };
          return llvm::none_of(op->getOperandTypes(), isTensorTy) &&
                 llvm::none_of(op->getResultTypes(), isTensorTy);
        });

    RewritePatternSet patterns(context);
    patterns.insert<UnrealizedConversionCastOpPattern>(context);
    scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
                                                         target);
    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
  }
}
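
// Usage note (an inference from the code above, not an authoritative
// contract): callers are expected to pass a TypeConverter that maps the
// RankedTensorType produced in step one (now carrying a DistributeLayoutAttr
// encoding) back to the distributed VectorType, e.g., via
// getDistributedVectorType. The three blocks then (1) retype SCF structural
// ops from vector to tensor types, (2) attach layouts as tensor encodings,
// and (3) convert back to vectors using the provided converter.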

std::optional<std::string> xegpu::getChipStr(Operation *op) {
  auto gpuModuleOp = op->getParentOfType<gpu::GPUModuleOp>();

  if (!gpuModuleOp)
    return std::nullopt;

  auto targetAttrs = gpuModuleOp.getTargets();
  if (targetAttrs) {
    for (auto &attr : *targetAttrs) {
      auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
      if (xevmAttr)
        return xevmAttr.getChip().str();
    }
  }

  return std::nullopt;
}
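
// Illustrative example (assumed IR syntax, not from this file): for a module
// such as
//   gpu.module @mod [#xevm.target<chip = "pvc">] { ... }
// getChipStr returns "pvc" for any op nested inside the module, and
// std::nullopt when no XeVM target attribute is present.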

/// Generates element-wise addition ops of two arrays with the same length.
SmallVector<OpFoldResult> xegpu::addElementwise(OpBuilder &builder,
                                                Location loc,
                                                ArrayRef<OpFoldResult> lhs,
                                                ArrayRef<OpFoldResult> rhs) {
  assert(lhs.size() == rhs.size() && "lhs and rhs must have the same size");
  SmallVector<OpFoldResult> results;
  for (auto [l, r] : llvm::zip_equal(lhs, rhs)) {
    auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
    auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
    results.push_back(builder.createOrFold<arith::AddIOp>(loc, lval, rval));
  }
  return results;
}

/// Generates element-wise addition ops of two arrays with automatic alignment.
/// When the input arrays have different sizes, the shorter array is
/// right-aligned with the longer array, and the unmatched leading elements of
/// the longer array are preserved unchanged. This is commonly used for offset
/// computation where higher-dimensional offsets need to be added to
/// lower-dimensional adjustments.
///
/// Example:
///   lhs = [l1, l2, l3], rhs = [r1, r2]
///   Result: [l1, l2+r1, l3+r2]
SmallVector<OpFoldResult>
xegpu::addWithRightAligned(OpBuilder &builder, Location loc,
                           ArrayRef<OpFoldResult> lhs,
                           ArrayRef<OpFoldResult> rhs) {
  // Ensure that a is the longer array and b the shorter one.
  ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
  ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
  SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
  a = a.slice(a.size() - b.size());
  results.append(addElementwise(builder, loc, a, b));
  return results;
}

template <typename T>
int xegpu::getLargestDivisor(T dim, ArrayRef<T> candidates,
                             ArrayRef<T> candidateMultiples) {
  static_assert(std::is_integral<T>::value, "T must be an integer type");
  int largest = -1;
  SmallVector<T> multiples = {1};
  if (!candidateMultiples.empty())
    multiples =
        SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
  for (T candidate : candidates) {
    for (T multiple : multiples) {
      int value = static_cast<int>(candidate * multiple);
      if (value != 0 && dim % value == 0 && value > largest)
        largest = value;
    }
  }
  return largest;
}

/// Explicit instantiations
template int xegpu::getLargestDivisor<int>(int dim, ArrayRef<int> candidates,
                                           ArrayRef<int> candidateMultiples);
template int
xegpu::getLargestDivisor<unsigned>(unsigned dim, ArrayRef<unsigned> candidates,
                                   ArrayRef<unsigned> candidateMultiples);
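
// Illustrative example (not from the source): getLargestDivisor(12,
// /*candidates=*/{8, 6, 4}, /*candidateMultiples=*/{1, 2}) evaluates the
// products {8, 16, 6, 12, 4, 8} and returns 12, the largest product that
// evenly divides dim; it returns -1 when no product divides dim.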