//===---- XeGPUUtils.cpp - MLIR Utilities for XeGPUOps ------------------===//
//
// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utility methods for working with the XeGPU dialect.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/XeVMDialect.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/ValueRange.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/Support/FormatVariadic.h"
#include <cstdint>
#include <numeric>

using namespace mlir;

/// Convert an ArrayRef<ValueRange> into a flat SmallVector<Value>.
SmallVector<Value> xegpu::flattenValues(ArrayRef<ValueRange> values) {
  SmallVector<Value> result;
  for (const auto &vals : values)
    llvm::append_range(result, vals);
  return result;
}

FailureOr<VectorType>
mlir::xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) {
  auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
  // This only works for subgroup-level layouts, which carry only lane_layout
  // and lane_data and are used to distribute SIMD code into SIMT code.
  if (!layout || !layout.isForSubgroup())
    return failure();

  SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
  SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
  auto tdescShape = tdescTy.getShape();
  auto elementType = tdescTy.getElementType();

  // Compute sgSize by multiplying the elements of laneLayout,
  // e.g., for a 2D layout, sgSize = laneLayout[0] * laneLayout[1];
  // for a 1D layout, sgSize = laneLayout[0].
  int64_t sgSize = llvm::product_of(laneLayout);

  // Case 1: regular loads/stores.
  auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>();
  if (scatterAttr) {
    auto chunkSize = scatterAttr.getChunkSize().getInt();
    // Verify that the first dimension of the tensor descriptor shape is
    // distributable.
    assert(tdescShape[0] == laneLayout[0] &&
           "tensor descriptor shape is not distributable");
    return VectorType::get({chunkSize}, elementType);
  }

  // Case 2: block loads/stores.
  // Check whether the tensor descriptor shape is distributable.
  int64_t tensorSize = 1;
  for (auto [tdescDim, laneDim, laneDataDim] :
       llvm::zip_equal(tdescShape, laneLayout, laneData)) {
    assert((tdescDim % (laneDim * laneDataDim) == 0) &&
           "tensor descriptor shape is not distributable");
    tensorSize *= tdescDim;
  }
  // tensorSize must be adjusted for array_length.
  tensorSize *= tdescTy.getArrayLength();

  return VectorType::get({tensorSize / sgSize}, elementType);
}

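// For illustration, with a hypothetical block tensor descriptor such as
//   !xegpu.tensor_desc<8x16xf16,
//       #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
// the computation above gives sgSize = 1 * 16 = 16 and
// tensorSize = 8 * 16 = 128, so the distributed per-lane type is
// vector<8xf16>.
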
FailureOr<VectorType>
mlir::xegpu::getDistributedVectorType(VectorType originalType,
                                      xegpu::LayoutAttr layout) {
  int64_t rank = originalType.getRank();
  // Distributed vector types are only supported for 1D, 2D and 3D vectors.
  if (rank < 1 || rank > 3)
    return failure();
  ArrayRef<int64_t> shape = originalType.getShape();
  // arrayLength is 1 for 1D and 2D vectors, and equal to the first dimension
  // of a 3D vector.
  int arrayLength = 1;
  if (rank == 3) {
    arrayLength = shape[0];
    shape = shape.drop_front();
  }
  auto helperTdescTy = xegpu::TensorDescType::get(
      shape, originalType.getElementType(), arrayLength,
      /*boundary_check=*/true,
      /*memory_space=*/xegpu::MemorySpace::Global, layout);
  return xegpu::getDistributedVectorType(helperTdescTy);
}

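// Similarly, a hypothetical vector<2x8x16xf16> (rank 3) with the same layout
// is handled by treating the leading dimension as array_length = 2 and
// distributing the remaining 8x16 block, which yields vector<16xf16> per
// lane.
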
std::string xegpu::getLayoutName(const OpOperand &operand) {
  const StringRef prefix("layout_operand_");
  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
  return llvm::formatv("{0}{1}", prefix, idx).str();
}

std::string xegpu::getLayoutName(const OpResult &result) {
  const StringRef prefix = "layout_result_";
  return llvm::formatv("{0}{1}", prefix, result.getResultNumber()).str();
}

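// For example, the layout of an op's first operand is attached under the
// attribute name "layout_operand_0", and the layout of its second result
// under "layout_result_1".
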
xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
  if (!value)
    return nullptr;

  if (auto tdescTy =
          dyn_cast_if_present<xegpu::TensorDescType>(value.getType()))
    return tdescTy.getLayoutAttr();

  if (auto result = dyn_cast<OpResult>(value)) {
    Operation *defOp = result.getDefiningOp();
    assert(defOp && "result must have a defining op");

    // For ConvertLayoutOp, the layout is stored in the targetLayoutAttr.
    if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(defOp))
      return convertOp.getTargetLayoutAttr();

    // For LoadNdOp, the layout is stored in the tensor descriptor.
    if (auto loadNd = dyn_cast<xegpu::LoadNdOp>(defOp))
      return getDistributeLayoutAttr(loadNd.getTensorDesc());

    // For LoadMatrixOp, the layout is attached as a property of the op.
    if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(defOp))
      return loadOp.getLayoutAttr();

    // For StoreMatrixOp, the layout is attached as a property of the op.
    if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(defOp))
      return storeOp.getLayoutAttr();

    std::string layoutName = getLayoutName(result);
    if (defOp->hasAttr(layoutName))
      return defOp->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);

    // Check for the "permanent" layout only after the "temporary" layout name
    // lookup, for backward compatibility.
    if (auto loadGatherOp = dyn_cast<xegpu::LoadGatherOp>(defOp))
      return loadGatherOp.getLayoutAttr();
  }

  if (auto arg = dyn_cast<BlockArgument>(value)) {
    auto *parentOp = arg.getOwner()->getParentOp();
    if (auto loop = dyn_cast<LoopLikeOpInterface>(parentOp)) {
      OpOperand *tiedInit = loop.getTiedLoopInit(arg);
      if (tiedInit)
        return getDistributeLayoutAttr(tiedInit->get());
    }
  }

  return nullptr;
}

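// For illustration, given a hypothetical loop-carried value:
//   %r = scf.for ... iter_args(%arg = %init) -> (vector<8x16xf32>) { ... }
// a query on the block argument %arg has no defining op; it resolves through
// the tied loop init and returns the layout attached to %init.
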
xegpu::DistributeLayoutAttr
xegpu::getDistributeLayoutAttr(const OpOperand &opr) {
  Operation *op = opr.getOwner();

  if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(op))
    return loadOp.getLayoutAttr();

  if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(op))
    return storeOp.getLayoutAttr();

  std::string layoutName = xegpu::getLayoutName(opr);
  if (op->hasAttr(layoutName))
    return op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);

  // Check for the "permanent" layout only after the "temporary" layout name
  // lookup.
  if (auto storeScatterOp = dyn_cast<xegpu::StoreScatterOp>(op))
    if (auto layout = storeScatterOp.getLayoutAttr())
      return layout;

  return getDistributeLayoutAttr(opr.get());
}

// Returns the permanent layout attribute for the given result if it is
// available on the defining op. Otherwise returns the provided layout.
xegpu::DistributeLayoutAttr
maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
                         const OpResult &result, mlir::Operation *owner,
                         const std::string &name) {
  xegpu::DistributeLayoutAttr candidate = layout;

  if (auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
    if (auto perm = loadOp.getLayoutAttr())
      candidate = perm;
  }

  return candidate;
}

// Returns the permanent layout attribute for the given operand if it is
// available on the owning op. Otherwise returns the provided layout.
xegpu::DistributeLayoutAttr
maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
                         const OpOperand &operand, mlir::Operation *owner,
                         const std::string &name) {
  xegpu::DistributeLayoutAttr candidate = layout;
  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();

  if (auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
    if (idx == 0) {
      if (auto perm = storeOp.getLayoutAttr())
        candidate = perm;
    }
  }

  return candidate;
}

template <typename T, typename>
void xegpu::setDistributeLayoutAttr(const T &operandOrResult,
                                    const DistributeLayoutAttr layout,
                                    bool respectPermLayout) {
  Operation *owner = operandOrResult.getOwner();
  std::string name = xegpu::getLayoutName(operandOrResult);

  // Do nothing if the attribute is already set.
  if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
    return;

  DistributeLayoutAttr candidate = layout;
  if (respectPermLayout)
    candidate = maybePickPermanentLayout(layout, operandOrResult, owner, name);

  if (candidate)
    owner->setAttr(name, candidate);
}

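// Typical usage is a sketch like the following, assuming `op` and `layout`
// are in scope:
//   for (OpOperand &opr : op->getOpOperands())
//     xegpu::setDistributeLayoutAttr(opr, layout);
//   for (OpResult res : op->getOpResults())
//     xegpu::setDistributeLayoutAttr(res, layout, /*respectPermLayout=*/true);
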
// Explicit instantiation for OpResult.
template void mlir::xegpu::setDistributeLayoutAttr<mlir::OpResult>(
    const mlir::OpResult &result,
    const mlir::xegpu::DistributeLayoutAttr layout, bool respectPermLayout);

// Explicit instantiation for OpOperand.
template void mlir::xegpu::setDistributeLayoutAttr<mlir::OpOperand>(
    const mlir::OpOperand &operand,
    const mlir::xegpu::DistributeLayoutAttr layout, bool respectPermLayout);

void xegpu::setDistributeLayoutAttrs(
    Operation *op, function_ref<DistributeLayoutAttr(Value)> getLayoutImpl) {
  op->walk([&](Operation *nestOp) {
    // Load/StoreMatrixOp carry their layout as a permanent property; skip
    // attaching temporary layout attributes to them.
    if (isa<xegpu::LoadMatrixOp, xegpu::StoreMatrixOp>(nestOp))
      return;

    for (OpOperand &opr : nestOp->getOpOperands()) {
      auto layout = getLayoutImpl(opr.get());
      setDistributeLayoutAttr(opr, layout);
    }
    for (OpResult result : nestOp->getOpResults()) {
      auto layout = getLayoutImpl(result);
      setDistributeLayoutAttr(result, layout);
    }
  });
}

template <typename T, typename>
void xegpu::removeLayoutAttr(const T &operandOrResult) {
  Operation *owner = operandOrResult.getOwner();
  std::string name = xegpu::getLayoutName(operandOrResult);
  if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
    owner->removeAttr(name);
}

// Explicit instantiation for OpResult.
template void
mlir::xegpu::removeLayoutAttr<mlir::OpResult>(const mlir::OpResult &result);

// Explicit instantiation for OpOperand.
template void
mlir::xegpu::removeLayoutAttr<mlir::OpOperand>(const mlir::OpOperand &operand);

void xegpu::removeLayoutAttrs(Operation *op) {
  op->walk([&](Operation *nestOp) {
    for (OpOperand &opr : nestOp->getOpOperands())
      removeLayoutAttr(opr);
    for (OpResult result : nestOp->getOpResults())
      removeLayoutAttr(result);
  });
}

SmallVector<Value>
xegpu::extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc,
                                        Value value, ArrayRef<int64_t> shape) {
  auto vecTy = dyn_cast<VectorType>(value.getType());
  if (!vecTy)
    return {value};

  ArrayRef<int64_t> srcShape = vecTy.getShape();
  if (!computeShapeRatio(srcShape, shape))
    return {value};

  int64_t srcShapeRank = srcShape.size();
  int64_t targetShapeRank = shape.size();

  // Right-align the target shape with the source shape, padding the leading
  // dimensions with 1.
  SmallVector<int64_t> adjustedTargetShape(srcShape.size());
  int64_t rankDiff = srcShapeRank - targetShapeRank;
  std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
            1);
  llvm::copy(shape, adjustedTargetShape.begin() + rankDiff);

  SmallVector<Value> result;
  for (SmallVector<int64_t> offsets :
       StaticTileOffsetRange(srcShape, adjustedTargetShape)) {
    SmallVector<int64_t> staticStrides(offsets.size(), 1);
    Value slice = vector::ExtractStridedSliceOp::create(
        builder, loc, value, offsets, adjustedTargetShape, staticStrides);

    // Reshape to remove leading unit dims if needed.
    if (srcShapeRank > targetShapeRank) {
      auto targetTy = VectorType::get(shape, vecTy.getElementType());
      slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
    }
    result.push_back(slice);
  }

  return result;
}

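// For illustration, slicing a hypothetical %v : vector<64x64xf32> with
// shape = [32, 32] produces four vector<32x32xf32> values, extracted at
// offsets [0, 0], [0, 32], [32, 0], and [32, 32] via
// vector.extract_strided_slice.
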
Value xegpu::createVectorWithShapeFromValues(OpBuilder &builder, Location loc,
                                             ValueRange values,
                                             ArrayRef<int64_t> shape) {
  VectorType inputTy = dyn_cast<VectorType>(values[0].getType());
  assert(llvm::all_of(values.getTypes(),
                      [&](Type type) { return type == inputTy; }) &&
         "values must be of the same VectorType");

  Type elemTy = inputTy.getElementType();
  ArrayRef<int64_t> tileShape = inputTy.getShape();

  VectorType resultTy = VectorType::get(shape, elemTy);
  auto zeroAttr = builder.getZeroAttr(elemTy);
  Value result = arith::ConstantOp::create(
      builder, loc, resultTy, DenseElementsAttr::get(resultTy, zeroAttr));

  for (auto [src, offsets] :
       llvm::zip_equal(values, StaticTileOffsetRange(shape, tileShape))) {
    SmallVector<int64_t> staticStrides(tileShape.size(), 1);
    result = vector::InsertStridedSliceOp::create(builder, loc, src, result,
                                                  offsets, staticStrides);
  }
  return result;
}

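// This is the inverse of extractVectorsWithShapeFromValue: inserting the four
// vector<32x32xf32> tiles from the example above at the same offsets into a
// zero-initialized vector<64x64xf32> constant (via
// vector.insert_strided_slice) reassembles the original value.
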
void xegpu::doSCFStructuralTypeConversionWithTensorType(
    Operation *op, TypeConverter converter) {
  MLIRContext *context = op->getContext();

  auto materializeCast = [](OpBuilder &builder, Type type, ValueRange inputs,
                            Location loc) -> Value {
    return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
        .getResult(0);
  };

  { // Step 1: convert VectorType to RankedTensorType for SCF structural ops.
    TypeConverter converter;
    converter.addConversion([](Type type) -> Type { return type; });
    converter.addConversion([](VectorType type) -> Type {
      return RankedTensorType::get(type.getShape(), type.getElementType());
    });
    converter.addSourceMaterialization(materializeCast);
    converter.addTargetMaterialization(materializeCast);

    mlir::ConversionTarget target(*context);
    target.addLegalOp<UnrealizedConversionCastOp>();

    RewritePatternSet patterns(context);
    scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
                                                         target);
    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
  }

  { // Step 2: propagate the layout attribute to the RankedTensorType by
    // examining UnrealizedConversionCastOps that cast from VectorType to
    // RankedTensorType.
    op->walk([](UnrealizedConversionCastOp castOp) {
      if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
        return WalkResult::skip();

      Value input = castOp.getInputs()[0];
      Value result = castOp.getResults()[0];
      auto inputTy = dyn_cast<VectorType>(input.getType());
      auto resultTy = dyn_cast<RankedTensorType>(result.getType());

      // Only look at ops casting from VectorType to RankedTensorType.
      if (!inputTy || !resultTy)
        return WalkResult::skip();

      xegpu::DistributeLayoutAttr layout =
          xegpu::getDistributeLayoutAttr(input);
      if (!layout)
        return WalkResult::skip();

      RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
      result.setType(newTy);

      // Update the block arguments if a user is a loop-like op.
      for (OpOperand &use : result.getUses()) {
        if (auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
          BlockArgument arg = loop.getTiedLoopRegionIterArg(&use);
          arg.setType(newTy);
        }
        // scf.while has two regions; the BlockArguments of its "after" region
        // are not exposed by LoopLikeOpInterface.
        if (auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
          unsigned idx = use.getOperandNumber();
          BlockArgument arg = whileOp.getAfterArguments()[idx];
          arg.setType(newTy);
        }
      }
      return WalkResult::advance();
    });

    // Use scf.yield as an anchor to update the result types of its parent op.
    op->walk([](scf::YieldOp yieldOp) {
      Operation *parentOp = yieldOp->getParentOp();
      for (OpResult r : parentOp->getOpResults()) {
        unsigned idx = r.getResultNumber();
        Type resultTy = r.getType();
        Type yieldTy = yieldOp.getResults()[idx].getType();
        if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
          r.setType(yieldTy);
      }
    });
  }

  { // Step 3: convert RankedTensorType back to VectorType based on the
    // DistributeLayoutAttr.

    // Handle the UnrealizedConversionCastOps introduced by the first step.
    // For VectorType -> RankedTensorType casts, simply forward the inputs.
    // For RankedTensorType -> VectorType casts, update the inputs with the
    // ones from the adaptor.
    class UnrealizedConversionCastOpPattern
        : public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
      using OpConversionPattern<
          mlir::UnrealizedConversionCastOp>::OpConversionPattern;

      mlir::LogicalResult
      matchAndRewrite(mlir::UnrealizedConversionCastOp op,
                      OneToNOpAdaptor adaptor,
                      ConversionPatternRewriter &rewriter) const override {
        auto inputs = op.getOperands();
        auto outputs = op.getOutputs();

        if (inputs.size() != 1 || outputs.size() != 1)
          return failure();

        auto inputTy = inputs[0].getType();
        auto outputTy = outputs[0].getType();

        if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
          rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
          return success();
        }

        if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
          SmallVector<Value> values = xegpu::flattenValues(adaptor.getInputs());
          auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
                                                          outputTy, values);
          rewriter.replaceOp(op, newOp);
          return success();
        }
        return failure();
      }
    };

    converter.addSourceMaterialization(materializeCast);
    converter.addTargetMaterialization([&](OpBuilder &builder, TypeRange type,
                                           ValueRange inputs, Location loc) {
      return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
          .getResults();
    });

    mlir::ConversionTarget target(*context);
    target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
        [](UnrealizedConversionCastOp op) {
          auto isTensorTy = [](Type type) {
            return isa<RankedTensorType>(type);
          };
          return llvm::none_of(op->getOperandTypes(), isTensorTy) &&
                 llvm::none_of(op->getResultTypes(), isTensorTy);
        });
    RewritePatternSet patterns(context);
    patterns.insert<UnrealizedConversionCastOpPattern>(context);
    scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
                                                         target);
    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
  }
}

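// As a rough sketch of the overall effect on a loop-carried vector (types and
// layout are illustrative):
//   scf.for ... iter_args(%arg = %v : vector<8x16xf32>)
// is first rewritten to carry tensor<8x16xf32>; that tensor type is then
// annotated with the layout encoding recovered from %v; and the annotated
// tensor is finally converted back to a vector type distributed according to
// the layout.
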
std::optional<std::string> xegpu::getChipStr(Operation *op) {
  auto gpuModuleOp = op->getParentOfType<gpu::GPUModuleOp>();

  if (!gpuModuleOp)
    return std::nullopt;

  auto targetAttrs = gpuModuleOp.getTargets();
  if (targetAttrs) {
    for (auto &attr : *targetAttrs) {
      auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
      if (xevmAttr)
        return xevmAttr.getChip().str();
    }
  }

  return std::nullopt;
}

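// For example, given a module like the following (chip name illustrative):
//   gpu.module @mod [#xevm.target<chip = "pvc">] { ... }
// getChipStr returns "pvc" for any op nested inside it, and std::nullopt when
// there is no surrounding gpu.module or no XeVM target attribute.
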
/// Generates element-wise addition ops of two arrays of the same length.
SmallVector<OpFoldResult> xegpu::addElementwise(OpBuilder &builder,
                                                Location loc,
                                                ArrayRef<OpFoldResult> lhs,
                                                ArrayRef<OpFoldResult> rhs) {
  assert(lhs.size() == rhs.size() && "lhs and rhs must have the same size");
  SmallVector<OpFoldResult> results;
  for (auto [l, r] : llvm::zip_equal(lhs, rhs)) {
    auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
    auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
    results.push_back(builder.createOrFold<arith::AddIOp>(loc, lval, rval));
  }
  return results;
}

/// Generates element-wise addition ops of two arrays with automatic alignment.
/// When the input arrays have different sizes, the shorter array is
/// right-aligned with the longer array, and the unmatched leading elements of
/// the longer array are preserved unchanged. This is commonly used for offset
/// computation, where higher-dimensional offsets need to be added to
/// lower-dimensional adjustments.
///
/// Example:
///   lhs = [l1, l2, l3], rhs = [r1, r2]
///   Result: [l1, l2+r1, l3+r2]
SmallVector<OpFoldResult>
xegpu::addWithRightAligned(OpBuilder &builder, Location loc,
                           ArrayRef<OpFoldResult> lhs,
                           ArrayRef<OpFoldResult> rhs) {
  // Ensure a is the longer array and b the shorter one.
  ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
  ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
  // Keep the unmatched leading elements of a, then add the aligned tails.
  SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
  a = a.slice(a.size() - b.size());
  results.append(addElementwise(builder, loc, a, b));
  return results;
}

template <typename T>
int xegpu::getLargestDivisor(T dim, ArrayRef<T> candidates,
                             ArrayRef<T> candidateMultiples) {
  static_assert(std::is_integral<T>::value, "T must be an integer type");
  int largest = -1;
  SmallVector<T> multiples = {1};
  if (!candidateMultiples.empty())
    multiples =
        SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
  for (T candidate : candidates) {
    for (T multiple : multiples) {
      int value = static_cast<int>(candidate * multiple);
      if (value != 0 && dim % value == 0 && value > largest)
        largest = value;
    }
  }
  return largest;
}

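// For example, getLargestDivisor(48, {8, 16, 32}) uses the implicit multiple
// of 1 and returns 16, the largest candidate that evenly divides 48; it
// returns -1 when no candidate (times any multiple) divides dim.
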
/// Explicit instantiations
template int xegpu::getLargestDivisor<int>(int dim, ArrayRef<int> candidates,
                                           ArrayRef<int> candidateMultiples);
template int
xegpu::getLargestDivisor<unsigned>(unsigned dim, ArrayRef<unsigned> candidates,
                                   ArrayRef<unsigned> candidateMultiples);