MLIR 22.0.0git
XeGPUUtils.cpp
Go to the documentation of this file.
1//===---- XeGPUUtils.cpp - MLIR Utilities for XeGPUOps ------------------===//
2//
3// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements utility methods for working with the XeGPU dialect.
10//
11//===----------------------------------------------------------------------===//
12
20#include "mlir/IR/Builders.h"
21#include "mlir/IR/Operation.h"
22#include "mlir/IR/ValueRange.h"
25#include "llvm/Support/FormatVariadic.h"
26#include <cstdint>
27#include <numeric>
28
29using namespace mlir;
30
31/// convert ArrayRef<ValueRange> into SmallVector<Value>
34 for (const auto &vals : values)
35 llvm::append_range(result, vals);
36 return result;
37}
38
39FailureOr<VectorType>
40mlir::xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) {
41 auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
42 // It only works for subgroup level layout, which only has lane_layout
43 // and lane_data, and is to distribute a SIMD code into SIMT code.
44 if (!layout || !layout.isForSubgroup())
45 return failure();
46
47 SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
48 SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
49 auto tdescShape = tdescTy.getShape();
50 auto elementType = tdescTy.getElementType();
51
52 // compute sgSize by multiply elements of laneLayout
53 // e.g. for 2D layout, sgSize = laneLayout[0] * laneLayout[1]
54 // e.g. for 1D layout, sgSize = laneLayout[0]
55 int64_t sgSize = llvm::product_of(laneLayout);
56
57 // Case 1: regular loads/stores
58 auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>();
59 if (scatterAttr) {
60 auto chunkSize = scatterAttr.getChunkSize().getInt();
61 // Verify if the first dimension of the tensor descriptor shape is
62 // distributable.
63 assert(tdescShape[0] == laneLayout[0] &&
64 "tensor descriptor shape is not distributable");
65 return VectorType::get({chunkSize}, elementType);
66 }
67
68 // Case 2: block loads/stores
69 // Check if the tensor descriptor shape is distributable.
70 int64_t tensorSize = 1;
71 for (auto [tdescDim, laneDim, laneDataDim] :
72 llvm::zip_equal(tdescShape, laneLayout, laneData)) {
73 assert((tdescDim % (laneDim * laneDataDim) == 0) &&
74 "tensor descriptor shape is not distributable");
75 tensorSize *= tdescDim;
76 }
77 // tensorSize must be adjusted for array_length.
78 tensorSize *= tdescTy.getArrayLength();
79
80 return VectorType::get({tensorSize / sgSize}, elementType);
81}
82
83FailureOr<VectorType>
84mlir::xegpu::getDistributedVectorType(VectorType originalType,
85 xegpu::LayoutAttr layout) {
86 int64_t rank = originalType.getRank();
87 // Distributed vector type is only supported for 1D, 2D and 3D vectors.
88 if (rank < 1 || rank > 3)
89 return failure();
90 ArrayRef<int64_t> shape = originalType.getShape();
91 // arrayLength is 1 for 1D and 2D vectors, and equal to the first dimension
92 // of the 3D vector.
93 int arrayLength = 1;
94 if (rank == 3) {
95 arrayLength = shape[0];
96 shape = shape.drop_front();
97 }
98 auto helperTdescTy = xegpu::TensorDescType::get(
99 shape, originalType.getElementType(), arrayLength,
100 /*boundary_check=*/true,
101 /*memory_space=*/xegpu::MemorySpace::Global, layout);
102 return xegpu::getDistributedVectorType(helperTdescTy);
103}
104
105std::string xegpu::getLayoutName(const OpOperand &operand) {
106 const StringRef prefix("layout_operand_");
107 unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
108 return llvm::formatv("{0}{1}", prefix, idx).str();
109}
110
112 const StringRef prefix = "layout_result_";
113 return llvm::formatv("{0}{1}", prefix, result.getResultNumber()).str();
114}
115
116xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
117 if (!value)
118 return nullptr;
119
120 if (auto tdescTy =
121 dyn_cast_if_present<xegpu::TensorDescType>(value.getType()))
122 return tdescTy.getLayoutAttr();
123
124 if (auto result = dyn_cast<OpResult>(value)) {
125 Operation *defOp = result.getDefiningOp();
126 assert(defOp && "result must have a defining op");
127
128 // For ConvertLayoutOp, the layout is stored in the targetLayoutAttr
129 if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(defOp))
130 return convertOp.getTargetLayoutAttr();
131
132 // for LoadNdOp, the layout is stored in the tensor descriptor
133 if (auto loadNd = dyn_cast<xegpu::LoadNdOp>(defOp))
134 return getDistributeLayoutAttr(loadNd.getTensorDesc());
135
136 // for LoadMatrixOp, the layout is attached to the property of the op
137 if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(defOp))
138 return loadOp.getLayoutAttr();
139
140 // for StoreMatrixOp, the layout is attached to the property of the op
141 if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(defOp))
142 return storeOp.getLayoutAttr();
143
144 std::string layoutName = getLayoutName(result);
145 if (defOp->hasAttr(layoutName))
146 return defOp->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
147
148 // check for "permament" layout only after "temporary" layout name lookup
149 // for backward compatibility
150 if (auto loadGatherOp = dyn_cast<xegpu::LoadGatherOp>(defOp))
151 return loadGatherOp.getLayoutAttr();
152 }
153
154 if (auto arg = dyn_cast<BlockArgument>(value)) {
155 auto *parentOp = arg.getOwner()->getParentOp();
156 if (auto loop = dyn_cast<LoopLikeOpInterface>(parentOp)) {
157 OpOperand *tiedInit = loop.getTiedLoopInit(arg);
158 if (tiedInit)
159 return getDistributeLayoutAttr(tiedInit->get());
160 }
161 }
162
163 return nullptr;
164}
165
166xegpu::DistributeLayoutAttr
168 Operation *op = opr.getOwner();
169
170 if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(op))
171 return loadOp.getLayoutAttr();
172
173 if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(op))
174 return storeOp.getLayoutAttr();
175
176 std::string layoutName = xegpu::getLayoutName(opr);
177 if (op->hasAttr(layoutName))
178 return op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
179
180 // check for "permanent" layout only after "temporary" layout name lookup
181 if (auto storeScatterOp = dyn_cast<xegpu::StoreScatterOp>(op))
182 if (auto layout = storeScatterOp.getLayoutAttr())
183 return layout;
184
185 return getDistributeLayoutAttr(opr.get());
186}
187
188// Returns the permanent layout attribute for the given result if it's
189// available on the defining op. Otherwise returns the provided layout.
190xegpu::DistributeLayoutAttr
191maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
192 const OpResult &result, mlir::Operation *owner,
193 const std::string &name) {
194 xegpu::DistributeLayoutAttr candidate = layout;
195
196 if (auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
197 if (auto perm = loadOp.getLayoutAttr())
198 candidate = perm;
199 }
200
201 return candidate;
202}
203
204// Returns the permanent layout attribute for the given operand if it's
205// available on the defining op. Otherwise returns the provided layout.
206xegpu::DistributeLayoutAttr
207maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
208 const OpOperand &operand, mlir::Operation *owner,
209 const std::string &name) {
210 xegpu::DistributeLayoutAttr candidate = layout;
211 unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
212
213 if (auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
214 if (idx == 0) {
215 if (auto perm = storeOp.getLayoutAttr())
216 candidate = perm;
217 }
218 }
219
220 return candidate;
221}
222
223template <typename T, typename>
224void xegpu::setDistributeLayoutAttr(const T &operandOrResult,
225 const DistributeLayoutAttr layout,
226 bool respectPermLayout) {
227 Operation *owner = operandOrResult.getOwner();
228 std::string name = xegpu::getLayoutName(operandOrResult);
229
230 if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
231 return;
232
233 DistributeLayoutAttr candidate = layout;
234 if (respectPermLayout)
235 candidate = maybePickPermanentLayout(layout, operandOrResult, owner, name);
236
237 if (candidate)
238 owner->setAttr(name, candidate);
239}
240
241// Explicit instantiation for OpResult
243 const mlir::OpResult &result,
244 const mlir::xegpu::DistributeLayoutAttr layout, bool respectPermLayout);
245
246// Explicit instantiation for OpOperand
248 const mlir::OpOperand &operand,
249 const mlir::xegpu::DistributeLayoutAttr layout, bool respectPermLayout);
250
252 Operation *op, function_ref<DistributeLayoutAttr(Value)> getLayoutImpl) {
253 op->walk([&](Operation *nestOp) {
254 if (isa<xegpu::LoadMatrixOp, xegpu::StoreMatrixOp>(nestOp))
255 return;
256
257 for (OpOperand &opr : nestOp->getOpOperands()) {
258 auto layout = getLayoutImpl(opr.get());
259 setDistributeLayoutAttr(opr, layout);
260 }
261 for (OpResult result : nestOp->getOpResults()) {
262 auto layout = getLayoutImpl(result);
264 }
265 });
266}
267
268template <typename T, typename>
269void xegpu::removeLayoutAttr(const T &operandOrResult) {
270 Operation *owner = operandOrResult.getOwner();
271 std::string name = xegpu::getLayoutName(operandOrResult);
272 if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
273 owner->removeAttr(name);
274}
275
276// Explicit instantiation for OpResult
277template void
279
280// Explicit instantiation for OpOperand
281template void
283
285 op->walk([&](Operation *nestOp) {
286 for (OpOperand &opr : nestOp->getOpOperands())
287 removeLayoutAttr(opr);
288 for (OpResult result : nestOp->getOpResults())
290 });
291}
292
296 auto vecTy = dyn_cast<VectorType>(value.getType());
297 if (!vecTy)
298 return {value};
299
300 ArrayRef<int64_t> srcShape = vecTy.getShape();
301 if (!computeShapeRatio(srcShape, shape))
302 return {value};
303
304 int64_t srcShapeRank = srcShape.size();
305 int64_t targetShapeRank = shape.size();
306
307 SmallVector<int64_t> adjustedTargetShape(srcShape.size());
308 int64_t rankDiff = srcShapeRank - targetShapeRank;
309 std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
310 1);
311 llvm::copy(shape, adjustedTargetShape.begin() + rankDiff);
312
314 for (SmallVector<int64_t> offsets :
315 StaticTileOffsetRange(srcShape, adjustedTargetShape)) {
316 SmallVector<int64_t> staticStrides(offsets.size(), 1);
317 Value slice = vector::ExtractStridedSliceOp::create(
318 builder, loc, value, offsets, adjustedTargetShape, staticStrides);
319
320 // Reshape to remove leading unit dims if needed
321 if (srcShapeRank > targetShapeRank) {
322 auto targetTy = VectorType::get(shape, vecTy.getElementType());
323 slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
324 }
325 result.push_back(slice);
326 }
327
328 return result;
329}
330
332 ValueRange values,
334 VectorType inputTy = dyn_cast<VectorType>(values[0].getType());
335 assert(llvm::all_of(values.getTypes(),
336 [&](Type type) { return type == inputTy; }) &&
337 "values must be of the same VectorType");
338
339 Type elemTy = inputTy.getElementType();
340 ArrayRef<int64_t> tileShape = inputTy.getShape();
341
342 VectorType resultTy = VectorType::get(shape, elemTy);
343 auto zeroAttr = builder.getZeroAttr(elemTy);
344 Value result = arith::ConstantOp::create(
345 builder, loc, resultTy, DenseElementsAttr::get(resultTy, zeroAttr));
346
347 for (auto [src, offsets] :
348 llvm::zip_equal(values, StaticTileOffsetRange(shape, tileShape))) {
349 SmallVector<int64_t> staticStrides(tileShape.size(), 1);
350 result = vector::InsertStridedSliceOp::create(builder, loc, src, result,
351 offsets, staticStrides);
352 }
353 return result;
354}
355
357 Operation *op, TypeConverter converter) {
358 MLIRContext *context = op->getContext();
359
360 auto materializeCast = [](OpBuilder &builder, Type type, ValueRange inputs,
361 Location loc) -> Value {
362 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
363 .getResult(0);
364 };
365
366 { // convert VectorType to RankedTensorType for SCF Structural ops
367 TypeConverter converter;
368 converter.addConversion([](Type type) -> Type { return type; });
369 converter.addConversion([](VectorType type) -> Type {
370 return RankedTensorType::get(type.getShape(), type.getElementType());
371 });
372 converter.addSourceMaterialization(materializeCast);
373 converter.addTargetMaterialization(materializeCast);
374
375 mlir::ConversionTarget target(*context);
376 target.addLegalOp<UnrealizedConversionCastOp>();
377
380 target);
381 (void)mlir::applyPartialConversion(op, target, std::move(patterns));
382 }
383
384 { // propagate the layout attribute to RankedTensorType by checking
385 // BuiltInUnrealizedCastOps
386 // for VectorType to RankedTensorType cast.
387 op->walk([](UnrealizedConversionCastOp castOp) {
388 if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
389 return WalkResult::skip();
390
391 Value input = castOp.getInputs()[0];
392 Value result = castOp.getResults()[0];
393 auto inputTy = dyn_cast<VectorType>(input.getType());
394 auto resultTy = dyn_cast<RankedTensorType>(result.getType());
395
396 // Only look at ops casting from VectorType to RankedTensorType
397 if (!inputTy || !resultTy)
398 return WalkResult::skip();
399
400 xegpu::DistributeLayoutAttr layout =
402 if (!layout)
403 return WalkResult::skip();
404
405 RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
406 result.setType(newTy);
407
408 // update the arguments if user is a LoopLike op.
409 for (OpOperand &use : result.getUses()) {
410 if (auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
411 BlockArgument arg = loop.getTiedLoopRegionIterArg(&use);
412 arg.setType(newTy);
413 }
414 // whileOp has two regions, the BlockArgument of the after region
415 // is not exposed by LoopLikeOpInterface
416 if (auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
417 unsigned idx = use.getOperandNumber();
418 BlockArgument arg = whileOp.getAfterArguments()[idx];
419 arg.setType(newTy);
420 }
421 }
422 return WalkResult::advance();
423 });
424
425 // using yieldOp as anchor to update the result type of its ParentOp
426 op->walk([](scf::YieldOp yieldOp) {
427 Operation *parentOp = yieldOp->getParentOp();
428 for (OpResult r : parentOp->getOpResults()) {
429 unsigned idx = r.getResultNumber();
430 Type resultTy = r.getType();
431 Type yieldTy = yieldOp.getResults()[idx].getType();
432 if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
433 r.setType(yieldTy);
434 }
435 });
436 }
437
438 { // perform the conversion from RankedTensorType to VectorType based on the
439 // DistributeLayoutAttr
440
441 // Handle the UnrealizedConversionCastOp introduced by the first step.
442 // For vector->RankedTensorType, it will simply forward the inputs.
443 // For RankedTensorType->vector, it will update the inputs with the
444 // one from the adaptor.
445 class UnrealizedConversionCastOpPattern
446 : public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
447 using OpConversionPattern<
448 mlir::UnrealizedConversionCastOp>::OpConversionPattern;
449
450 mlir::LogicalResult
451 matchAndRewrite(mlir::UnrealizedConversionCastOp op,
452 OneToNOpAdaptor adaptor,
453 ConversionPatternRewriter &rewriter) const override {
454 auto inputs = op.getOperands();
455 auto outputs = op.getOutputs();
456
457 if (inputs.size() != 1 || outputs.size() != 1)
458 return failure();
459
460 auto inputTy = inputs[0].getType();
461 auto outputTy = outputs[0].getType();
462
463 if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
464 rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
465 return success();
466 }
467
468 if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
469 SmallVector<Value> values = xegpu::flattenValues(adaptor.getInputs());
470 auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
471 outputTy, values);
472 rewriter.replaceOp(op, newOp);
473 return success();
474 }
475 return failure();
476 }
477 };
478
479 converter.addSourceMaterialization(materializeCast);
480 converter.addTargetMaterialization([&](OpBuilder &builder, TypeRange type,
481 ValueRange inputs, Location loc) {
482 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
483 .getResults();
484 });
485
486 mlir::ConversionTarget target(*context);
487 target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
488 [](UnrealizedConversionCastOp op) {
489 auto isTensorTy = [](Type type) {
490 return isa<RankedTensorType>(type);
491 };
492 return llvm::none_of(op->getOperandTypes(), isTensorTy) &&
493 llvm::none_of(op->getResultTypes(), isTensorTy);
494 });
496 patterns.insert<UnrealizedConversionCastOpPattern>(context);
498 target);
499 (void)mlir::applyPartialConversion(op, target, std::move(patterns));
500 }
501}
502
503std::optional<std::string> xegpu::getChipStr(Operation *op) {
504 auto gpuModuleOp = op->getParentOfType<gpu::GPUModuleOp>();
505
506 if (!gpuModuleOp)
507 return std::nullopt;
508
509 auto targetAttrs = gpuModuleOp.getTargets();
510 if (targetAttrs) {
511 for (auto &attr : *targetAttrs) {
512 auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
513 if (xevmAttr)
514 return xevmAttr.getChip().str();
515 }
516 }
517
518 return std::nullopt;
519}
520
521/// Generates element-wise addition ops of two arrays with same length.
523 Location loc,
526 assert(lhs.size() == rhs.size() && "lhs and rhs must have the same size");
528 for (auto [l, r] : llvm::zip_equal(lhs, rhs)) {
529 auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
530 auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
531 results.push_back(builder.createOrFold<index::AddOp>(loc, lval, rval));
532 }
533 return results;
534}
535
536/// Generates element-wise addition ops of two arrays with automatic alignment.
537/// When the input arrays have different sizes, the shorter array is
538/// right-aligned with the longer array, and the unmatched leading elements from
539/// the longer array are preserved unchanged. This is commonly used for offset
540/// computation where higher-dimensional offsets need to be added to
541/// lower-dimensional adjustments.
542///
543/// Example:
544/// lhs = [l1, l2, l3], rhs = [r1, r2]
545/// Result: [l1, l2+r1, l3+r2]
550 // ensure a is longer than b
551 ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
552 ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
553 SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
554 a = a.slice(a.size() - b.size());
555 results.append(addElementwise(builder, loc, a, b));
556 return results;
557}
558
559template <typename T>
561 ArrayRef<T> candidateMultiples) {
562 static_assert(std::is_integral<T>::value, "T must be an integer type");
563 int largest = -1;
564 SmallVector<T> multiples = {1};
565 if (!candidateMultiples.empty())
566 multiples =
567 SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
568 for (T candidate : candidates) {
569 for (T multiple : multiples) {
570 int value = static_cast<int>(candidate * multiple);
571 if (value != 0 && dim % value == 0 && value > largest)
572 largest = value;
573 }
574 }
575 return largest;
576}
577
578/// Explicit instantiations
579template int xegpu::getLargestDivisor<int>(int dim, ArrayRef<int> candidates,
580 ArrayRef<int> candidateMultiples);
581template int
583 ArrayRef<unsigned> candidateMultiples);
return success()
lhs
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
xegpu::DistributeLayoutAttr maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout, const OpResult &result, mlir::Operation *owner, const std::string &name)
This class represents an argument of a Block.
Definition Value.h:309
TypedAttr getZeroAttr(Type type)
Definition Builders.cpp:324
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
IRValueT get() const
Return the current value being used by this operand.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
This class helps build Operations.
Definition Builders.h:207
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition Builders.h:526
This class represents an operand of an operation.
Definition Value.h:257
This is a value defined by a result of an operation.
Definition Value.h:457
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
AttrClass getAttrOfType(StringAttr name)
Definition Operation.h:550
bool hasAttrOfType(NameT &&name)
Definition Operation.h:575
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Definition Operation.h:560
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition Operation.h:234
MutableArrayRef< OpOperand > getOpOperands()
Definition Operation.h:383
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition Operation.h:238
void setAttr(StringAttr name, Attribute value)
If the an attribute exists with the specified name, change it to the new value.
Definition Operation.h:582
operand_type_range getOperandTypes()
Definition Operation.h:397
result_type_range getResultTypes()
Definition Operation.h:428
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition Operation.h:797
result_range getOpResults()
Definition Operation.h:420
Attribute removeAttr(StringAttr name)
Remove the attribute with the specified name if it exists.
Definition Operation.h:600
MLIRContext * getContext()
Return the context this operation is associated with.
Definition Operation.h:216
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class provides an abstraction over the various different ranges of value types.
Definition TypeRange.h:37
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
void setType(Type newType)
Mutate the type of this Value to be of the specified type.
Definition Value.h:116
Type getType() const
Return the type of this value.
Definition Value.h:105
static WalkResult skip()
Definition WalkResult.h:48
static WalkResult advance()
Definition WalkResult.h:47
Operation * getOwner() const
Return the owner of this operand.
Definition UseDefLists.h:38
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_stride_slice.
void setDistributeLayoutAttrs(Operation *op, function_ref< DistributeLayoutAttr(Value)> getLayoutImpl)
Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation.
std::string getLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
void removeLayoutAttr(const T &operandOrResult)
Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structure type convertion patterns...
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
void setDistributeLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout, bool respectPermLayout=false)
Sets the DistributeLayoutAttr for a given OpOperand or OpResult by attaching it to the owner's dictio...
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_stride_slice.
void removeLayoutAttrs(Operation *op)
Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given operation if they exist...
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition Utils.cpp:304
const FrozenRewritePatternSet & patterns
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition Utils.cpp:111
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
llvm::function_ref< Fn > function_ref
Definition LLVM.h:152