24#include "llvm/Support/FormatVariadic.h"
33 for (
const auto &vals : values)
34 llvm::append_range(
result, vals);
40 auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
43 if (!layout || !layout.isForSubgroup())
48 auto tdescShape = tdescTy.getShape();
49 auto elementType = tdescTy.getElementType();
54 int64_t sgSize = llvm::product_of(laneLayout);
57 auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>();
59 auto chunkSize = scatterAttr.getChunkSize().getInt();
62 assert(tdescShape[0] == laneLayout[0] &&
63 "tensor descriptor shape is not distributable");
64 return VectorType::get({chunkSize}, elementType);
70 for (
auto [tdescDim, laneDim, laneDataDim] :
71 llvm::zip_equal(tdescShape, laneLayout, laneData)) {
72 assert((tdescDim % (laneDim * laneDataDim) == 0) &&
73 "tensor descriptor shape is not distributable");
74 tensorSize *= tdescDim;
77 tensorSize *= tdescTy.getArrayLength();
79 return VectorType::get({tensorSize / sgSize}, elementType);
84 xegpu::LayoutAttr layout) {
85 int64_t rank = originalType.getRank();
87 if (rank < 1 || rank > 3)
94 arrayLength =
shape[0];
97 auto helperTdescTy = xegpu::TensorDescType::get(
98 shape, originalType.getElementType(), arrayLength,
100 xegpu::MemorySpace::Global, layout);
105 const StringRef prefix(
"layout_operand_");
106 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
107 return llvm::formatv(
"{0}{1}", prefix, idx).str();
111 const StringRef prefix =
"layout_result_";
112 return llvm::formatv(
"{0}{1}", prefix,
result.getResultNumber()).str();
120 dyn_cast_if_present<xegpu::TensorDescType>(value.
getType()))
121 return tdescTy.getLayoutAttr();
123 if (
auto result = dyn_cast<OpResult>(value)) {
125 assert(defOp &&
"result must have a defining op");
127 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
128 auto layout = anchorOp.getAnchorLayout();
133 if (defOp->
hasAttr(layoutName)) {
135 defOp->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
140 if (
auto arg = dyn_cast<BlockArgument>(value)) {
141 auto *parentOp = arg.getOwner()->getParentOp();
142 if (
auto loop = dyn_cast<LoopLikeOpInterface>(parentOp)) {
143 OpOperand *tiedInit = loop.getTiedLoopInit(arg);
151xegpu::DistributeLayoutAttr
154 unsigned idx =
const_cast<OpOperand &
>(opr).getOperandNumber();
156 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(op)) {
157 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(op)) {
159 return dpasOp.getLayoutAAttr();
160 }
else if (idx == 1) {
161 return dpasOp.getLayoutBAttr();
162 }
else if (idx == 2) {
163 return dpasOp.getLayoutCdAttr();
166 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
167 return convertOp.getInputLayoutAttr();
169 auto layout = anchorOp.getAnchorLayout();
177 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
185 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
195xegpu::DistributeLayoutAttr
198 const std::string &name) {
199 xegpu::DistributeLayoutAttr candidate = layout;
201 if (
auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
202 if (
auto perm = loadOp.getLayoutAttr())
211xegpu::DistributeLayoutAttr
214 const std::string &name) {
215 xegpu::DistributeLayoutAttr candidate = layout;
216 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
218 if (
auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
220 if (
auto perm = storeOp.getLayoutAttr())
232 const mlir::xegpu::DistributeLayoutAttr layout) {
235 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
236 if (anchorOp.getAnchorLayout() == layout)
238 anchorOp.setAnchorLayout(layout);
254 const DistributeLayoutAttr layout) {
256 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
261 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
262 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(owner)) {
264 return dpasOp.setLayoutAAttr(layout);
265 }
else if (idx == 1) {
266 return dpasOp.setLayoutBAttr(layout);
267 }
else if (idx == 2) {
268 return dpasOp.setLayoutCdAttr(layout);
271 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(owner)) {
272 return convertOp.setInputLayoutAttr(layout);
278 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
281 anchorOp.setAnchorLayout(layout);
285 anchorOp.setAnchorLayout(layout);
299template <
typename T,
typename>
300xegpu::DistributeLayoutAttr
302 Operation *op = operandOrResult.getOwner();
306 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
313template xegpu::DistributeLayoutAttr
315template xegpu::DistributeLayoutAttr
318template <
typename T,
typename>
320 const xegpu::DistributeLayoutAttr layout) {
321 Operation *owner = operandOrResult.getOwner();
323 if (owner->
hasAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
333 const mlir::xegpu::DistributeLayoutAttr layout);
337 const mlir::xegpu::DistributeLayoutAttr layout);
360 if (!isa<VectorType>(operand.get().getType()))
364 op->
emitError(
"Could not find layout attribute for operand ")
365 << operand.getOperandNumber() <<
" of operation " << op->
getName();
372 return !
result.wasInterrupted();
375template <
typename T,
typename>
377 Operation *owner = operandOrResult.getOwner();
411 auto vecTy = dyn_cast<VectorType>(value.
getType());
419 int64_t srcShapeRank = srcShape.size();
423 int64_t rankDiff = srcShapeRank - targetShapeRank;
424 std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
426 llvm::copy(
shape, adjustedTargetShape.begin() + rankDiff);
432 Value slice = vector::ExtractStridedSliceOp::create(
433 builder, loc, value, offsets, adjustedTargetShape, staticStrides);
436 if (srcShapeRank > targetShapeRank) {
437 auto targetTy = VectorType::get(
shape, vecTy.getElementType());
438 slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
449 VectorType inputTy = dyn_cast<VectorType>(values[0].
getType());
450 assert(llvm::all_of(values.
getTypes(),
451 [&](
Type type) { return type == inputTy; }) &&
452 "values must be of the same VectorType");
454 Type elemTy = inputTy.getElementType();
457 VectorType resultTy = VectorType::get(
shape, elemTy);
462 for (
auto [src, offsets] :
465 result = vector::InsertStridedSliceOp::create(builder, loc, src,
result,
466 offsets, staticStrides);
477 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
483 converter.addConversion([](
Type type) ->
Type {
return type; });
484 converter.addConversion([](VectorType type) ->
Type {
485 return RankedTensorType::get(type.getShape(), type.getElementType());
487 converter.addSourceMaterialization(materializeCast);
488 converter.addTargetMaterialization(materializeCast);
490 mlir::ConversionTarget
target(*context);
491 target.addLegalOp<UnrealizedConversionCastOp>();
502 op->
walk([](UnrealizedConversionCastOp castOp) {
503 if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
506 Value input = castOp.getInputs()[0];
508 auto inputTy = dyn_cast<VectorType>(input.
getType());
509 auto resultTy = dyn_cast<RankedTensorType>(
result.getType());
512 if (!inputTy || !resultTy)
515 xegpu::DistributeLayoutAttr layout =
520 RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
525 if (
auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
531 if (
auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
532 unsigned idx = use.getOperandNumber();
541 op->
walk([](scf::YieldOp yieldOp) {
544 unsigned idx = r.getResultNumber();
545 Type resultTy = r.getType();
546 Type yieldTy = yieldOp.getResults()[idx].getType();
547 if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
560 class UnrealizedConversionCastOpPattern
561 :
public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
562 using OpConversionPattern<
563 mlir::UnrealizedConversionCastOp>::OpConversionPattern;
566 matchAndRewrite(mlir::UnrealizedConversionCastOp op,
568 ConversionPatternRewriter &rewriter)
const override {
569 auto inputs = op.getOperands();
570 auto outputs = op.getOutputs();
572 if (inputs.size() != 1 || outputs.size() != 1)
575 auto inputTy = inputs[0].getType();
576 auto outputTy = outputs[0].getType();
578 if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
579 rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
583 if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
585 auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
587 rewriter.replaceOp(op, newOp);
594 converter.addSourceMaterialization(materializeCast);
597 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
601 mlir::ConversionTarget
target(*context);
602 target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
603 [](UnrealizedConversionCastOp op) {
604 auto isTensorTy = [](
Type type) {
605 return isa<RankedTensorType>(type);
611 patterns.insert<UnrealizedConversionCastOpPattern>(context);
624 auto targetAttrs = gpuModuleOp.getTargets();
626 for (
auto &attr : *targetAttrs) {
627 auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
629 return xevmAttr.getChip().str();
641 assert(
lhs.size() ==
rhs.size() &&
"lhs and rhs must have the same size");
643 for (
auto [l, r] : llvm::zip_equal(
lhs,
rhs)) {
646 results.push_back(builder.
createOrFold<arith::AddIOp>(loc, lval, rval));
669 a = a.slice(a.size() -
b.size());
677 static_assert(std::is_integral<T>::value,
"T must be an integer type");
680 if (!candidateMultiples.empty())
682 SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
683 for (T candidate : candidates) {
684 for (T multiple : multiples) {
685 int value =
static_cast<int>(candidate * multiple);
686 if (value != 0 && dim % value == 0 && value > largest)
xegpu::DistributeLayoutAttr maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout, const OpResult &result, mlir::Operation *owner, const std::string &name)
This class represents an argument of a Block.
TypedAttr getZeroAttr(Type type)
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
IRValueT get() const
Return the current value being used by this operand.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
AttrClass getAttrOfType(StringAttr name)
bool hasAttrOfType(NameT &&name)
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
MutableArrayRef< OpOperand > getOpOperands()
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
OperationName getName()
The name of an operation is the key identifier for it.
operand_type_range getOperandTypes()
result_type_range getResultTypes()
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
result_range getOpResults()
Attribute removeAttr(StringAttr name)
Remove the attribute with the specified name if it exists.
MLIRContext * getContext()
Return the context this operation is associated with.
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class provides an abstraction over the different types of ranges over Values.
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
void setType(Type newType)
Mutate the type of this Value to be of the specified type.
Type getType() const
Return the type of this value.
static WalkResult advance()
static WalkResult interrupt()
Operation * getOwner() const
Return the owner of this operand.
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_strided_slice.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
bool recoverTemporaryLayouts(Operation *rootOp)
Attach layout attributes to all vector-type operands of operations within the given operation's regio...
void recoverTemporaryLayoutsDeprecated(Operation *op)
[to-be-deprecated] Set the DistributeLayoutAttr for each OpOperand and OpResult of the given opera...
void removeLayoutAttr(const T &operandOrResult)
Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structural type conversion patterns...
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
void removeLayoutAttrs(Operation *op)
Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given operation if they exist...
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
const FrozenRewritePatternSet & patterns
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.