25#include "llvm/Support/Casting.h"
26#include "llvm/Support/FormatVariadic.h"
35 for (
const auto &vals : values)
36 llvm::append_range(
result, vals);
42 auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
45 if (!layout || !layout.isForSubgroup())
50 auto tdescShape = tdescTy.getShape();
51 auto elementType = tdescTy.getElementType();
56 int64_t sgSize = llvm::product_of(laneLayout);
59 auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>();
61 auto chunkSize = scatterAttr.getChunkSize().getInt();
64 assert(tdescShape[0] == laneLayout[0] &&
65 "tensor descriptor shape is not distributable");
66 return VectorType::get({chunkSize}, elementType);
72 for (
auto [tdescDim, laneDim, laneDataDim] :
73 llvm::zip_equal(tdescShape, laneLayout, laneData)) {
74 assert((tdescDim % (laneDim * laneDataDim) == 0) &&
75 "tensor descriptor shape is not distributable");
76 tensorSize *= tdescDim;
79 tensorSize *= tdescTy.getArrayLength();
81 return VectorType::get({tensorSize / sgSize}, elementType);
86 xegpu::LayoutAttr layout) {
87 int64_t rank = originalType.getRank();
89 if (rank < 1 || rank > 3)
96 arrayLength =
shape[0];
99 auto helperTdescTy = xegpu::TensorDescType::get(
100 shape, originalType.getElementType(), arrayLength,
102 xegpu::MemorySpace::Global, layout);
108 VectorType originalType) {
111 assert((isa<xegpu::LayoutAttr>(layout) || isa<xegpu::SliceAttr>(layout)) &&
112 "Expecting a valid layout.");
114 layout.getEffectiveLaneLayoutAsInt();
115 assert(
static_cast<size_t>(originalType.getRank()) >=
116 effectiveLaneLayout.size() &&
117 "Rank of the original vector type should be greater or equal to the "
118 "size of the lane layout to distribute the vector type.");
122 unsigned distributionStart =
123 originalType.getRank() - effectiveLaneLayout.size();
124 for (
auto [i, dim] : llvm::enumerate(originalType.getShape())) {
125 if (i < distributionStart)
128 if (dim % effectiveLaneLayout[i - distributionStart] != 0)
130 distributedShape[i] = dim / effectiveLaneLayout[i - distributionStart];
132 return VectorType::get(distributedShape, originalType.getElementType());
136 const StringRef prefix(
"layout_operand_");
137 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
138 return llvm::formatv(
"{0}{1}", prefix, idx).str();
142 const StringRef prefix =
"layout_result_";
143 return llvm::formatv(
"{0}{1}", prefix,
result.getResultNumber()).str();
151 dyn_cast_if_present<xegpu::TensorDescType>(value.
getType()))
152 return tdescTy.getLayoutAttr();
154 if (
auto result = dyn_cast<OpResult>(value)) {
156 assert(defOp &&
"result must have a defining op");
158 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
159 auto layout = anchorOp.getAnchorLayout();
164 if (defOp->
hasAttr(layoutName)) {
166 defOp->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
171 if (
auto arg = dyn_cast<BlockArgument>(value)) {
172 auto *parentOp = arg.getOwner()->getParentOp();
173 if (
auto loop = dyn_cast_if_present<LoopLikeOpInterface>(parentOp)) {
174 OpOperand *tiedInit = loop.getTiedLoopInit(arg);
182xegpu::DistributeLayoutAttr
185 unsigned idx =
const_cast<OpOperand &
>(opr).getOperandNumber();
187 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(op)) {
188 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(op)) {
190 return dpasOp.getLayoutAAttr();
191 }
else if (idx == 1) {
192 return dpasOp.getLayoutBAttr();
193 }
else if (idx == 2) {
194 return dpasOp.getLayoutCdAttr();
197 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
198 return convertOp.getInputLayoutAttr();
200 auto layout = anchorOp.getAnchorLayout();
208 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
216 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
225xegpu::DistributeLayoutAttr
228 const std::string &name) {
229 xegpu::DistributeLayoutAttr candidate = layout;
231 if (
auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
232 if (
auto perm = loadOp.getLayoutAttr())
241xegpu::DistributeLayoutAttr
244 const std::string &name) {
245 xegpu::DistributeLayoutAttr candidate = layout;
246 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
248 if (
auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
250 if (
auto perm = storeOp.getLayoutAttr())
262 const mlir::xegpu::DistributeLayoutAttr layout) {
265 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
266 if (anchorOp.getAnchorLayout() == layout)
268 anchorOp.setAnchorLayout(layout);
284 const DistributeLayoutAttr layout) {
286 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
291 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
292 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(owner)) {
294 return dpasOp.setLayoutAAttr(layout);
295 }
else if (idx == 1) {
296 return dpasOp.setLayoutBAttr(layout);
297 }
else if (idx == 2) {
298 return dpasOp.setLayoutCdAttr(layout);
301 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(owner)) {
302 return convertOp.setInputLayoutAttr(layout);
308 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
311 anchorOp.setAnchorLayout(layout);
315 anchorOp.setAnchorLayout(layout);
329template <
typename T,
typename>
330xegpu::DistributeLayoutAttr
332 Operation *op = operandOrResult.getOwner();
336 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
343template xegpu::DistributeLayoutAttr
345template xegpu::DistributeLayoutAttr
348template <
typename T,
typename>
350 const xegpu::DistributeLayoutAttr layout) {
351 Operation *owner = operandOrResult.getOwner();
353 if (owner->
hasAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
363 const mlir::xegpu::DistributeLayoutAttr layout);
367 const mlir::xegpu::DistributeLayoutAttr layout);
390 if (!isa<VectorType>(operand.get().getType()))
394 op->
emitWarning(
"Could not find layout attribute for operand ")
395 << operand.getOperandNumber() <<
" of operation " << op->
getName();
402 return !
result.wasInterrupted();
405template <
typename T,
typename>
407 Operation *owner = operandOrResult.getOwner();
416 out.reserve(attrs.size());
418 for (
auto attr : attrs) {
419 if (
auto dist = dyn_cast<xegpu::DistributeLayoutAttr>(attr.getValue())) {
420 auto newLayout = dist.dropSgLayoutAndData();
422 out.emplace_back(attr.getName(), newLayout);
434 out.reserve(attrs.size());
436 for (
auto attr : attrs) {
437 if (
auto dist = dyn_cast<xegpu::DistributeLayoutAttr>(attr.getValue())) {
438 auto newLayout = dist.dropInstData();
440 out.emplace_back(attr.getName(), newLayout);
477 auto vecTy = dyn_cast<VectorType>(value.
getType());
485 int64_t srcShapeRank = srcShape.size();
489 int64_t rankDiff = srcShapeRank - targetShapeRank;
490 std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
492 llvm::copy(
shape, adjustedTargetShape.begin() + rankDiff);
498 Value slice = vector::ExtractStridedSliceOp::create(
499 builder, loc, value, offsets, adjustedTargetShape, staticStrides);
502 if (srcShapeRank > targetShapeRank) {
503 auto targetTy = VectorType::get(
shape, vecTy.getElementType());
504 slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
515 VectorType inputTy = dyn_cast<VectorType>(values[0].
getType());
516 assert(llvm::all_of(values.
getTypes(),
517 [&](
Type type) { return type == inputTy; }) &&
518 "values must be of the same VectorType");
520 Type elemTy = inputTy.getElementType();
523 VectorType resultTy = VectorType::get(
shape, elemTy);
528 for (
auto [src, offsets] :
531 result = vector::InsertStridedSliceOp::create(builder, loc, src,
result,
532 offsets, staticStrides);
543 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
549 converter.addConversion([](
Type type) ->
Type {
return type; });
550 converter.addConversion([](VectorType type) ->
Type {
551 return RankedTensorType::get(type.getShape(), type.getElementType());
553 converter.addSourceMaterialization(materializeCast);
554 converter.addTargetMaterialization(materializeCast);
556 mlir::ConversionTarget
target(*context);
557 target.addLegalOp<UnrealizedConversionCastOp>();
568 op->
walk([](UnrealizedConversionCastOp castOp) {
569 if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
572 Value input = castOp.getInputs()[0];
574 auto inputTy = dyn_cast<VectorType>(input.
getType());
575 auto resultTy = dyn_cast<RankedTensorType>(
result.getType());
578 if (!inputTy || !resultTy)
581 xegpu::DistributeLayoutAttr layout =
586 RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
591 if (
auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
597 if (
auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
598 unsigned idx = use.getOperandNumber();
607 op->
walk([](scf::YieldOp yieldOp) {
610 unsigned idx = r.getResultNumber();
611 Type resultTy = r.getType();
612 Type yieldTy = yieldOp.getResults()[idx].getType();
613 if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
626 class UnrealizedConversionCastOpPattern
627 :
public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
628 using OpConversionPattern<
629 mlir::UnrealizedConversionCastOp>::OpConversionPattern;
632 matchAndRewrite(mlir::UnrealizedConversionCastOp op,
634 ConversionPatternRewriter &rewriter)
const override {
635 auto inputs = op.getOperands();
636 auto outputs = op.getOutputs();
638 if (inputs.size() != 1 || outputs.size() != 1)
641 auto inputTy = inputs[0].getType();
642 auto outputTy = outputs[0].getType();
644 if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
645 rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
649 if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
651 auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
653 rewriter.replaceOp(op, newOp);
660 converter.addSourceMaterialization(materializeCast);
663 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
667 mlir::ConversionTarget
target(*context);
668 target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
669 [](UnrealizedConversionCastOp op) {
670 auto isTensorTy = [](
Type type) {
671 return isa<RankedTensorType>(type);
677 patterns.insert<UnrealizedConversionCastOpPattern>(context);
690 auto targetAttrs = gpuModuleOp.getTargets();
692 for (
auto &attr : *targetAttrs) {
693 auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
695 return xevmAttr.getChip().str();
707 assert(
lhs.size() ==
rhs.size() &&
"lhs and rhs must have the same size");
709 for (
auto [l, r] : llvm::zip_equal(
lhs,
rhs)) {
712 results.push_back(builder.
createOrFold<arith::AddIOp>(loc, lval, rval));
735 a = a.slice(a.size() -
b.size());
743 static_assert(std::is_integral<T>::value,
"T must be an integer type");
746 if (!candidateMultiples.empty())
748 SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
749 for (T candidate : candidates) {
750 for (T multiple : multiples) {
751 int value =
static_cast<int>(candidate * multiple);
752 if (value != 0 && dim % value == 0 && value > largest)
769 auto laneData = layout.getEffectiveLaneDataAsInt();
770 if (laneData.size() != 2)
772 return laneData[0] != 1;
784 auto laneLayout = layout.getEffectiveLaneLayoutAsInt();
785 if (laneLayout.size() != 2)
xegpu::DistributeLayoutAttr maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout, const OpResult &result, mlir::Operation *owner, const std::string &name)
This class represents an argument of a Block.
TypedAttr getZeroAttr(Type type)
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
IRValueT get() const
Return the current value being used by this operand.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
AttrClass getAttrOfType(StringAttr name)
bool hasAttrOfType(NameT &&name)
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
InFlightDiagnostic emitWarning(const Twine &message={})
Emit a warning about this operation, reporting up to any diagnostic handlers that may be listening.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
MutableArrayRef< OpOperand > getOpOperands()
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
OperationName getName()
The name of an operation is the key identifier for it.
operand_type_range getOperandTypes()
result_type_range getResultTypes()
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
result_range getOpResults()
Attribute removeAttr(StringAttr name)
Remove the attribute with the specified name if it exists.
MLIRContext * getContext()
Return the context this operation is associated with.
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class provides an abstraction over the different types of ranges over Values.
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
void setType(Type newType)
Mutate the type of this Value to be of the specified type.
Type getType() const
Return the type of this value.
static WalkResult advance()
Operation * getOwner() const
Return the owner of this operand.
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_stride_slice.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
bool requireTranspose(const LayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
SmallVector< NamedAttribute > dropInstDataOnAttrs(ArrayRef< NamedAttribute > attrs)
Updates the NamedAttribute sequence by dropping inst-data information from any DistributeLayoutAttr f...
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
bool recoverTemporaryLayouts(Operation *rootOp)
Attach layout attributes to all vector-type operands of operations within the given operation's regio...
void recoverTemporaryLayoutsDeprecated(Operation *op)
[to-be-deprecated] Set the DistributeLayoutAttr for each OpOperand and OpResult of the given opera...
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
void removeLayoutAttr(const T &operandOrResult)
Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structural type conversion patterns...
bool requirePacked(const LayoutAttr layout)
Helper function to check if the layout is packed.
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
SmallVector< NamedAttribute > dropSgLayoutAndDataOnAttrs(ArrayRef< NamedAttribute > attrs)
Updates the NamedAttribute sequence by dropping sg-layout and sg-data information from any Distribute...
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_stride_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
void removeLayoutAttrs(Operation *op)
Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given operation if they exist...
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
const FrozenRewritePatternSet & patterns
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
virtual int getSubgroupSize() const =0
StringRef getName() const