#include "llvm/Support/FormatVariadic.h"
/// Flatten a set of ValueRange into a single SmallVector<Value>.
SmallVector<Value> xegpu::flattenValues(ArrayRef<ValueRange> values) {
  SmallVector<Value> result;
  for (const auto &vals : values)
    llvm::append_range(result, vals);
  return result;
}
FailureOr<VectorType>
xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) {
  auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
  // Distribution only applies to subgroup-level layouts, i.e. layouts that
  // carry lane_layout and lane_data.
  if (!layout || !layout.isForSubgroup())
    return failure();

  // laneLayout and laneData hold the layout's lane_layout and lane_data
  // values as integers.
  auto tdescShape = tdescTy.getShape();
  auto elementType = tdescTy.getElementType();

  // The subgroup size is the product of the lane_layout dimensions.
  int64_t sgSize = llvm::product_of(laneLayout);

  // Case 1: scattered tensor descriptors distribute one chunk per lane.
  auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>();
  if (scatterAttr) {
    auto chunkSize = scatterAttr.getChunkSize().getInt();
    assert(tdescShape[0] == laneLayout[0] &&
           "tensor descriptor shape is not distributable");
    return VectorType::get({chunkSize}, elementType);
  }

  // Case 2: blocked tensor descriptors. Each dimension must be divisible by
  // the corresponding lane_layout * lane_data product.
  int64_t tensorSize = 1;
  for (auto [tdescDim, laneDim, laneDataDim] :
       llvm::zip_equal(tdescShape, laneLayout, laneData)) {
    assert((tdescDim % (laneDim * laneDataDim) == 0) &&
           "tensor descriptor shape is not distributable");
    tensorSize *= tdescDim;
  }
  // Account for the array_length of the descriptor.
  tensorSize *= tdescTy.getArrayLength();

  return VectorType::get({tensorSize / sgSize}, elementType);
}
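// Illustrative example (not from the original source): for a block descriptor
//   !xegpu.tensor_desc<32x16xf16, #xegpu.layout<lane_layout = [1, 16],
//                                               lane_data = [1, 1]>>
// sgSize = 1 * 16 = 16 and tensorSize = 32 * 16 = 512 (array_length = 1), so
// each lane receives vector<32xf16> (512 / 16). For a scattered descriptor
// with chunk_size = 8, each lane would instead receive vector<8xf16>.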
FailureOr<VectorType>
xegpu::getDistributedVectorType(VectorType originalType,
                                xegpu::LayoutAttr layout) {
  int64_t rank = originalType.getRank();
  // Only 1-D, 2-D and 3-D vectors are supported.
  if (rank < 1 || rank > 3)
    return failure();

  ArrayRef<int64_t> shape = originalType.getShape();
  int arrayLength = 1;
  // A rank-3 vector is interpreted as array_length x 2-D block shape.
  if (rank == 3) {
    arrayLength = shape[0];
    shape = shape.drop_front();
  }
  auto helperTdescTy = xegpu::TensorDescType::get(
      shape, originalType.getElementType(), arrayLength,
      /*boundary_check=*/true, xegpu::MemorySpace::Global, layout);
  return xegpu::getDistributedVectorType(helperTdescTy);
}
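// Illustrative example (not from the original source): a rank-3 input such as
// vector<2x32x16xf16> is treated as array_length = 2 over a 32x16 block; with
// the lane_layout = [1, 16] layout above this distributes to vector<64xf16>
// per lane (2 * 32 * 16 / 16).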
std::string xegpu::getLayoutName(const OpOperand &operand) {
  const StringRef prefix("layout_operand_");
  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
  return llvm::formatv("{0}{1}", prefix, idx).str();
}
std::string xegpu::getLayoutName(const OpResult result) {
  const StringRef prefix = "layout_result_";
  return llvm::formatv("{0}{1}", prefix, result.getResultNumber()).str();
}
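// For example, operand number 2 maps to the attribute name "layout_operand_2"
// and result number 0 maps to "layout_result_0".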
xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
  // A TensorDescType carries its layout directly.
  if (auto tdescTy =
          dyn_cast_if_present<xegpu::TensorDescType>(value.getType()))
    return tdescTy.getLayoutAttr();

  if (auto result = dyn_cast<OpResult>(value)) {
    Operation *defOp = result.getDefiningOp();
    assert(defOp && "result must have a defining op");

    // For ConvertLayoutOp, the result layout is its target layout.
    if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(defOp))
      return convertOp.getTargetLayoutAttr();

    // For LoadNdOp, the layout is recovered from its tensor descriptor.
    if (auto loadNd = dyn_cast<xegpu::LoadNdOp>(defOp))
      return getDistributeLayoutAttr(loadNd.getTensorDesc());

    // LoadMatrixOp and StoreMatrixOp carry the layout as an op property.
    if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(defOp))
      return loadOp.getLayoutAttr();

    if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(defOp))
      return storeOp.getLayoutAttr();

    // Otherwise, fall back to the "layout_result_N" attribute on the op.
    std::string layoutName = getLayoutName(result);
    if (defOp->hasAttr(layoutName))
      return defOp->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);

    // LoadGatherOp also carries the layout as an op property.
    if (auto loadGatherOp = dyn_cast<xegpu::LoadGatherOp>(defOp))
      return loadGatherOp.getLayoutAttr();
  }

  // For a loop iteration argument, follow the tied loop init value.
  if (auto arg = dyn_cast<BlockArgument>(value)) {
    auto *parentOp = arg.getOwner()->getParentOp();
    if (auto loop = dyn_cast<LoopLikeOpInterface>(parentOp)) {
      OpOperand *tiedInit = loop.getTiedLoopInit(arg);
      if (tiedInit)
        return getDistributeLayoutAttr(tiedInit->get());
    }
  }

  return nullptr;
}
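// Illustrative lookup (hypothetical IR, not from this file):
//   %v = xegpu.load_nd %td : !xegpu.tensor_desc<32x16xf16, #layout>
//        -> vector<32x16xf16>
// getDistributeLayoutAttr(%v) takes the LoadNdOp branch and returns #layout
// from the tensor descriptor, while an scf.for iteration argument resolves
// through its tied loop init value.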
xegpu::DistributeLayoutAttr
xegpu::getDistributeLayoutAttr(const OpOperand &opr) {
  Operation *op = opr.getOwner();

  if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(op))
    return loadOp.getLayoutAttr();

  if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(op))
    return storeOp.getLayoutAttr();

  // Prefer an explicit "layout_operand_N" attribute if present.
  std::string layoutName = xegpu::getLayoutName(opr);
  if (op->hasAttr(layoutName))
    return op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);

  // StoreScatterOp carries the layout of its value operand as an op property.
  if (auto storeScatterOp = dyn_cast<xegpu::StoreScatterOp>(op))
    if (auto layout = storeScatterOp.getLayoutAttr())
      return layout;

  // Otherwise derive the layout from the operand value itself.
  return getDistributeLayoutAttr(opr.get());
}
// If the owner op defines a permanent layout for this result, prefer it over
// the candidate layout.
static xegpu::DistributeLayoutAttr
maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
                         const OpResult &result, mlir::Operation *owner,
                         const std::string &name) {
  xegpu::DistributeLayoutAttr candidate = layout;
  if (auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
    if (auto perm = loadOp.getLayoutAttr())
      candidate = perm;
  }
  return candidate;
}
// Operand flavor of the helper above. For StoreScatterOp only the stored
// value (operand 0) carries a permanent layout.
static xegpu::DistributeLayoutAttr
maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
                         const OpOperand &operand, mlir::Operation *owner,
                         const std::string &name) {
  xegpu::DistributeLayoutAttr candidate = layout;
  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
  if (auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
    if (idx == 0)
      if (auto perm = storeOp.getLayoutAttr())
        candidate = perm;
  }
  return candidate;
}
template <typename T, typename>
void xegpu::setDistributeLayoutAttr(const T &operandOrResult,
                                    const DistributeLayoutAttr layout,
                                    bool respectPermLayout) {
  Operation *owner = operandOrResult.getOwner();
  std::string name = xegpu::getLayoutName(operandOrResult);

  DistributeLayoutAttr candidate = layout;
  if (respectPermLayout)
    candidate = maybePickPermanentLayout(layout, operandOrResult, owner, name);

  if (candidate)
    owner->setAttr(name, candidate);
}

// Explicit instantiations for OpOperand and OpResult.
template void xegpu::setDistributeLayoutAttr<mlir::OpOperand>(
    const mlir::OpOperand &operandOrResult,
    const mlir::xegpu::DistributeLayoutAttr layout, bool respectPermLayout);
template void xegpu::setDistributeLayoutAttr<mlir::OpResult>(
    const mlir::OpResult &operandOrResult,
    const mlir::xegpu::DistributeLayoutAttr layout, bool respectPermLayout);
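// Usage sketch (hypothetical caller, not part of this file): attach a layout
// to the first result of an op; it is stored in the op's attribute dictionary
// under the name "layout_result_0".
//   xegpu::setDistributeLayoutAttr(op->getOpResult(0), layout);
// Passing respectPermLayout = true lets a permanent layout on the owner op
// (e.g. a LoadGatherOp) take precedence over the candidate layout.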
void xegpu::setDistributeLayoutAttrs(
    Operation *op, function_ref<DistributeLayoutAttr(Value)> getLayoutImpl) {
  op->walk([&](Operation *nestOp) {
    // LoadMatrixOp and StoreMatrixOp carry their layout as an op property and
    // are skipped here.
    if (isa<xegpu::LoadMatrixOp, xegpu::StoreMatrixOp>(nestOp))
      return;

    for (OpOperand &opr : nestOp->getOpOperands()) {
      auto layout = getLayoutImpl(opr.get());
      setDistributeLayoutAttr(opr, layout);
    }
    for (OpResult result : nestOp->getOpResults()) {
      auto layout = getLayoutImpl(result);
      setDistributeLayoutAttr(result, layout);
    }
  });
}
template <typename T, typename>
void xegpu::removeLayoutAttr(const T &operandOrResult) {
  Operation *owner = operandOrResult.getOwner();
  std::string name = xegpu::getLayoutName(operandOrResult);
  if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
    owner->removeAttr(name);
}
SmallVector<Value>
xegpu::extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc,
                                        Value value, ArrayRef<int64_t> shape) {
  auto vecTy = dyn_cast<VectorType>(value.getType());
  if (!vecTy)
    return {value};

  ArrayRef<int64_t> srcShape = vecTy.getShape();
  if (!computeShapeRatio(srcShape, shape))
    return {value};

  // The target shape may have a smaller rank than the source. Left-pad it
  // with unit dimensions so both shapes have the same rank.
  int64_t srcShapeRank = srcShape.size();
  int64_t targetShapeRank = shape.size();
  SmallVector<int64_t> adjustedTargetShape(srcShapeRank);
  int64_t rankDiff = srcShapeRank - targetShapeRank;
  std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
            1);
  llvm::copy(shape, adjustedTargetShape.begin() + rankDiff);

  SmallVector<Value> result;
  for (SmallVector<int64_t> offsets :
       StaticTileOffsetRange(srcShape, adjustedTargetShape)) {
    SmallVector<int64_t> staticStrides(offsets.size(), 1);
    Value slice = vector::ExtractStridedSliceOp::create(
        builder, loc, value, offsets, adjustedTargetShape, staticStrides);

    // Drop the padded unit dimensions again to match the requested shape.
    if (srcShapeRank > targetShapeRank) {
      auto targetTy = VectorType::get(shape, vecTy.getElementType());
      slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
    }
    result.push_back(slice);
  }
  return result;
}
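// Illustrative example (not from the original source): extracting shape
// [16, 16] tiles from a vector<32x64xf32> value yields 2 * 4 = 8 slices of
// vector<16x16xf32>, produced in row-major tile order.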
Value xegpu::createVectorWithShapeFromValues(OpBuilder &builder, Location loc,
                                             ValueRange values,
                                             ArrayRef<int64_t> shape) {
  VectorType inputTy = dyn_cast<VectorType>(values[0].getType());
  assert(llvm::all_of(values.getTypes(),
                      [&](Type type) { return type == inputTy; }) &&
         "values must be of the same VectorType");

  Type elemTy = inputTy.getElementType();
  ArrayRef<int64_t> srcShape = inputTy.getShape();

  // Start from a zero-filled vector of the requested shape and insert each
  // input value at its tile offset.
  VectorType resultTy = VectorType::get(shape, elemTy);
  auto zeroAttr = builder.getZeroAttr(elemTy);
  Value result = arith::ConstantOp::create(
      builder, loc, DenseElementsAttr::get(resultTy, zeroAttr));

  SmallVector<int64_t> staticStrides(srcShape.size(), 1);
  for (auto [src, offsets] :
       llvm::zip(values, StaticTileOffsetRange(shape, srcShape)))
    result = vector::InsertStridedSliceOp::create(builder, loc, src, result,
                                                  offsets, staticStrides);
  return result;
}
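// Illustrative inverse of the extraction above: inserting eight
// vector<16x16xf32> values at tile offsets (0,0), (0,16), ... (16,48) into a
// zero-filled vector<32x64xf32> reassembles the original value.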
void xegpu::doSCFStructuralTypeConversionWithTensorType(
    Operation *op, TypeConverter converter) {
  MLIRContext *context = op->getContext();

  auto materializeCast = [](OpBuilder &builder, Type type, ValueRange inputs,
                            Location loc) -> Value {
    return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
        .getResult(0);
  };

  {
    // Step 1: convert VectorType to RankedTensorType so the SCF structural
    // type conversion patterns can rewrite loop signatures.
    TypeConverter converter;
    converter.addConversion([](Type type) -> Type { return type; });
    converter.addConversion([](VectorType type) -> Type {
      return RankedTensorType::get(type.getShape(), type.getElementType());
    });

    converter.addSourceMaterialization(materializeCast);
    converter.addTargetMaterialization(materializeCast);

    mlir::ConversionTarget target(*context);
    target.addLegalOp<UnrealizedConversionCastOp>();

    mlir::RewritePatternSet patterns(context);
    scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
                                                         target);
    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
  }
  // Step 2: propagate the DistributeLayoutAttr of each vector value onto the
  // RankedTensorType introduced for it, using the UnrealizedConversionCastOps
  // created by step 1 as anchors.
  op->walk([](UnrealizedConversionCastOp castOp) {
    if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
      return WalkResult::skip();

    Value input = castOp.getInputs()[0];
    Value result = castOp.getResults()[0];
    auto inputTy = dyn_cast<VectorType>(input.getType());
    auto resultTy = dyn_cast<RankedTensorType>(result.getType());

    // Only consider casts from VectorType to RankedTensorType.
    if (!inputTy || !resultTy)
      return WalkResult::skip();

    xegpu::DistributeLayoutAttr layout = xegpu::getDistributeLayoutAttr(input);
    if (!layout)
      return WalkResult::skip();

    RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
    result.setType(newTy);
    // Update the tied loop region arguments if the user is a loop-like op.
    for (OpOperand &use : result.getUses()) {
      if (auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
        BlockArgument arg = loop.getTiedLoopRegionIterArg(&use);
        arg.setType(newTy);
      }
      // scf.while has a second ("after") region whose block arguments are not
      // exposed through LoopLikeOpInterface; patch them by operand index.
      if (auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
        unsigned idx = use.getOperandNumber();
        whileOp.getAfterArguments()[idx].setType(newTy);
      }
    }
    return WalkResult::advance();
  });
  // Use scf.yield ops as anchors to update the result types of their parent
  // ops to match the (possibly annotated) yielded types.
  op->walk([](scf::YieldOp yieldOp) {
    Operation *parentOp = yieldOp->getParentOp();
    for (OpResult r : parentOp->getOpResults()) {
      unsigned idx = r.getResultNumber();
      Type resultTy = r.getType();
      Type yieldTy = yieldOp.getResults()[idx].getType();
      if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
        r.setType(yieldTy);
    }
  });
  {
    // Step 3: convert RankedTensorType back to VectorType according to the
    // caller-provided type converter. The casts introduced by step 1 are
    // folded away by the pattern below.
    class UnrealizedConversionCastOpPattern
        : public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
      using OpConversionPattern<
          mlir::UnrealizedConversionCastOp>::OpConversionPattern;

      mlir::LogicalResult
      matchAndRewrite(mlir::UnrealizedConversionCastOp op,
                      OneToNOpAdaptor adaptor,
                      ConversionPatternRewriter &rewriter) const override {
        auto inputs = op.getOperands();
        auto outputs = op.getOutputs();

        if (inputs.size() != 1 || outputs.size() != 1)
          return mlir::failure();

        auto inputTy = inputs[0].getType();
        auto outputTy = outputs[0].getType();

        // VectorType -> RankedTensorType casts simply forward their inputs.
        if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
          rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
          return mlir::success();
        }

        // RankedTensorType -> VectorType casts are rebuilt on the converted
        // (flattened) inputs.
        if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
          SmallVector<Value> values = flattenValues(adaptor.getInputs());
          auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
                                                          outputTy, values);
          rewriter.replaceOp(op, newOp);
          return mlir::success();
        }
        return mlir::failure();
      }
    };
    converter.addSourceMaterialization(materializeCast);
    converter.addTargetMaterialization([&](OpBuilder &builder, TypeRange type,
                                           ValueRange inputs, Location loc) {
      return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
          .getResults();
    });

    mlir::ConversionTarget target(*context);
    target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
        [](UnrealizedConversionCastOp op) {
          auto isTensorTy = [](Type type) {
            return isa<RankedTensorType>(type);
          };
          return llvm::none_of(op->getOperandTypes(), isTensorTy) &&
                 llvm::none_of(op->getResultTypes(), isTensorTy);
        });

    mlir::RewritePatternSet patterns(context);
    patterns.insert<UnrealizedConversionCastOpPattern>(context);
    scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
                                                         target);
    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
  }
}
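// Illustrative flow (hypothetical IR, not from this file): an scf.for that
// yields a vector<16xf32> is first rewritten to carry tensor<16xf32>, the
// tensor type is then annotated with the vector's DistributeLayoutAttr as its
// encoding, and the final step converts it to the SIMT-distributed vector type
// (e.g. vector<1xf32> for a 16-lane subgroup) using the caller's converter.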
std::optional<std::string> xegpu::getChipStr(Operation *op) {
  auto gpuModuleOp = op->getParentOfType<gpu::GPUModuleOp>();
  if (!gpuModuleOp)
    return std::nullopt;

  auto targetAttrs = gpuModuleOp.getTargets();
  if (targetAttrs) {
    for (auto &attr : *targetAttrs) {
      auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
      if (xevmAttr)
        return xevmAttr.getChip().str();
    }
  }
  return std::nullopt;
}
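// Illustrative example (hypothetical module, not from this file): for a
// gpu.module whose targets include #xevm.target<chip = "pvc">, getChipStr
// returns std::optional<std::string>("pvc"); without an XeVM target attribute
// it returns std::nullopt.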
SmallVector<OpFoldResult> xegpu::addElementwise(OpBuilder &builder,
                                                Location loc,
                                                ArrayRef<OpFoldResult> lhs,
                                                ArrayRef<OpFoldResult> rhs) {
  assert(lhs.size() == rhs.size() && "lhs and rhs must have the same size");
  SmallVector<OpFoldResult> results;
  for (auto [l, r] : llvm::zip_equal(lhs, rhs)) {
    auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
    auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
    results.push_back(builder.createOrFold<index::AddOp>(loc, lval, rval));
  }
  return results;
}
SmallVector<OpFoldResult>
xegpu::addWithRightAligned(OpBuilder &builder, Location loc,
                           ArrayRef<OpFoldResult> lhs,
                           ArrayRef<OpFoldResult> rhs) {
  // Right-align the shorter array against the longer one: the leading entries
  // of the longer array pass through unchanged, and the overlapping tail is
  // added elementwise.
  ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
  ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
  SmallVector<OpFoldResult> results(a.begin(), a.end() - b.size());
  a = a.slice(a.size() - b.size());
  llvm::append_range(results, addElementwise(builder, loc, a, b));
  return results;
}
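// Illustrative example (not from the original source): adding rhs = [c0, c1]
// to lhs = [a0, a1, a2, a3] right-aligned produces
// [a0, a1, a2 + c0, a3 + c1]; the two leading entries pass through unchanged.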
template <typename T>
int xegpu::getLargestDivisor(T dim, ArrayRef<T> candidates,
                             ArrayRef<T> candidateMultiples) {
  static_assert(std::is_integral<T>::value, "T must be an integer type");
  int largest = -1;
  SmallVector<T> multiples = {1};
  if (!candidateMultiples.empty())
    multiples =
        SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
  for (T candidate : candidates) {
    for (T multiple : multiples) {
      int value = static_cast<int>(candidate * multiple);
      if (value != 0 && dim % value == 0 && value > largest)
        largest = value;
    }
  }
  return largest;
}
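// Illustrative example (not from the original source): with dim = 24,
// candidates = {8, 6, 4} and candidateMultiples = {1, 2}, the checked values
// are 8, 16, 6, 12, 4 and 8; of these, 8, 6, 12 and 4 divide 24, so the
// function returns 12.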