25#include "llvm/Support/Casting.h"
26#include "llvm/Support/FormatVariadic.h"
35 for (
const auto &vals : values)
36 llvm::append_range(
result, vals);
42 auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
45 if (!layout || !layout.isForSubgroup())
50 auto tdescShape = tdescTy.getShape();
51 auto elementType = tdescTy.getElementType();
56 int64_t sgSize = llvm::product_of(laneLayout);
60 for (
auto [tdescDim, laneDim, laneDataDim] :
61 llvm::zip_equal(tdescShape, laneLayout, laneData)) {
62 assert((tdescDim % (laneDim * laneDataDim) == 0) &&
63 "tensor descriptor shape is not distributable");
64 tensorSize *= tdescDim;
67 tensorSize *= tdescTy.getArrayLength();
69 return VectorType::get({tensorSize / sgSize}, elementType);
74 xegpu::LayoutAttr layout) {
75 int64_t rank = originalType.getRank();
77 if (rank < 1 || rank > 3)
84 arrayLength =
shape[0];
87 auto helperTdescTy = xegpu::TensorDescType::get(
88 shape, originalType.getElementType(), arrayLength,
90 xegpu::MemorySpace::Global, layout);
96 VectorType originalType) {
99 assert((isa<xegpu::LayoutAttr>(layout) || isa<xegpu::SliceAttr>(layout)) &&
100 "Expecting a valid layout.");
102 int64_t vectorRank = originalType.getRank();
103 int64_t layoutRank = layout.getRank();
104 assert(vectorRank >= layoutRank &&
"Vector rank must be >= layout rank.");
108 int64_t offset = vectorRank - layoutRank;
112 auto distributedShapeOrFailure =
113 layout.computeDistributedShape(trailingShape);
114 if (
failed(distributedShapeOrFailure))
118 fullShape.begin() + offset);
119 resultShape.append(distributedShapeOrFailure->begin(),
120 distributedShapeOrFailure->end());
121 return VectorType::get(resultShape, originalType.getElementType());
125 const StringRef prefix(
"layout_operand_");
126 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
127 return llvm::formatv(
"{0}{1}", prefix, idx).str();
131 const StringRef prefix =
"layout_result_";
132 return llvm::formatv(
"{0}{1}", prefix,
result.getResultNumber()).str();
139 if (
auto result = dyn_cast<OpResult>(value)) {
141 assert(defOp &&
"result must have a defining op");
143 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
144 auto layout = anchorOp.getAnchorLayout();
149 if (defOp->
hasAttr(layoutName)) {
151 defOp->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
156 if (
auto arg = dyn_cast<BlockArgument>(value)) {
157 auto *parentOp = arg.getOwner()->getParentOp();
158 if (
auto loop = dyn_cast_if_present<LoopLikeOpInterface>(parentOp)) {
159 OpOperand *tiedInit = loop.getTiedLoopInit(arg);
166 dyn_cast_if_present<xegpu::TensorDescType>(value.
getType()))
167 return tdescTy.getLayoutAttr();
171xegpu::DistributeLayoutAttr
174 unsigned idx =
const_cast<OpOperand &
>(opr).getOperandNumber();
176 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(op)) {
177 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(op)) {
179 return dpasOp.getLayoutAAttr();
180 }
else if (idx == 1) {
181 return dpasOp.getLayoutBAttr();
182 }
else if (idx == 2) {
183 return dpasOp.getLayoutCdAttr();
186 if (
auto dpasMxOp = dyn_cast<xegpu::DpasMxOp>(op)) {
189 unsigned currentIdx = 0;
191 if (idx == currentIdx++)
192 return dpasMxOp.getLayoutAAttr();
194 if (idx == currentIdx++)
195 return dpasMxOp.getLayoutBAttr();
197 if (dpasMxOp.getAcc())
198 if (idx == currentIdx++)
199 return dpasMxOp.getLayoutCdAttr();
201 if (dpasMxOp.getScaleA())
202 if (idx == currentIdx++)
203 return dpasMxOp.getLayoutAScaleAttr();
205 if (dpasMxOp.getScaleB())
206 if (idx == currentIdx++)
207 return dpasMxOp.getLayoutBScaleAttr();
211 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
212 return convertOp.getInputLayoutAttr();
214 auto layout = anchorOp.getAnchorLayout();
221 if (isa<xegpu::StoreNdOp, xegpu::StoreMatrixOp>(op) && (idx < 2))
224 if (isa<xegpu::StoreScatterOp>(op)) {
225 xegpu::StoreScatterOp store(op);
226 int chunkSize = store.getChunkSize().value_or(1);
227 if (layout && idx >= 2 && chunkSize > 1)
228 return layout.dropDims(llvm::to_vector(
229 llvm::seq<int64_t>(layout.getRank() - 1, layout.getRank())));
232 if (isa<xegpu::LoadGatherOp>(op)) {
233 xegpu::LoadGatherOp
load(op);
234 int chunkSize =
load.getChunkSize().value_or(1);
235 if (layout && idx >= 1 && chunkSize > 1)
236 return layout.dropDims(llvm::to_vector(
237 llvm::seq<int64_t>(layout.getRank() - 1, layout.getRank())));
244 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
253xegpu::DistributeLayoutAttr
256 const std::string &name) {
257 xegpu::DistributeLayoutAttr candidate = layout;
259 if (
auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
260 if (
auto perm = loadOp.getLayoutAttr())
269xegpu::DistributeLayoutAttr
272 const std::string &name) {
273 xegpu::DistributeLayoutAttr candidate = layout;
274 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
276 if (
auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
278 if (
auto perm = storeOp.getLayoutAttr())
290 const mlir::xegpu::DistributeLayoutAttr layout) {
293 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
294 if (anchorOp.getAnchorLayout() == layout)
296 anchorOp.setAnchorLayout(layout);
312 const DistributeLayoutAttr layout) {
314 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
319 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
320 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(owner)) {
322 return dpasOp.setLayoutAAttr(layout);
323 }
else if (idx == 1) {
324 return dpasOp.setLayoutBAttr(layout);
325 }
else if (idx == 2) {
326 return dpasOp.setLayoutCdAttr(layout);
329 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(owner)) {
330 return convertOp.setInputLayoutAttr(layout);
336 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
339 anchorOp.setAnchorLayout(layout);
343 anchorOp.setAnchorLayout(layout);
357template <
typename T,
typename>
358xegpu::DistributeLayoutAttr
360 Operation *op = operandOrResult.getOwner();
364 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
371template xegpu::DistributeLayoutAttr
373template xegpu::DistributeLayoutAttr
376template <
typename T,
typename>
378 const xegpu::DistributeLayoutAttr layout) {
379 Operation *owner = operandOrResult.getOwner();
381 if (owner->
hasAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
391 const mlir::xegpu::DistributeLayoutAttr layout);
395 const mlir::xegpu::DistributeLayoutAttr layout);
400 auto vecTy = dyn_cast<VectorType>(value.
getType());
408 int64_t srcShapeRank = srcShape.size();
412 int64_t rankDiff = srcShapeRank - targetShapeRank;
413 std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
415 llvm::copy(
shape, adjustedTargetShape.begin() + rankDiff);
421 Value slice = vector::ExtractStridedSliceOp::create(
422 builder, loc, value, offsets, adjustedTargetShape, staticStrides);
425 if (srcShapeRank > targetShapeRank) {
426 auto targetTy = VectorType::get(
shape, vecTy.getElementType());
427 slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
438 VectorType inputTy = dyn_cast<VectorType>(values[0].
getType());
439 assert(llvm::all_of(values.
getTypes(),
440 [&](
Type type) { return type == inputTy; }) &&
441 "values must be of the same VectorType");
443 Type elemTy = inputTy.getElementType();
446 VectorType resultTy = VectorType::get(
shape, elemTy);
451 for (
auto [src, offsets] :
454 result = vector::InsertStridedSliceOp::create(builder, loc, src,
result,
455 offsets, staticStrides);
466 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
472 converter.addConversion([](
Type type) ->
Type {
return type; });
473 converter.addConversion([](VectorType type) ->
Type {
474 return RankedTensorType::get(type.getShape(), type.getElementType());
476 converter.addSourceMaterialization(materializeCast);
477 converter.addTargetMaterialization(materializeCast);
479 mlir::ConversionTarget
target(*context);
480 target.addLegalOp<UnrealizedConversionCastOp>();
485 (
void)mlir::applyPartialConversion(op,
target, std::move(patterns));
491 op->
walk([](UnrealizedConversionCastOp castOp) {
492 if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
495 Value input = castOp.getInputs()[0];
497 auto inputTy = dyn_cast<VectorType>(input.
getType());
498 auto resultTy = dyn_cast<RankedTensorType>(
result.getType());
501 if (!inputTy || !resultTy)
504 xegpu::DistributeLayoutAttr layout =
509 RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
514 if (
auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
520 if (
auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
521 unsigned idx = use.getOperandNumber();
530 op->
walk([](scf::YieldOp yieldOp) {
533 unsigned idx = r.getResultNumber();
534 Type resultTy = r.getType();
535 Type yieldTy = yieldOp.getResults()[idx].getType();
536 if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
549 class UnrealizedConversionCastOpPattern
550 :
public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
551 using OpConversionPattern<
552 mlir::UnrealizedConversionCastOp>::OpConversionPattern;
555 matchAndRewrite(mlir::UnrealizedConversionCastOp op,
557 ConversionPatternRewriter &rewriter)
const override {
558 auto inputs = op.getOperands();
559 auto outputs = op.getOutputs();
561 if (inputs.size() != 1 || outputs.size() != 1)
564 auto inputTy = inputs[0].getType();
565 auto outputTy = outputs[0].getType();
567 if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
568 rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
572 if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
574 auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
576 rewriter.replaceOp(op, newOp);
583 converter.addSourceMaterialization(materializeCast);
586 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
590 mlir::ConversionTarget
target(*context);
591 target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
592 [](UnrealizedConversionCastOp op) {
593 auto isTensorTy = [](
Type type) {
594 return isa<RankedTensorType>(type);
600 patterns.insert<UnrealizedConversionCastOpPattern>(context);
603 (
void)mlir::applyPartialConversion(op,
target, std::move(patterns));
613 auto targetAttrs = gpuModuleOp.getTargets();
615 for (
auto &attr : *targetAttrs) {
616 auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
618 return xevmAttr.getChip().str();
630 assert(
lhs.size() ==
rhs.size() &&
"lhs and rhs must have the same size");
632 for (
auto [l, r] : llvm::zip_equal(
lhs,
rhs)) {
635 results.push_back(builder.
createOrFold<arith::AddIOp>(loc, lval, rval));
658 a = a.slice(a.size() -
b.size());
666 static_assert(std::is_integral<T>::value,
"T must be an integer type");
669 if (!candidateMultiples.empty())
671 SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
672 for (T candidate : candidates) {
673 for (T multiple : multiples) {
674 int value =
static_cast<int>(candidate * multiple);
675 if (value != 0 && dim % value == 0 && value > largest)
683 vector::CombiningKind kind, uint32_t size) {
685 Value laneVal = vector::ReductionOp::create(builder, loc, kind, input);
687 for (uint64_t i = 1; i < size; i <<= 1) {
689 gpu::ShuffleOp::create(builder, loc, laneVal, i, size,
690 gpu::ShuffleMode::XOR)
692 laneVal = makeArithReduction(builder, loc, kind, laneVal, shuffled);
699 vector::CombiningKind kind,
702 VectorType sourceType = src.
getType();
703 int64_t sourceRank = sourceType.getRank();
706 assert(sourceRank >= 2 &&
"expected at least a 2D source vector");
707 for (
int64_t i = 0; i < sourceRank - 2; ++i)
708 assert(sourceType.getShape()[i] == 1 &&
709 "expected leading dimensions to be unit");
710 int64_t rowIdx = sourceRank - 2;
711 int64_t columnIdx = sourceRank - 1;
712 int64_t sourceH = sourceType.getShape()[rowIdx];
713 int64_t sourceW = sourceType.getShape()[columnIdx];
714 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
716 TypedAttr zeroAttr = rewriter.
getZeroAttr(sourceType.getElementType());
717 Value reductionResult = arith::ConstantOp::create(
718 rewriter, loc,
acc.getType(),
729 for (
int i = 0; i < nSlices; ++i) {
735 if (reductionDim == columnIdx) {
736 sliceOffsets[rowIdx] = i;
737 sliceSizes[columnIdx] = sourceW;
739 sliceOffsets[columnIdx] = i;
740 sliceSizes[rowIdx] = sourceH;
743 vector::ExtractStridedSliceOp extractOp =
744 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
745 sliceSizes, strides);
749 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
751 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
753 VectorType::get({nSliceElements}, sourceType.getElementType()),
754 extractOp.getResult());
764 accIdx[accRank - 1] = i;
765 Value accExtract = vector::ExtractOp::create(rewriter, loc,
acc, accIdx);
766 Value reduction = vector::ReductionOp::create(
767 rewriter, loc, kind, slice.getResult(), accExtract);
768 reductionResult = vector::InsertOp::create(rewriter, loc, reduction,
769 reductionResult, accIdx);
773 return reductionResult;
778 vector::CombiningKind kind,
int64_t reductionDim,
int64_t reductionSize,
780 VectorType sourceType = src.
getType();
781 int64_t sourceRank = sourceType.getRank();
784 assert(sourceRank >= 2 &&
"expected at least a 2D source vector");
785 for (
int64_t i = 0; i < sourceRank - 2; ++i)
786 assert(sourceType.getShape()[i] == 1 &&
787 "expected leading dimensions to be unit");
788 int64_t rowIdx = sourceRank - 2;
789 int64_t columnIdx = sourceRank - 1;
790 int64_t sourceH = sourceType.getShape()[rowIdx];
791 int64_t sourceW = sourceType.getShape()[columnIdx];
794 TypedAttr zeroAttr = rewriter.
getZeroAttr(sourceType.getElementType());
795 Value reductionResult = arith::ConstantOp::create(
796 rewriter, loc,
acc.getType(),
803 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
808 for (
int i = 0; i < nSlices; ++i) {
814 if (reductionDim == columnIdx) {
815 sliceOffsets[rowIdx] = i;
816 sliceSizes[columnIdx] = sourceW;
818 sliceOffsets[columnIdx] = i;
819 sliceSizes[rowIdx] = sourceH;
822 vector::ExtractStridedSliceOp extractOp =
823 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
824 sliceSizes, strides);
825 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
826 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
828 VectorType::get({nSliceElements}, sourceType.getElementType()),
829 extractOp.getResult());
832 accIdx[accRank - 1] = i;
833 Value accExtract = vector::ExtractOp::create(rewriter, loc,
acc, accIdx);
838 reductionResult = vector::InsertOp::create(rewriter, loc, fullReduce,
839 reductionResult, accIdx);
841 return reductionResult;
846 vector::CombiningKind kind) {
847 auto vecTy = dyn_cast<VectorType>(type);
848 Type elemTy = vecTy ? vecTy.getElementType() : type;
853 return arith::ConstantOp::create(
855 return arith::ConstantOp::create(builder, loc, cast<TypedAttr>(scalarAttr));
859 case vector::CombiningKind::ADD:
860 case vector::CombiningKind::XOR:
861 case vector::CombiningKind::OR:
862 case vector::CombiningKind::MAXUI:
865 case vector::CombiningKind::MUL:
866 case vector::CombiningKind::AND:
869 case vector::CombiningKind::MINSI:
870 if (
auto intTy = dyn_cast<IntegerType>(elemTy))
872 elemTy, APInt::getSignedMaxValue(intTy.getWidth())));
875 case vector::CombiningKind::MINUI:
876 if (
auto intTy = dyn_cast<IntegerType>(elemTy))
878 builder.
getIntegerAttr(elemTy, APInt::getMaxValue(intTy.getWidth())));
881 case vector::CombiningKind::MAXSI:
882 if (
auto intTy = dyn_cast<IntegerType>(elemTy))
884 elemTy, APInt::getSignedMinValue(intTy.getWidth())));
887 case vector::CombiningKind::MINNUMF:
888 case vector::CombiningKind::MINIMUMF:
889 if (
auto floatTy = dyn_cast<FloatType>(elemTy))
891 elemTy, APFloat::getInf(floatTy.getFloatSemantics())));
894 case vector::CombiningKind::MAXNUMF:
895 case vector::CombiningKind::MAXIMUMF:
896 if (
auto floatTy = dyn_cast<FloatType>(elemTy))
898 elemTy, APFloat::getInf(floatTy.getFloatSemantics(),
true)));
914 auto laneData = layout.getEffectiveLaneDataAsInt();
915 if (laneData.size() != 2)
917 return laneData[0] != 1;
930 auto laneLayout = layout.getEffectiveLaneLayoutAsInt();
931 if (laneLayout.size() != 2)
946 for (
size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx)
947 if (srcIdx < src.size() && src[srcIdx] == dst[dstIdx])
949 else if (dst[dstIdx] == 1)
950 expandedUnitDims.push_back(dstIdx);
953 return srcIdx == src.size();
970 splitDimGroups.clear();
971 for (
size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx) {
972 if (srcIdx >= src.size())
974 accumulatedSize *= dst[dstIdx];
975 currentDstDims.push_back(dstIdx);
977 if (accumulatedSize == src[srcIdx]) {
979 splitDimGroups.push_back(currentDstDims);
983 currentDstDims.clear();
984 }
else if (accumulatedSize > src[srcIdx]) {
988 return srcIdx == src.size();
xegpu::DistributeLayoutAttr maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout, const OpResult &result, mlir::Operation *owner, const std::string &name)
Attributes are known-constant values of operations.
This class represents an argument of a Block.
IntegerAttr getIntegerAttr(Type type, int64_t value)
FloatAttr getFloatAttr(Type type, double value)
TypedAttr getZeroAttr(Type type)
TypedAttr getOneAttr(Type type)
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
AttrClass getAttrOfType(StringAttr name)
bool hasAttrOfType(NameT &&name)
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
operand_type_range getOperandTypes()
result_type_range getResultTypes()
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
result_range getOpResults()
MLIRContext * getContext()
Return the context this operation is associated with.
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class provides an abstraction over the different types of ranges over Values.
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
void setType(Type newType)
Mutate the type of this Value to be of the specified type.
Type getType() const
Return the type of this value.
static WalkResult advance()
Operation * getOwner() const
Return the owner of this operand.
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
Value makeArithReduction(OpBuilder &b, Location loc, CombiningKind kind, Value v1, Value acc, arith::FastMathFlagsAttr fastmath=nullptr, Value mask=nullptr)
Returns the result value of reducing two scalar/vector values with the corresponding arith operation.
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_stride_slice.
bool requirePacked(const DistributeLayoutAttr layout)
Helper function to check if the layout is packed.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
Value createReductionNeutralValue(OpBuilder &builder, Location loc, Type type, vector::CombiningKind kind)
Creates a constant filled with the neutral (identity) value for the given reduction kind.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult; users should use setAnchorLayout...
Value subgroupReduction(Location loc, OpBuilder &builder, Value input, vector::CombiningKind kind, uint32_t size)
Given an input value representing per-lane data, this function returns the result after performing a ...
bool matchUnitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< int64_t > &expandedUnitDims)
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
Value lowerToVectorReductions(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, Location loc, PatternRewriter &rewriter)
Given a src and an acc argument from a vector::MultiDimReductionOp, lower to a set of vector::Reduc...
bool requireTranspose(const DistributeLayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
bool matchSplitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &splitDimGroups)
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structural type conversion patterns...
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_stride_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
Value lowerCrossLaneReductionToShuffles(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, int64_t reductionSize, Location loc, PatternRewriter &rewriter)
Lowers cross-lane reductions to shuffle operations on a 2D vector.
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
virtual int getSubgroupSize() const =0
StringRef getName() const