25#include "llvm/Support/Casting.h"
26#include "llvm/Support/FormatVariadic.h"
35 for (
const auto &vals : values)
36 llvm::append_range(
result, vals);
42 auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
45 if (!layout || !layout.isForSubgroup())
50 auto tdescShape = tdescTy.getShape();
51 auto elementType = tdescTy.getElementType();
56 int64_t sgSize = llvm::product_of(laneLayout);
59 auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>();
61 auto chunkSize = scatterAttr.getChunkSize().getInt();
64 assert(tdescShape[0] == laneLayout[0] &&
65 "tensor descriptor shape is not distributable");
66 return VectorType::get({chunkSize}, elementType);
72 for (
auto [tdescDim, laneDim, laneDataDim] :
73 llvm::zip_equal(tdescShape, laneLayout, laneData)) {
74 assert((tdescDim % (laneDim * laneDataDim) == 0) &&
75 "tensor descriptor shape is not distributable");
76 tensorSize *= tdescDim;
79 tensorSize *= tdescTy.getArrayLength();
81 return VectorType::get({tensorSize / sgSize}, elementType);
86 xegpu::LayoutAttr layout) {
87 int64_t rank = originalType.getRank();
89 if (rank < 1 || rank > 3)
96 arrayLength =
shape[0];
99 auto helperTdescTy = xegpu::TensorDescType::get(
100 shape, originalType.getElementType(), arrayLength,
102 xegpu::MemorySpace::Global, layout);
108 VectorType originalType) {
111 assert((isa<xegpu::LayoutAttr>(layout) || isa<xegpu::SliceAttr>(layout)) &&
112 "Expecting a valid layout.");
114 int64_t vectorRank = originalType.getRank();
115 int64_t layoutRank = layout.getRank();
116 assert(vectorRank >= layoutRank &&
"Vector rank must be >= layout rank.");
120 int64_t offset = vectorRank - layoutRank;
124 auto distributedShapeOrFailure =
125 layout.computeDistributedShape(trailingShape);
126 if (
failed(distributedShapeOrFailure))
130 fullShape.begin() + offset);
131 resultShape.append(distributedShapeOrFailure->begin(),
132 distributedShapeOrFailure->end());
133 return VectorType::get(resultShape, originalType.getElementType());
137 const StringRef prefix(
"layout_operand_");
138 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
139 return llvm::formatv(
"{0}{1}", prefix, idx).str();
143 const StringRef prefix =
"layout_result_";
144 return llvm::formatv(
"{0}{1}", prefix,
result.getResultNumber()).str();
152 dyn_cast_if_present<xegpu::TensorDescType>(value.
getType()))
153 return tdescTy.getLayoutAttr();
155 if (
auto result = dyn_cast<OpResult>(value)) {
157 assert(defOp &&
"result must have a defining op");
159 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
160 auto layout = anchorOp.getAnchorLayout();
165 if (defOp->
hasAttr(layoutName)) {
167 defOp->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
172 if (
auto arg = dyn_cast<BlockArgument>(value)) {
173 auto *parentOp = arg.getOwner()->getParentOp();
174 if (
auto loop = dyn_cast_if_present<LoopLikeOpInterface>(parentOp)) {
175 OpOperand *tiedInit = loop.getTiedLoopInit(arg);
183xegpu::DistributeLayoutAttr
186 unsigned idx =
const_cast<OpOperand &
>(opr).getOperandNumber();
188 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(op)) {
189 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(op)) {
191 return dpasOp.getLayoutAAttr();
192 }
else if (idx == 1) {
193 return dpasOp.getLayoutBAttr();
194 }
else if (idx == 2) {
195 return dpasOp.getLayoutCdAttr();
198 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
199 return convertOp.getInputLayoutAttr();
201 auto layout = anchorOp.getAnchorLayout();
209 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
217 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
226xegpu::DistributeLayoutAttr
229 const std::string &name) {
230 xegpu::DistributeLayoutAttr candidate = layout;
232 if (
auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
233 if (
auto perm = loadOp.getLayoutAttr())
242xegpu::DistributeLayoutAttr
245 const std::string &name) {
246 xegpu::DistributeLayoutAttr candidate = layout;
247 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
249 if (
auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
251 if (
auto perm = storeOp.getLayoutAttr())
263 const mlir::xegpu::DistributeLayoutAttr layout) {
266 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
267 if (anchorOp.getAnchorLayout() == layout)
269 anchorOp.setAnchorLayout(layout);
285 const DistributeLayoutAttr layout) {
287 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
292 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
293 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(owner)) {
295 return dpasOp.setLayoutAAttr(layout);
296 }
else if (idx == 1) {
297 return dpasOp.setLayoutBAttr(layout);
298 }
else if (idx == 2) {
299 return dpasOp.setLayoutCdAttr(layout);
302 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(owner)) {
303 return convertOp.setInputLayoutAttr(layout);
309 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
312 anchorOp.setAnchorLayout(layout);
316 anchorOp.setAnchorLayout(layout);
330template <
typename T,
typename>
331xegpu::DistributeLayoutAttr
333 Operation *op = operandOrResult.getOwner();
337 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
344template xegpu::DistributeLayoutAttr
346template xegpu::DistributeLayoutAttr
349template <
typename T,
typename>
351 const xegpu::DistributeLayoutAttr layout) {
352 Operation *owner = operandOrResult.getOwner();
354 if (owner->
hasAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
364 const mlir::xegpu::DistributeLayoutAttr layout);
368 const mlir::xegpu::DistributeLayoutAttr layout);
373 auto vecTy = dyn_cast<VectorType>(value.
getType());
381 int64_t srcShapeRank = srcShape.size();
385 int64_t rankDiff = srcShapeRank - targetShapeRank;
386 std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
388 llvm::copy(
shape, adjustedTargetShape.begin() + rankDiff);
394 Value slice = vector::ExtractStridedSliceOp::create(
395 builder, loc, value, offsets, adjustedTargetShape, staticStrides);
398 if (srcShapeRank > targetShapeRank) {
399 auto targetTy = VectorType::get(
shape, vecTy.getElementType());
400 slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
411 VectorType inputTy = dyn_cast<VectorType>(values[0].
getType());
412 assert(llvm::all_of(values.
getTypes(),
413 [&](
Type type) { return type == inputTy; }) &&
414 "values must be of the same VectorType");
416 Type elemTy = inputTy.getElementType();
419 VectorType resultTy = VectorType::get(
shape, elemTy);
424 for (
auto [src, offsets] :
427 result = vector::InsertStridedSliceOp::create(builder, loc, src,
result,
428 offsets, staticStrides);
439 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
445 converter.addConversion([](
Type type) ->
Type {
return type; });
446 converter.addConversion([](VectorType type) ->
Type {
447 return RankedTensorType::get(type.getShape(), type.getElementType());
449 converter.addSourceMaterialization(materializeCast);
450 converter.addTargetMaterialization(materializeCast);
452 mlir::ConversionTarget
target(*context);
453 target.addLegalOp<UnrealizedConversionCastOp>();
458 (
void)mlir::applyPartialConversion(op,
target, std::move(patterns));
464 op->
walk([](UnrealizedConversionCastOp castOp) {
465 if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
468 Value input = castOp.getInputs()[0];
470 auto inputTy = dyn_cast<VectorType>(input.
getType());
471 auto resultTy = dyn_cast<RankedTensorType>(
result.getType());
474 if (!inputTy || !resultTy)
477 xegpu::DistributeLayoutAttr layout =
482 RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
487 if (
auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
493 if (
auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
494 unsigned idx = use.getOperandNumber();
503 op->
walk([](scf::YieldOp yieldOp) {
506 unsigned idx = r.getResultNumber();
507 Type resultTy = r.getType();
508 Type yieldTy = yieldOp.getResults()[idx].getType();
509 if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
522 class UnrealizedConversionCastOpPattern
523 :
public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
524 using OpConversionPattern<
525 mlir::UnrealizedConversionCastOp>::OpConversionPattern;
528 matchAndRewrite(mlir::UnrealizedConversionCastOp op,
530 ConversionPatternRewriter &rewriter)
const override {
531 auto inputs = op.getOperands();
532 auto outputs = op.getOutputs();
534 if (inputs.size() != 1 || outputs.size() != 1)
537 auto inputTy = inputs[0].getType();
538 auto outputTy = outputs[0].getType();
540 if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
541 rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
545 if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
547 auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
549 rewriter.replaceOp(op, newOp);
556 converter.addSourceMaterialization(materializeCast);
559 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
563 mlir::ConversionTarget
target(*context);
564 target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
565 [](UnrealizedConversionCastOp op) {
566 auto isTensorTy = [](
Type type) {
567 return isa<RankedTensorType>(type);
573 patterns.insert<UnrealizedConversionCastOpPattern>(context);
576 (
void)mlir::applyPartialConversion(op,
target, std::move(patterns));
586 auto targetAttrs = gpuModuleOp.getTargets();
588 for (
auto &attr : *targetAttrs) {
589 auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
591 return xevmAttr.getChip().str();
603 assert(
lhs.size() ==
rhs.size() &&
"lhs and rhs must have the same size");
605 for (
auto [l, r] : llvm::zip_equal(
lhs,
rhs)) {
608 results.push_back(builder.
createOrFold<arith::AddIOp>(loc, lval, rval));
631 a = a.slice(a.size() -
b.size());
639 static_assert(std::is_integral<T>::value,
"T must be an integer type");
642 if (!candidateMultiples.empty())
644 SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
645 for (T candidate : candidates) {
646 for (T multiple : multiples) {
647 int value =
static_cast<int>(candidate * multiple);
648 if (value != 0 && dim % value == 0 && value > largest)
656 vector::CombiningKind kind, uint32_t size) {
658 Value laneVal = vector::ReductionOp::create(builder, loc, kind, input);
660 for (uint64_t i = 1; i < size; i <<= 1) {
662 gpu::ShuffleOp::create(builder, loc, laneVal, i, size,
663 gpu::ShuffleMode::XOR)
665 laneVal = makeArithReduction(builder, loc, kind, laneVal, shuffled);
672 vector::CombiningKind kind,
675 VectorType sourceType = src.
getType();
676 int64_t sourceRank = sourceType.getRank();
679 assert(sourceRank >= 2 &&
"expected at least a 2D source vector");
680 for (
int64_t i = 0; i < sourceRank - 2; ++i)
681 assert(sourceType.getShape()[i] == 1 &&
682 "expected leading dimensions to be unit");
683 int64_t rowIdx = sourceRank - 2;
684 int64_t columnIdx = sourceRank - 1;
685 int64_t sourceH = sourceType.getShape()[rowIdx];
686 int64_t sourceW = sourceType.getShape()[columnIdx];
687 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
689 TypedAttr zeroAttr = rewriter.
getZeroAttr(sourceType.getElementType());
690 Value reductionResult = arith::ConstantOp::create(
691 rewriter, loc,
acc.getType(),
700 for (
int i = 0; i < nSlices; ++i) {
706 if (reductionDim == columnIdx) {
707 sliceOffsets[rowIdx] = i;
708 sliceSizes[columnIdx] = sourceW;
710 sliceOffsets[columnIdx] = i;
711 sliceSizes[rowIdx] = sourceH;
714 vector::ExtractStridedSliceOp extractOp =
715 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
716 sliceSizes, strides);
720 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
722 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
724 VectorType::get({nSliceElements}, sourceType.getElementType()),
725 extractOp.getResult());
735 accIdx[accRank - 1] = i;
736 Value accExtract = vector::ExtractOp::create(rewriter, loc,
acc, accIdx);
737 Value reduction = vector::ReductionOp::create(
738 rewriter, loc, kind, slice.getResult(), accExtract);
739 reductionResult = vector::InsertOp::create(rewriter, loc, reduction,
740 reductionResult, accIdx);
744 return reductionResult;
749 vector::CombiningKind kind,
int64_t reductionDim,
int64_t reductionSize,
751 VectorType sourceType = src.
getType();
752 int64_t sourceRank = sourceType.getRank();
755 assert(sourceRank >= 2 &&
"expected at least a 2D source vector");
756 for (
int64_t i = 0; i < sourceRank - 2; ++i)
757 assert(sourceType.getShape()[i] == 1 &&
758 "expected leading dimensions to be unit");
759 int64_t rowIdx = sourceRank - 2;
760 int64_t columnIdx = sourceRank - 1;
761 int64_t sourceH = sourceType.getShape()[rowIdx];
762 int64_t sourceW = sourceType.getShape()[columnIdx];
765 TypedAttr zeroAttr = rewriter.
getZeroAttr(sourceType.getElementType());
766 Value reductionResult = arith::ConstantOp::create(
767 rewriter, loc,
acc.getType(),
774 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
779 for (
int i = 0; i < nSlices; ++i) {
785 if (reductionDim == columnIdx) {
786 sliceOffsets[rowIdx] = i;
787 sliceSizes[columnIdx] = sourceW;
789 sliceOffsets[columnIdx] = i;
790 sliceSizes[rowIdx] = sourceH;
793 vector::ExtractStridedSliceOp extractOp =
794 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
795 sliceSizes, strides);
796 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
797 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
799 VectorType::get({nSliceElements}, sourceType.getElementType()),
800 extractOp.getResult());
803 accIdx[accRank - 1] = i;
804 Value accExtract = vector::ExtractOp::create(rewriter, loc,
acc, accIdx);
809 reductionResult = vector::InsertOp::create(rewriter, loc, fullReduce,
810 reductionResult, accIdx);
812 return reductionResult;
817 vector::CombiningKind kind) {
818 auto vecTy = dyn_cast<VectorType>(type);
819 Type elemTy = vecTy ? vecTy.getElementType() : type;
824 return arith::ConstantOp::create(
826 return arith::ConstantOp::create(builder, loc, cast<TypedAttr>(scalarAttr));
830 case vector::CombiningKind::ADD:
831 case vector::CombiningKind::XOR:
832 case vector::CombiningKind::OR:
833 case vector::CombiningKind::MAXUI:
836 case vector::CombiningKind::MUL:
837 case vector::CombiningKind::AND:
840 case vector::CombiningKind::MINSI:
841 if (
auto intTy = dyn_cast<IntegerType>(elemTy))
843 elemTy, APInt::getSignedMaxValue(intTy.getWidth())));
846 case vector::CombiningKind::MINUI:
847 if (
auto intTy = dyn_cast<IntegerType>(elemTy))
849 builder.
getIntegerAttr(elemTy, APInt::getMaxValue(intTy.getWidth())));
852 case vector::CombiningKind::MAXSI:
853 if (
auto intTy = dyn_cast<IntegerType>(elemTy))
855 elemTy, APInt::getSignedMinValue(intTy.getWidth())));
858 case vector::CombiningKind::MINNUMF:
859 case vector::CombiningKind::MINIMUMF:
860 if (
auto floatTy = dyn_cast<FloatType>(elemTy))
862 elemTy, APFloat::getInf(floatTy.getFloatSemantics())));
865 case vector::CombiningKind::MAXNUMF:
866 case vector::CombiningKind::MAXIMUMF:
867 if (
auto floatTy = dyn_cast<FloatType>(elemTy))
869 elemTy, APFloat::getInf(floatTy.getFloatSemantics(),
true)));
885 auto laneData = layout.getEffectiveLaneDataAsInt();
886 if (laneData.size() != 2)
888 return laneData[0] != 1;
900 auto laneLayout = layout.getEffectiveLaneLayoutAsInt();
901 if (laneLayout.size() != 2)
916 for (
size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx)
917 if (srcIdx < src.size() && src[srcIdx] == dst[dstIdx])
919 else if (dst[dstIdx] == 1)
920 expandedUnitDims.push_back(dstIdx);
923 return srcIdx == src.size();
940 splitDimGroups.clear();
941 for (
size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx) {
942 if (srcIdx >= src.size())
944 accumulatedSize *= dst[dstIdx];
945 currentDstDims.push_back(dstIdx);
947 if (accumulatedSize == src[srcIdx]) {
949 splitDimGroups.push_back(currentDstDims);
953 currentDstDims.clear();
954 }
else if (accumulatedSize > src[srcIdx]) {
958 return srcIdx == src.size();
xegpu::DistributeLayoutAttr maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout, const OpResult &result, mlir::Operation *owner, const std::string &name)
Attributes are known-constant values of operations.
This class represents an argument of a Block.
IntegerAttr getIntegerAttr(Type type, int64_t value)
FloatAttr getFloatAttr(Type type, double value)
TypedAttr getZeroAttr(Type type)
TypedAttr getOneAttr(Type type)
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
IRValueT get() const
Return the current value being used by this operand.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
AttrClass getAttrOfType(StringAttr name)
bool hasAttrOfType(NameT &&name)
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
operand_type_range getOperandTypes()
result_type_range getResultTypes()
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
result_range getOpResults()
MLIRContext * getContext()
Return the context this operation is associated with.
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class provides an abstraction over the different types of ranges over Values.
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
void setType(Type newType)
Mutate the type of this Value to be of the specified type.
Type getType() const
Return the type of this value.
static WalkResult advance()
Operation * getOwner() const
Return the owner of this operand.
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
Value makeArithReduction(OpBuilder &b, Location loc, CombiningKind kind, Value v1, Value acc, arith::FastMathFlagsAttr fastmath=nullptr, Value mask=nullptr)
Returns the result value of reducing two scalar/vector values with the corresponding arith operation.
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_strided_slice.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
Value createReductionNeutralValue(OpBuilder &builder, Location loc, Type type, vector::CombiningKind kind)
Creates a constant filled with the neutral (identity) value for the given reduction kind.
bool requireTranspose(const LayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
Value subgroupReduction(Location loc, OpBuilder &builder, Value input, vector::CombiningKind kind, uint32_t size)
Given an input value representing per-lane data, this function returns the result after performing a ...
bool matchUnitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< int64_t > &expandedUnitDims)
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
Value lowerToVectorReductions(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, Location loc, PatternRewriter &rewriter)
Given src and acc arguments from a vector::MultiDimReductionOp, lower to a set of vector::Reduc...
bool matchSplitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &splitDimGroups)
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for, using SCF structural type conversion patterns...
bool requirePacked(const LayoutAttr layout)
Helper function to check if the layout is packed.
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
Value lowerCrossLaneReductionToShuffles(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, int64_t reductionSize, Location loc, PatternRewriter &rewriter)
Lowers cross-lane reductions to shuffle operations on a 2D vector.
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
virtual int getSubgroupSize() const =0
StringRef getName() const