25#include "llvm/Support/Casting.h"
26#include "llvm/Support/FormatVariadic.h"
35 for (
const auto &vals : values)
36 llvm::append_range(
result, vals);
42 auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
45 if (!layout || !layout.isForSubgroup())
50 auto tdescShape = tdescTy.getShape();
51 auto elementType = tdescTy.getElementType();
56 int64_t sgSize = llvm::product_of(laneLayout);
59 auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>();
61 auto chunkSize = scatterAttr.getChunkSize().getInt();
64 assert(tdescShape[0] == laneLayout[0] &&
65 "tensor descriptor shape is not distributable");
66 return VectorType::get({chunkSize}, elementType);
72 for (
auto [tdescDim, laneDim, laneDataDim] :
73 llvm::zip_equal(tdescShape, laneLayout, laneData)) {
74 assert((tdescDim % (laneDim * laneDataDim) == 0) &&
75 "tensor descriptor shape is not distributable");
76 tensorSize *= tdescDim;
79 tensorSize *= tdescTy.getArrayLength();
81 return VectorType::get({tensorSize / sgSize}, elementType);
86 xegpu::LayoutAttr layout) {
87 int64_t rank = originalType.getRank();
89 if (rank < 1 || rank > 3)
96 arrayLength =
shape[0];
99 auto helperTdescTy = xegpu::TensorDescType::get(
100 shape, originalType.getElementType(), arrayLength,
102 xegpu::MemorySpace::Global, layout);
108 VectorType originalType) {
111 assert((isa<xegpu::LayoutAttr>(layout) || isa<xegpu::SliceAttr>(layout)) &&
112 "Expecting a valid layout.");
114 layout.getEffectiveLaneLayoutAsInt();
115 assert(
static_cast<size_t>(originalType.getRank()) >=
116 effectiveLaneLayout.size() &&
117 "Rank of the original vector type should be greater or equal to the "
118 "size of the lane layout to distribute the vector type.");
122 unsigned distributionStart =
123 originalType.getRank() - effectiveLaneLayout.size();
124 for (
auto [i, dim] : llvm::enumerate(originalType.getShape())) {
125 if (i < distributionStart)
128 if (dim % effectiveLaneLayout[i - distributionStart] != 0)
130 distributedShape[i] = dim / effectiveLaneLayout[i - distributionStart];
132 return VectorType::get(distributedShape, originalType.getElementType());
136 const StringRef prefix(
"layout_operand_");
137 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
138 return llvm::formatv(
"{0}{1}", prefix, idx).str();
142 const StringRef prefix =
"layout_result_";
143 return llvm::formatv(
"{0}{1}", prefix,
result.getResultNumber()).str();
151 dyn_cast_if_present<xegpu::TensorDescType>(value.
getType()))
152 return tdescTy.getLayoutAttr();
154 if (
auto result = dyn_cast<OpResult>(value)) {
156 assert(defOp &&
"result must have a defining op");
158 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
159 auto layout = anchorOp.getAnchorLayout();
164 if (defOp->
hasAttr(layoutName)) {
166 defOp->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
171 if (
auto arg = dyn_cast<BlockArgument>(value)) {
172 auto *parentOp = arg.getOwner()->getParentOp();
173 if (
auto loop = dyn_cast_if_present<LoopLikeOpInterface>(parentOp)) {
174 OpOperand *tiedInit = loop.getTiedLoopInit(arg);
182xegpu::DistributeLayoutAttr
185 unsigned idx =
const_cast<OpOperand &
>(opr).getOperandNumber();
187 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(op)) {
188 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(op)) {
190 return dpasOp.getLayoutAAttr();
191 }
else if (idx == 1) {
192 return dpasOp.getLayoutBAttr();
193 }
else if (idx == 2) {
194 return dpasOp.getLayoutCdAttr();
197 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
198 return convertOp.getInputLayoutAttr();
200 auto layout = anchorOp.getAnchorLayout();
208 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
216 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
225xegpu::DistributeLayoutAttr
228 const std::string &name) {
229 xegpu::DistributeLayoutAttr candidate = layout;
231 if (
auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
232 if (
auto perm = loadOp.getLayoutAttr())
241xegpu::DistributeLayoutAttr
244 const std::string &name) {
245 xegpu::DistributeLayoutAttr candidate = layout;
246 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
248 if (
auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
250 if (
auto perm = storeOp.getLayoutAttr())
262 const mlir::xegpu::DistributeLayoutAttr layout) {
265 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
266 if (anchorOp.getAnchorLayout() == layout)
268 anchorOp.setAnchorLayout(layout);
284 const DistributeLayoutAttr layout) {
286 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
291 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
292 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(owner)) {
294 return dpasOp.setLayoutAAttr(layout);
295 }
else if (idx == 1) {
296 return dpasOp.setLayoutBAttr(layout);
297 }
else if (idx == 2) {
298 return dpasOp.setLayoutCdAttr(layout);
301 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(owner)) {
302 return convertOp.setInputLayoutAttr(layout);
308 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
311 anchorOp.setAnchorLayout(layout);
315 anchorOp.setAnchorLayout(layout);
329template <
typename T,
typename>
330xegpu::DistributeLayoutAttr
332 Operation *op = operandOrResult.getOwner();
336 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
343template xegpu::DistributeLayoutAttr
345template xegpu::DistributeLayoutAttr
348template <
typename T,
typename>
350 const xegpu::DistributeLayoutAttr layout) {
351 Operation *owner = operandOrResult.getOwner();
353 if (owner->
hasAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
363 const mlir::xegpu::DistributeLayoutAttr layout);
367 const mlir::xegpu::DistributeLayoutAttr layout);
372 auto vecTy = dyn_cast<VectorType>(value.
getType());
380 int64_t srcShapeRank = srcShape.size();
384 int64_t rankDiff = srcShapeRank - targetShapeRank;
385 std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
387 llvm::copy(
shape, adjustedTargetShape.begin() + rankDiff);
393 Value slice = vector::ExtractStridedSliceOp::create(
394 builder, loc, value, offsets, adjustedTargetShape, staticStrides);
397 if (srcShapeRank > targetShapeRank) {
398 auto targetTy = VectorType::get(
shape, vecTy.getElementType());
399 slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
410 VectorType inputTy = dyn_cast<VectorType>(values[0].
getType());
411 assert(llvm::all_of(values.
getTypes(),
412 [&](
Type type) { return type == inputTy; }) &&
413 "values must be of the same VectorType");
415 Type elemTy = inputTy.getElementType();
418 VectorType resultTy = VectorType::get(
shape, elemTy);
423 for (
auto [src, offsets] :
426 result = vector::InsertStridedSliceOp::create(builder, loc, src,
result,
427 offsets, staticStrides);
438 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
444 converter.addConversion([](
Type type) ->
Type {
return type; });
445 converter.addConversion([](VectorType type) ->
Type {
446 return RankedTensorType::get(type.getShape(), type.getElementType());
448 converter.addSourceMaterialization(materializeCast);
449 converter.addTargetMaterialization(materializeCast);
451 mlir::ConversionTarget
target(*context);
452 target.addLegalOp<UnrealizedConversionCastOp>();
457 (
void)mlir::applyPartialConversion(op,
target, std::move(patterns));
463 op->
walk([](UnrealizedConversionCastOp castOp) {
464 if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
467 Value input = castOp.getInputs()[0];
469 auto inputTy = dyn_cast<VectorType>(input.
getType());
470 auto resultTy = dyn_cast<RankedTensorType>(
result.getType());
473 if (!inputTy || !resultTy)
476 xegpu::DistributeLayoutAttr layout =
481 RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
486 if (
auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
492 if (
auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
493 unsigned idx = use.getOperandNumber();
502 op->
walk([](scf::YieldOp yieldOp) {
505 unsigned idx = r.getResultNumber();
506 Type resultTy = r.getType();
507 Type yieldTy = yieldOp.getResults()[idx].getType();
508 if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
521 class UnrealizedConversionCastOpPattern
522 :
public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
523 using OpConversionPattern<
524 mlir::UnrealizedConversionCastOp>::OpConversionPattern;
527 matchAndRewrite(mlir::UnrealizedConversionCastOp op,
529 ConversionPatternRewriter &rewriter)
const override {
530 auto inputs = op.getOperands();
531 auto outputs = op.getOutputs();
533 if (inputs.size() != 1 || outputs.size() != 1)
536 auto inputTy = inputs[0].getType();
537 auto outputTy = outputs[0].getType();
539 if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
540 rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
544 if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
546 auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
548 rewriter.replaceOp(op, newOp);
555 converter.addSourceMaterialization(materializeCast);
558 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
562 mlir::ConversionTarget
target(*context);
563 target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
564 [](UnrealizedConversionCastOp op) {
565 auto isTensorTy = [](
Type type) {
566 return isa<RankedTensorType>(type);
572 patterns.insert<UnrealizedConversionCastOpPattern>(context);
575 (
void)mlir::applyPartialConversion(op,
target, std::move(patterns));
585 auto targetAttrs = gpuModuleOp.getTargets();
587 for (
auto &attr : *targetAttrs) {
588 auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
590 return xevmAttr.getChip().str();
602 assert(
lhs.size() ==
rhs.size() &&
"lhs and rhs must have the same size");
604 for (
auto [l, r] : llvm::zip_equal(
lhs,
rhs)) {
607 results.push_back(builder.
createOrFold<arith::AddIOp>(loc, lval, rval));
630 a = a.slice(a.size() -
b.size());
638 static_assert(std::is_integral<T>::value,
"T must be an integer type");
641 if (!candidateMultiples.empty())
643 SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
644 for (T candidate : candidates) {
645 for (T multiple : multiples) {
646 int value =
static_cast<int>(candidate * multiple);
647 if (value != 0 && dim % value == 0 && value > largest)
655 vector::CombiningKind kind, uint32_t size) {
657 Value laneVal = vector::ReductionOp::create(builder, loc, kind, input);
659 for (uint64_t i = 1; i < size; i <<= 1) {
661 gpu::ShuffleOp::create(builder, loc, laneVal, i, size,
662 gpu::ShuffleMode::XOR)
664 laneVal = makeArithReduction(builder, loc, kind, laneVal, shuffled);
671 vector::CombiningKind kind,
674 VectorType sourceType = src.
getType();
675 int64_t sourceRank = sourceType.getRank();
678 assert(sourceRank >= 2 &&
"expected at least a 2D source vector");
679 for (
int64_t i = 0; i < sourceRank - 2; ++i)
680 assert(sourceType.getShape()[i] == 1 &&
681 "expected leading dimensions to be unit");
682 int64_t rowIdx = sourceRank - 2;
683 int64_t columnIdx = sourceRank - 1;
684 int64_t sourceH = sourceType.getShape()[rowIdx];
685 int64_t sourceW = sourceType.getShape()[columnIdx];
686 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
688 TypedAttr zeroAttr = rewriter.
getZeroAttr(sourceType.getElementType());
689 Value reductionResult = arith::ConstantOp::create(
690 rewriter, loc,
acc.getType(),
699 for (
int i = 0; i < nSlices; ++i) {
705 if (reductionDim == columnIdx) {
706 sliceOffsets[rowIdx] = i;
707 sliceSizes[columnIdx] = sourceW;
709 sliceOffsets[columnIdx] = i;
710 sliceSizes[rowIdx] = sourceH;
713 vector::ExtractStridedSliceOp extractOp =
714 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
715 sliceSizes, strides);
719 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
721 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
723 VectorType::get({nSliceElements}, sourceType.getElementType()),
724 extractOp.getResult());
734 accIdx[accRank - 1] = i;
735 Value accExtract = vector::ExtractOp::create(rewriter, loc,
acc, accIdx);
736 Value reduction = vector::ReductionOp::create(
737 rewriter, loc, kind, slice.getResult(), accExtract);
738 reductionResult = vector::InsertOp::create(rewriter, loc, reduction,
739 reductionResult, accIdx);
743 return reductionResult;
748 vector::CombiningKind kind,
int64_t reductionDim,
int64_t reductionSize,
751 assert(src.getType().getRank() == 2 &&
"expected a 2D source vector");
752 VectorType sourceType = src.getType();
753 int64_t sourceH = sourceType.getShape()[0];
754 int64_t sourceW = sourceType.getShape()[1];
757 TypedAttr zeroAttr = rewriter.
getZeroAttr(sourceType.getElementType());
758 Value reductionResult = arith::ConstantOp::create(
759 rewriter, loc,
acc.getType(),
766 int nSlices = (reductionDim == 0) ? sourceW : sourceH;
770 for (
int i = 0; i < nSlices; ++i) {
772 if (reductionDim == 1) {
773 sliceOffsets = {i, 0};
774 sliceSizes = {1, sourceW};
776 sliceOffsets = {0, i};
777 sliceSizes = {sourceH, 1};
780 vector::ExtractStridedSliceOp extractOp =
781 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
783 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
784 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
786 VectorType::get({nSliceElements}, sourceType.getElementType()),
787 extractOp.getResult());
789 Value accExtract = vector::ExtractOp::create(rewriter, loc,
acc, i);
795 vector::InsertOp::create(rewriter, loc, fullReduce, reductionResult, i);
797 return reductionResult;
810 auto laneData = layout.getEffectiveLaneDataAsInt();
811 if (laneData.size() != 2)
813 return laneData[0] != 1;
825 auto laneLayout = layout.getEffectiveLaneLayoutAsInt();
826 if (laneLayout.size() != 2)
841 for (
size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx)
842 if (srcIdx < src.size() && src[srcIdx] == dst[dstIdx])
844 else if (dst[dstIdx] == 1)
845 expandedUnitDims.push_back(dstIdx);
848 return srcIdx == src.size();
865 splitDimGroups.clear();
866 for (
size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx) {
867 if (srcIdx >= src.size())
869 accumulatedSize *= dst[dstIdx];
870 currentDstDims.push_back(dstIdx);
872 if (accumulatedSize == src[srcIdx]) {
874 splitDimGroups.push_back(currentDstDims);
878 currentDstDims.clear();
879 }
else if (accumulatedSize > src[srcIdx]) {
883 return srcIdx == src.size();
xegpu::DistributeLayoutAttr maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout, const OpResult &result, mlir::Operation *owner, const std::string &name)
This class represents an argument of a Block.
TypedAttr getZeroAttr(Type type)
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
IRValueT get() const
Return the current value being used by this operand.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
This class helps build Operations.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
AttrClass getAttrOfType(StringAttr name)
bool hasAttrOfType(NameT &&name)
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
operand_type_range getOperandTypes()
result_type_range getResultTypes()
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
result_range getOpResults()
MLIRContext * getContext()
Return the context this operation is associated with.
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class provides an abstraction over the different types of ranges over Values.
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
void setType(Type newType)
Mutate the type of this Value to be of the specified type.
Type getType() const
Return the type of this value.
static WalkResult advance()
Operation * getOwner() const
Return the owner of this operand.
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
Value makeArithReduction(OpBuilder &b, Location loc, CombiningKind kind, Value v1, Value acc, arith::FastMathFlagsAttr fastmath=nullptr, Value mask=nullptr)
Returns the result value of reducing two scalar/vector values with the corresponding arith operation.
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_stride_slice.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
bool requireTranspose(const LayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
Value subgroupReduction(Location loc, OpBuilder &builder, Value input, vector::CombiningKind kind, uint32_t size)
Given an input value representing per-lane data, this function returns the result after performing a ...
bool matchUnitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< int64_t > &expandedUnitDims)
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
Value lowerToVectorReductions(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, Location loc, PatternRewriter &rewriter)
Given src and acc arguments from a vector::MultiDimReductionOp, lower to a set of vector::Reduc...
bool matchSplitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &splitDimGroups)
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structural type conversion patterns...
bool requirePacked(const LayoutAttr layout)
Helper function to check if the layout is packed.
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_stride_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
Value lowerCrossLaneReductionToShuffles(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, int64_t reductionSize, Location loc, PatternRewriter &rewriter)
Lowers cross-lane reductions to shuffle operations on a 2D vector.
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
virtual int getSubgroupSize() const =0
StringRef getName() const