20#include "llvm/ADT/STLExtras.h"
21#include "llvm/Support/DebugLog.h"
25#define GEN_PASS_DEF_XEGPUUNROLL
26#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
30#define DEBUG_TYPE "xegpu-unroll"
36template <
typename SourceOp>
46 LDBG() <<
"Get unroll shape for: " << *op;
48 if (
options.filterConstraint && failed(
options.filterConstraint(op))) {
49 LDBG() <<
"--no filter constraint -> BAIL";
54 "expects the native shape for native shape call back function.");
55 auto nativeShape =
options.nativeShape(op);
61 bool returnSingleType =
false)
const {
62 return options.getUnrolledTypes(type, tileShape, returnSingleType);
69 if (
auto vecTy = dyn_cast<VectorType>(destTy)) {
70 auto shape = vecTy.getShape();
74 if (isa<xegpu::TensorDescType>(destTy)) {
79 auto castOp = UnrealizedConversionCastOp::create(
80 rewriter, loc, destTy, srcs,
82 return castOp.getResult(0);
85 llvm_unreachable(
"Unexpected destTy.");
94 if (
auto vecTy = dyn_cast<VectorType>(src.
getType())) {
99 if (isa<xegpu::TensorDescType>(src.
getType())) {
104 auto castOp = UnrealizedConversionCastOp::create(
105 rewriter, loc, destTypes, src,
107 return castOp.getResults();
110 llvm_unreachable(
"Unexpected src type.");
120 auto vecType = cast<VectorType>(operand.
getType());
121 std::optional<SmallVector<int64_t>> grids =
123 assert(grids &&
"Expecting grids to be computed.");
127 VectorType newVecTy =
128 vecType.cloneWith(blockSize, vecType.getElementType());
130 return pack(operand, convertedTypes, blockSize, loc, rewriter);
134 const char *
const packAttrName =
"__xegpu_blocking_pack__";
135 const char *
const unpackAttrName =
"__xegpu_blocking_unpack__";
136 const char *
const blockAttrName =
"__xegpu_blocking_tile_shape__";
152 int64_t rank = tdescTy.getRank();
160 auto aV = llvm::cast<Value>(a);
162 return rewriter.
createOrFold<arith::AddIOp>(loc, aV, bV);
167 llvm::drop_begin(mixedOffsets, mixedOffsets.size() - rank));
169 llvm::seq<int64_t>(mixedOffsets.size() - rank, mixedOffsets.size());
175 for (
auto [idx, oldOff, offset] :
176 llvm::zip(validIdxes, oldOffsets, offsets))
177 mixedOffsets[idx] = addi(oldOff, offset);
179 auto newOp = createOp(mixedOffsets);
180 newOps.push_back(newOp);
185struct UnrollCreateNdOp :
public UnrollPattern<xegpu::CreateNdDescOp> {
186 using UnrollPattern<xegpu::CreateNdDescOp>::UnrollPattern;
187 LogicalResult matchAndRewrite(xegpu::CreateNdDescOp op,
190 xegpu::TensorDescType tdescTy = op.getType();
192 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
198 auto newTdescTy = getUnrolledTypes(tdescTy, *targetShape)[0];
200 xegpu::CreateNdDescOp::create(rewriter, loc, newTdescTy, op.getSource(),
201 op.getMixedSizes(), op.getMixedStrides());
202 newOps.push_back(newOp);
203 Value castOp = unpack(newOps, tdescTy, *targetShape, loc, rewriter);
210struct UnrollPrefetchNdOp :
public UnrollPattern<xegpu::PrefetchNdOp> {
211 using UnrollPattern<xegpu::PrefetchNdOp>::UnrollPattern;
212 LogicalResult matchAndRewrite(xegpu::PrefetchNdOp op,
215 xegpu::TensorDescType tdescTy = op.getTensorDescType();
217 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
221 xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
223 layout = layout.dropInstData();
226 getUnrolledTypes(tdescTy, *targetShape,
true);
229 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
232 xegpu::PrefetchNdOp::create(rewriter, loc, convertedTdesc[0], offsets,
233 op.getL1HintAttr(), op.getL2HintAttr(),
234 op.getL3HintAttr(), layout);
239 computeUnrolledOffsets(op.getMixedOffsets(), tdescTy, *targetShape,
240 createPrefetch, loc, rewriter);
247struct UnrollLoadNdOp :
public UnrollPattern<xegpu::LoadNdOp> {
248 using UnrollPattern<xegpu::LoadNdOp>::UnrollPattern;
249 LogicalResult matchAndRewrite(xegpu::LoadNdOp op,
253 VectorType valueTy = op.getType();
254 xegpu::TensorDescType tdescTy = op.getTensorDescType();
256 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
260 xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
262 layout = layout.dropInstData();
264 Type elemTy = tdescTy.getElementType();
265 VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
268 getUnrolledTypes(tdescTy, *targetShape,
true);
271 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
275 return xegpu::LoadNdOp::create(
276 rewriter, loc, newValueTy, convertedTdescs[0], offsets,
277 op.getPackedAttr(), op.getTransposeAttr(), op.getL1HintAttr(),
278 op.getL2HintAttr(), op.getL3HintAttr(), layout);
280 newOps = computeUnrolledOffsets(op.getMixedOffsets(), tdescTy, *targetShape,
281 createLoad, loc, rewriter);
283 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
290struct UnrollStoreNdOp :
public UnrollPattern<xegpu::StoreNdOp> {
291 using UnrollPattern<xegpu::StoreNdOp>::UnrollPattern;
292 LogicalResult matchAndRewrite(xegpu::StoreNdOp op,
295 VectorType valueTy = op.getValueType();
296 xegpu::TensorDescType tdescTy = op.getTensorDescType();
298 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
302 xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
304 layout = layout.dropInstData();
307 getUnrolledTypes(valueTy, *targetShape);
309 getUnrolledTypes(tdescTy, *targetShape,
true);
312 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
315 pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
317 size_t valueIndex = 0;
319 xegpu::StoreNdOp::create(rewriter, loc, convertedValues[valueIndex++],
320 convertedTdescs[0], offsets, op.getL1HintAttr(),
321 op.getL2HintAttr(), op.getL3HintAttr(), layout);
326 computeUnrolledOffsets(op.getMixedOffsets(), tdescTy, *targetShape,
327 createStore, loc, rewriter);
334struct UnrollDpasOp :
public UnrollPattern<xegpu::DpasOp> {
335 using UnrollPattern<xegpu::DpasOp>::UnrollPattern;
336 LogicalResult matchAndRewrite(xegpu::DpasOp op,
340 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
341 if (!targetShape || targetShape->size() != 3)
343 auto M = (*targetShape)[0];
344 auto K = (*targetShape)[1];
345 auto N = (*targetShape)[2];
347 int64_t aBlockSize[2] = {M, K};
348 int64_t bBlockSize[2] = {K, N};
349 int64_t cBlockSize[2] = {M, N};
351 auto a = op.getLhs();
352 auto b = op.getRhs();
353 auto c = op.getAcc();
359 cVals = packOperandForDpas(c, cBlockSize, loc, rewriter);
363 if (llvm::any_of(ranges, [](
auto &v) {
return v.size() == 0; }) ||
364 llvm::all_of(ranges, [](
auto &v) {
return v.size() == 1; }))
367 VectorType resultTy = op.getResult().getType();
368 auto vecTy = VectorType::get(cBlockSize, resultTy.getElementType());
370 auto aShape = a.getType().getShape();
371 auto bShape =
b.getType().getShape();
372 int64_t mIters = aShape[0] / M;
373 int64_t kIters = aShape[1] / K;
374 int64_t nIters = bShape[1] / N;
377 for (
int64_t i = 0; i < mIters; ++i) {
381 tmpC = cVals[i * nIters +
j];
383 for (
int64_t k = 0; k < kIters; ++k) {
384 Value aVec = aVals[i * kIters + k];
385 Value bVec = bVals[k * nIters +
j];
388 operands.push_back(tmpC);
391 xegpu::DpasOp::create(rewriter, loc, vecTy, operands,
394 newOps.push_back(tmpC);
397 Value castOp = unpack(newOps, resultTy, cBlockSize, loc, rewriter);
403struct UnrollDpasMxOp :
public UnrollPattern<xegpu::DpasMxOp> {
404 using UnrollPattern<xegpu::DpasMxOp>::UnrollPattern;
405 LogicalResult matchAndRewrite(xegpu::DpasMxOp op,
409 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
410 if (!targetShape || targetShape->size() != 4)
412 auto M = (*targetShape)[0];
413 auto K = (*targetShape)[1];
414 auto N = (*targetShape)[2];
415 auto S = (*targetShape)[3];
417 int64_t aBlockSize[2] = {M, K};
418 int64_t bBlockSize[2] = {K, N};
419 int64_t cBlockSize[2] = {M, N};
420 int64_t aScaleBlockSize[2] = {M, S};
421 int64_t bScaleBlockSize[2] = {S, N};
425 auto c = op.getAcc();
426 auto ascale = dyn_cast<TypedValue<VectorType>>(op.getScaleA());
427 auto bscale = dyn_cast<TypedValue<VectorType>>(op.getScaleB());
433 cVals = packOperandForDpas(c, cBlockSize, loc, rewriter);
436 aScaleVals = packOperandForDpas(ascale, aScaleBlockSize, loc, rewriter);
439 bScaleVals = packOperandForDpas(bscale, bScaleBlockSize, loc, rewriter);
441 VectorType resultTy = op.getResult().getType();
442 auto vecTy = VectorType::get(cBlockSize, resultTy.getElementType());
444 auto aShape = a.getType().getShape();
445 auto bShape =
b.getType().getShape();
446 int64_t mIters = aShape[0] / M;
447 int64_t kIters = aShape[1] / K;
448 int64_t nIters = bShape[1] / N;
451 xegpu::DpasMxOp newDpasMxOp;
452 for (
int64_t i = 0; i < mIters; ++i) {
456 tmpC = cVals[i * nIters +
j];
458 for (
int64_t k = 0; k < kIters; ++k) {
459 Value aVec = aVals[i * kIters + k];
460 Value bVec = bVals[k * nIters +
j];
463 operands.push_back(tmpC);
465 operands.push_back(aScaleVals[i * kIters + k]);
467 operands.push_back(bScaleVals[k * nIters +
j]);
469 newDpasMxOp = xegpu::DpasMxOp::create(
470 rewriter, loc, vecTy, operands,
472 tmpC = newDpasMxOp.getResult();
474 newOps.push_back(newDpasMxOp);
477 Value castOp = unpack(newOps, resultTy, cBlockSize, loc, rewriter);
487struct UnrollLoadGatherOp :
public UnrollPattern<xegpu::LoadGatherOp> {
488 using UnrollPattern<xegpu::LoadGatherOp>::UnrollPattern;
489 LogicalResult matchAndRewrite(xegpu::LoadGatherOp op,
492 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
493 Value offsets = op.getOffsets();
494 Value mask = op.getMask();
496 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
502 if (
auto chunkSizeAttr = op->getAttr(
"chunk_size")) {
503 if (
auto intAttr = llvm::dyn_cast<IntegerAttr>(chunkSizeAttr))
504 chunkSize = intAttr.getInt();
508 VectorType maskTy = llvm::dyn_cast<VectorType>(mask.
getType());
509 VectorType offsetsTy = llvm::dyn_cast<VectorType>(offsets.
getType());
510 Type elemTy = valueTy.getElementType();
511 VectorType newValueTy = VectorType::get(*targetShape, elemTy);
520 targetMaskShape.pop_back();
521 int64_t blockedChunkSize = targetShape->back();
522 int64_t numNewChunks = chunkSize / blockedChunkSize;
523 chunkSize = blockedChunkSize;
525 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
526 convertedOffsetTypes = getUnrolledTypes(offsetsTy, targetMaskShape);
529 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
531 pack(offsets, convertedOffsetTypes, targetMaskShape, loc, rewriter);
533 for (
auto maskVal : convertedMasksBase)
534 convertedMasks.append(numNewChunks, maskVal);
536 for (
auto [baseOffset, offsetType] :
537 llvm::zip(convertedOffsetsBase, convertedOffsetTypes)) {
538 for (
int64_t i = 0; i < numNewChunks; ++i) {
540 i * blockedChunkSize);
542 vector::BroadcastOp::create(rewriter, loc, offsetType, inc);
544 arith::AddIOp::create(rewriter, loc, baseOffset, incVec);
545 convertedOffsets.push_back(offsetVal);
549 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
551 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
553 convertedOffsetTypes = getUnrolledTypes(offsetsTy, *targetShape);
555 pack(offsets, convertedOffsetTypes, *targetShape, loc, rewriter);
558 auto layout = op.getLayoutAttr();
560 layout = layout.dropInstData();
563 for (
auto [o, m] : llvm::zip(convertedOffsets, convertedMasks)) {
564 auto newOp = xegpu::LoadGatherOp::create(
565 rewriter, loc, newValueTy, op.getSource(), o, m,
567 op.getL2HintAttr(), op.getL3HintAttr(), layout);
568 newOps.push_back(newOp);
571 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
581struct UnrollStoreScatterOp :
public UnrollPattern<xegpu::StoreScatterOp> {
582 using UnrollPattern<xegpu::StoreScatterOp>::UnrollPattern;
583 LogicalResult matchAndRewrite(xegpu::StoreScatterOp op,
586 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getValue().getType());
587 Value offsets = op.getOffsets();
588 Value mask = op.getMask();
590 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
595 if (
auto chunkSizeAttr = op->getAttr(
"chunk_size")) {
596 if (
auto intAttr = llvm::dyn_cast<IntegerAttr>(chunkSizeAttr))
597 chunkSize = intAttr.getInt();
601 VectorType maskTy = llvm::dyn_cast<VectorType>(mask.
getType());
602 VectorType offsetsTy = llvm::dyn_cast<VectorType>(offsets.
getType());
610 targetMaskShape.pop_back();
611 int64_t blockedChunkSize = targetShape->back();
612 int64_t numNewChunks = chunkSize / blockedChunkSize;
613 chunkSize = blockedChunkSize;
615 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
616 convertedOffsetTypes = getUnrolledTypes(offsetsTy, targetMaskShape);
619 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
621 pack(offsets, convertedOffsetTypes, targetMaskShape, loc, rewriter);
623 for (
auto maskVal : convertedMasksBase)
624 convertedMasks.append(numNewChunks, maskVal);
626 for (
auto [baseOffset, offsetType] :
627 llvm::zip(convertedOffsetsBase, convertedOffsetTypes)) {
628 for (
int64_t i = 0; i < numNewChunks; ++i) {
630 i * blockedChunkSize);
632 vector::BroadcastOp::create(rewriter, loc, offsetType, inc);
634 arith::AddIOp::create(rewriter, loc, baseOffset, incVec);
635 convertedOffsets.push_back(offsetVal);
639 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
641 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
643 convertedOffsetTypes = getUnrolledTypes(offsetsTy, *targetShape);
645 pack(offsets, convertedOffsetTypes, *targetShape, loc, rewriter);
649 getUnrolledTypes(valueTy, *targetShape);
651 pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
653 auto layout = op.getLayoutAttr();
655 layout = layout.dropInstData();
657 for (
auto [v, o, m] :
658 llvm::zip(convertedValues, convertedOffsets, convertedMasks)) {
659 xegpu::StoreScatterOp::create(rewriter, loc, v, op.getDest(), o, m,
661 op.getL1HintAttr(), op.getL2HintAttr(),
662 op.getL3HintAttr(), layout);
670struct UnrollLoadMatrixOp :
public UnrollPattern<xegpu::LoadMatrixOp> {
671 using UnrollPattern<xegpu::LoadMatrixOp>::UnrollPattern;
672 LogicalResult matchAndRewrite(xegpu::LoadMatrixOp op,
675 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
676 assert(valueTy &&
"the value type must be vector type!");
678 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
679 if (!targetShape || targetShape->size() != (
size_t)valueTy.getRank())
682 Type elemTy = valueTy.getElementType();
684 auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
686 VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
693 rewriter, loc, mixedOffsets,
695 offsetsList.push_back(adds);
699 layout = layout.dropInstData();
701 auto newOp = xegpu::LoadMatrixOp::create(
702 rewriter, op.getLoc(), newValueTy, op.getMemDesc(), offsets, layout);
703 newOps.push_back(newOp);
705 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
711struct UnrollStoreMatrixOp :
public UnrollPattern<xegpu::StoreMatrixOp> {
712 using UnrollPattern<xegpu::StoreMatrixOp>::UnrollPattern;
713 LogicalResult matchAndRewrite(xegpu::StoreMatrixOp op,
715 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
720 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getData().getType());
721 assert(valueTy &&
"the value type must be vector type!");
723 auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
726 getUnrolledTypes(valueTy, *targetShape);
728 pack(op.getData(), convertedValTypes, *targetShape, loc, rewriter);
735 rewriter, loc, mixedOffsets,
737 offsetsList.push_back(adds);
740 for (
auto [v, offsets] : llvm::zip_equal(convertedValues, offsetsList))
741 xegpu::StoreMatrixOp::create(rewriter, loc, v, op.getMemDesc(), offsets,
742 layout.dropInstData());
754struct UnrollConvertLayoutOp :
public UnrollPattern<xegpu::ConvertLayoutOp> {
755 using UnrollPattern<xegpu::ConvertLayoutOp>::UnrollPattern;
756 LogicalResult matchAndRewrite(xegpu::ConvertLayoutOp op,
759 Type valType = op.getType();
761 xegpu::DistributeLayoutAttr inputLayout = op.getInputLayoutAttr();
762 xegpu::DistributeLayoutAttr targetLayout = op.getTargetLayoutAttr();
763 if (!inputLayout || !targetLayout)
768 assert(!inputLayout.dropInstData() && !targetLayout.dropInstData() &&
769 "unexpected layout attributes for scalar type");
773 if (inputLayout.getEffectiveInstDataAsInt().empty() ||
774 targetLayout.getEffectiveInstDataAsInt().empty())
777 inputLayout = inputLayout.dropInstData();
778 targetLayout = targetLayout.dropInstData();
780 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
781 assert(valueTy &&
"the value type must be vector type!");
783 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
784 if (!targetShape || targetShape->size() != (
size_t)valueTy.getRank())
787 Value newSource = op.getSource();
789 if (inputLayout && targetLayout) {
791 getUnrolledTypes(valueTy, *targetShape);
793 pack(op.getOperand(), convertedValTypes, *targetShape, loc, rewriter);
794 for (
auto [v, t] : llvm::zip(convertedValues, convertedValTypes)) {
795 auto newOp = xegpu::ConvertLayoutOp::create(rewriter, loc, t, v,
796 inputLayout, targetLayout);
797 newOps.push_back(newOp);
799 newSource = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
811 patterns.
add<UnrollCreateNdOp, UnrollPrefetchNdOp, UnrollLoadNdOp,
812 UnrollStoreNdOp, UnrollDpasOp, UnrollDpasMxOp,
813 UnrollLoadMatrixOp, UnrollStoreMatrixOp, UnrollLoadGatherOp,
814 UnrollStoreScatterOp, UnrollConvertLayoutOp>(
static llvm::ManagedStatic< PassManagerOptions > options
static std::optional< SmallVector< int64_t > > getTargetShape(const vector::UnrollVectorOptions &options, Operation *op)
Return the target shape for unrolling for the given op.
DenseI64ArrayAttr getDenseI64ArrayAttr(ArrayRef< int64_t > values)
IntegerAttr getI64IntegerAttr(int64_t value)
StringAttr getStringAttr(const Twine &bytes)
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
NamedAttribute represents a combination of a name and an Attribute value.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
This class represents a single result from folding an operation.
Operation is the basic unit of execution within MLIR.
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
MLIRContext * getContext() const
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&...args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of the given shape from a set of values using vector.insert_strided_slice.
void populateXeGPUUnrollPatterns(RewritePatternSet &patterns, const UnrollOptions &options)
Collect a set of patterns to unroll xegpu operations to smaller shapes.
SmallVector< NamedAttribute > dropInstDataOnAttrs(ArrayRef< NamedAttribute > attrs)
Updates the NamedAttribute sequence by dropping inst-data information from any DistributeLayoutAttr f...
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors of the given shape from a value using vector.extract_strided_slice.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops for two arrays of the same length.
Include the generated interface declarations.
OpFoldResult getAsIndexOpFoldResult(MLIRContext *ctx, int64_t val)
Convert int64_t to integer attributes of index type and return them as OpFoldResult.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
int64_t computeProduct(ArrayRef< int64_t > basis)
Self-explanatory: return the product of all elements of basis.
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Options to control the XeGPU unrolling.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.