22#include "llvm/ADT/STLExtras.h"
23#include "llvm/Support/DebugLog.h"
27#define GEN_PASS_DEF_XEGPUUNROLL
28#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
32#define DEBUG_TYPE "xegpu-unroll"
38template <
typename SourceOp>
48 LDBG() <<
"Get unroll shape for: " << *op;
50 if (
options.filterConstraint && failed(
options.filterConstraint(op))) {
51 LDBG() <<
"--no filter constraint -> BAIL";
56 "expects the native shape for native shape call back function.");
57 auto nativeShape =
options.nativeShape(op);
63 bool returnSingleType =
false)
const {
64 return options.getUnrolledTypes(type, tileShape, returnSingleType);
71 if (
auto vecTy = dyn_cast<VectorType>(destTy)) {
72 auto shape = vecTy.getShape();
76 if (isa<xegpu::TensorDescType>(destTy)) {
81 auto castOp = UnrealizedConversionCastOp::create(
82 rewriter, loc, destTy, srcs,
84 return castOp.getResult(0);
87 llvm_unreachable(
"Unexpected destTy.");
96 if (
auto vecTy = dyn_cast<VectorType>(src.
getType())) {
101 if (isa<xegpu::TensorDescType>(src.
getType())) {
106 auto castOp = UnrealizedConversionCastOp::create(
107 rewriter, loc, destTypes, src,
109 return castOp.getResults();
112 llvm_unreachable(
"Unexpected src type.");
122 auto vecType = cast<VectorType>(operand.
getType());
123 std::optional<SmallVector<int64_t>> grids =
125 assert(grids &&
"Expecting grids to be computed.");
129 VectorType newVecTy =
130 vecType.cloneWith(blockSize, vecType.getElementType());
132 return pack(operand, convertedTypes, blockSize, loc, rewriter);
136 const char *
const packAttrName =
"__xegpu_blocking_pack__";
137 const char *
const unpackAttrName =
"__xegpu_blocking_unpack__";
138 const char *
const blockAttrName =
"__xegpu_blocking_tile_shape__";
154 int64_t rank = tdescTy.getRank();
162 auto aV = llvm::cast<Value>(a);
164 return rewriter.
createOrFold<arith::AddIOp>(loc, aV, bV);
169 llvm::drop_begin(mixedOffsets, mixedOffsets.size() - rank));
171 llvm::seq<int64_t>(mixedOffsets.size() - rank, mixedOffsets.size());
177 for (
auto [idx, oldOff, offset] :
178 llvm::zip(validIdxes, oldOffsets, offsets))
179 mixedOffsets[idx] = addi(oldOff, offset);
181 auto newOp = createOp(mixedOffsets);
182 newOps.push_back(newOp);
187struct UnrollCreateNdOp :
public UnrollPattern<xegpu::CreateNdDescOp> {
188 using UnrollPattern<xegpu::CreateNdDescOp>::UnrollPattern;
189 LogicalResult matchAndRewrite(xegpu::CreateNdDescOp op,
192 xegpu::TensorDescType tdescTy = op.getType();
194 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
200 auto newTdescTy = getUnrolledTypes(tdescTy, *targetShape)[0];
202 xegpu::CreateNdDescOp::create(rewriter, loc, newTdescTy, op.getSource(),
203 op.getMixedSizes(), op.getMixedStrides());
204 newOps.push_back(newOp);
205 Value castOp = unpack(newOps, tdescTy, *targetShape, loc, rewriter);
212struct UnrollPrefetchNdOp :
public UnrollPattern<xegpu::PrefetchNdOp> {
213 using UnrollPattern<xegpu::PrefetchNdOp>::UnrollPattern;
214 LogicalResult matchAndRewrite(xegpu::PrefetchNdOp op,
217 xegpu::TensorDescType tdescTy = op.getTensorDescType();
219 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
223 xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
225 layout = layout.dropInstData();
228 getUnrolledTypes(tdescTy, *targetShape,
true);
231 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
234 xegpu::PrefetchNdOp::create(rewriter, loc, convertedTdesc[0], offsets,
235 op.getL1HintAttr(), op.getL2HintAttr(),
236 op.getL3HintAttr(), layout);
241 computeUnrolledOffsets(op.getMixedOffsets(), tdescTy, *targetShape,
242 createPrefetch, loc, rewriter);
249struct UnrollLoadNdOp :
public UnrollPattern<xegpu::LoadNdOp> {
250 using UnrollPattern<xegpu::LoadNdOp>::UnrollPattern;
251 LogicalResult matchAndRewrite(xegpu::LoadNdOp op,
255 VectorType valueTy = op.getType();
256 xegpu::TensorDescType tdescTy = op.getTensorDescType();
258 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
262 xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
264 layout = layout.dropInstData();
266 Type elemTy = tdescTy.getElementType();
267 VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
270 getUnrolledTypes(tdescTy, *targetShape,
true);
273 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
277 return xegpu::LoadNdOp::create(
278 rewriter, loc, newValueTy, convertedTdescs[0], offsets,
279 op.getPackedAttr(), op.getTransposeAttr(), op.getL1HintAttr(),
280 op.getL2HintAttr(), op.getL3HintAttr(), layout);
282 newOps = computeUnrolledOffsets(op.getMixedOffsets(), tdescTy, *targetShape,
283 createLoad, loc, rewriter);
285 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
292struct UnrollStoreNdOp :
public UnrollPattern<xegpu::StoreNdOp> {
293 using UnrollPattern<xegpu::StoreNdOp>::UnrollPattern;
294 LogicalResult matchAndRewrite(xegpu::StoreNdOp op,
297 VectorType valueTy = op.getValueType();
298 xegpu::TensorDescType tdescTy = op.getTensorDescType();
300 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
304 xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
306 layout = layout.dropInstData();
309 getUnrolledTypes(valueTy, *targetShape);
311 getUnrolledTypes(tdescTy, *targetShape,
true);
314 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
317 pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
319 size_t valueIndex = 0;
321 xegpu::StoreNdOp::create(rewriter, loc, convertedValues[valueIndex++],
322 convertedTdescs[0], offsets, op.getL1HintAttr(),
323 op.getL2HintAttr(), op.getL3HintAttr(), layout);
328 computeUnrolledOffsets(op.getMixedOffsets(), tdescTy, *targetShape,
329 createStore, loc, rewriter);
336struct UnrollDpasOp :
public UnrollPattern<xegpu::DpasOp> {
337 using UnrollPattern<xegpu::DpasOp>::UnrollPattern;
338 LogicalResult matchAndRewrite(xegpu::DpasOp op,
342 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
343 if (!targetShape || targetShape->size() != 3)
345 auto M = (*targetShape)[0];
346 auto K = (*targetShape)[1];
347 auto N = (*targetShape)[2];
349 int64_t aBlockSize[2] = {M, K};
350 int64_t bBlockSize[2] = {K, N};
351 int64_t cBlockSize[2] = {M, N};
353 auto a = op.getLhs();
354 auto b = op.getRhs();
355 auto c = op.getAcc();
361 cVals = packOperandForDpas(c, cBlockSize, loc, rewriter);
365 if (llvm::any_of(ranges, [](
auto &v) {
return v.size() == 0; }) ||
366 llvm::all_of(ranges, [](
auto &v) {
return v.size() == 1; }))
369 VectorType resultTy = op.getResult().getType();
370 auto vecTy = VectorType::get(cBlockSize, resultTy.getElementType());
372 auto aShape = a.getType().getShape();
373 auto bShape =
b.getType().getShape();
374 int64_t mIters = aShape[0] / M;
375 int64_t kIters = aShape[1] / K;
376 int64_t nIters = bShape[1] / N;
379 for (
int64_t i = 0; i < mIters; ++i) {
383 tmpC = cVals[i * nIters +
j];
385 for (
int64_t k = 0; k < kIters; ++k) {
386 Value aVec = aVals[i * kIters + k];
387 Value bVec = bVals[k * nIters +
j];
390 operands.push_back(tmpC);
393 xegpu::DpasOp::create(rewriter, loc, vecTy, operands,
396 newOps.push_back(tmpC);
399 Value castOp = unpack(newOps, resultTy, cBlockSize, loc, rewriter);
405struct UnrollDpasMxOp :
public UnrollPattern<xegpu::DpasMxOp> {
406 using UnrollPattern<xegpu::DpasMxOp>::UnrollPattern;
407 LogicalResult matchAndRewrite(xegpu::DpasMxOp op,
411 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
412 if (!targetShape || targetShape->size() != 4)
414 auto M = (*targetShape)[0];
415 auto K = (*targetShape)[1];
416 auto N = (*targetShape)[2];
417 auto S = (*targetShape)[3];
419 int64_t aBlockSize[2] = {M, K};
420 int64_t bBlockSize[2] = {K, N};
421 int64_t cBlockSize[2] = {M, N};
422 int64_t aScaleBlockSize[2] = {M, S};
423 int64_t bScaleBlockSize[2] = {S, N};
427 auto c = op.getAcc();
428 auto ascale = dyn_cast<TypedValue<VectorType>>(op.getScaleA());
429 auto bscale = dyn_cast<TypedValue<VectorType>>(op.getScaleB());
435 cVals = packOperandForDpas(c, cBlockSize, loc, rewriter);
438 aScaleVals = packOperandForDpas(ascale, aScaleBlockSize, loc, rewriter);
441 bScaleVals = packOperandForDpas(bscale, bScaleBlockSize, loc, rewriter);
443 VectorType resultTy = op.getResult().getType();
444 auto vecTy = VectorType::get(cBlockSize, resultTy.getElementType());
446 auto aShape = a.getType().getShape();
447 auto bShape =
b.getType().getShape();
448 int64_t mIters = aShape[0] / M;
449 int64_t kIters = aShape[1] / K;
450 int64_t nIters = bShape[1] / N;
453 xegpu::DpasMxOp newDpasMxOp;
454 for (
int64_t i = 0; i < mIters; ++i) {
458 tmpC = cVals[i * nIters +
j];
460 for (
int64_t k = 0; k < kIters; ++k) {
461 Value aVec = aVals[i * kIters + k];
462 Value bVec = bVals[k * nIters +
j];
465 operands.push_back(tmpC);
467 operands.push_back(aScaleVals[i * kIters + k]);
469 operands.push_back(bScaleVals[k * nIters +
j]);
471 newDpasMxOp = xegpu::DpasMxOp::create(
472 rewriter, loc, vecTy, operands,
474 tmpC = newDpasMxOp.getResult();
476 newOps.push_back(newDpasMxOp);
479 Value castOp = unpack(newOps, resultTy, cBlockSize, loc, rewriter);
489struct UnrollLoadGatherOp :
public UnrollPattern<xegpu::LoadGatherOp> {
490 using UnrollPattern<xegpu::LoadGatherOp>::UnrollPattern;
491 LogicalResult matchAndRewrite(xegpu::LoadGatherOp op,
494 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
495 Value offsets = op.getOffsets();
496 Value mask = op.getMask();
498 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
504 if (
auto chunkSizeAttr = op->getAttr(
"chunk_size")) {
505 if (
auto intAttr = llvm::dyn_cast<IntegerAttr>(chunkSizeAttr))
506 chunkSize = intAttr.getInt();
510 VectorType maskTy = llvm::dyn_cast<VectorType>(mask.
getType());
511 VectorType offsetsTy = llvm::dyn_cast<VectorType>(offsets.
getType());
512 Type elemTy = valueTy.getElementType();
513 VectorType newValueTy = VectorType::get(*targetShape, elemTy);
522 targetMaskShape.pop_back();
523 int64_t blockedChunkSize = targetShape->back();
524 int64_t numNewChunks = chunkSize / blockedChunkSize;
525 chunkSize = blockedChunkSize;
527 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
528 convertedOffsetTypes = getUnrolledTypes(offsetsTy, targetMaskShape);
531 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
533 pack(offsets, convertedOffsetTypes, targetMaskShape, loc, rewriter);
535 for (
auto maskVal : convertedMasksBase)
536 convertedMasks.append(numNewChunks, maskVal);
538 for (
auto [baseOffset, offsetType] :
539 llvm::zip(convertedOffsetsBase, convertedOffsetTypes)) {
540 for (
int64_t i = 0; i < numNewChunks; ++i) {
542 i * blockedChunkSize);
544 vector::BroadcastOp::create(rewriter, loc, offsetType, inc);
546 arith::AddIOp::create(rewriter, loc, baseOffset, incVec);
547 convertedOffsets.push_back(offsetVal);
551 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
553 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
555 convertedOffsetTypes = getUnrolledTypes(offsetsTy, *targetShape);
557 pack(offsets, convertedOffsetTypes, *targetShape, loc, rewriter);
560 auto layout = op.getLayoutAttr();
562 layout = layout.dropInstData();
565 for (
auto [o, m] : llvm::zip(convertedOffsets, convertedMasks)) {
566 auto newOp = xegpu::LoadGatherOp::create(
567 rewriter, loc, newValueTy, op.getSource(), o, m,
569 op.getL2HintAttr(), op.getL3HintAttr(), layout);
570 newOps.push_back(newOp);
573 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
583struct UnrollStoreScatterOp :
public UnrollPattern<xegpu::StoreScatterOp> {
584 using UnrollPattern<xegpu::StoreScatterOp>::UnrollPattern;
585 LogicalResult matchAndRewrite(xegpu::StoreScatterOp op,
588 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getValue().getType());
589 Value offsets = op.getOffsets();
590 Value mask = op.getMask();
592 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
597 if (
auto chunkSizeAttr = op->getAttr(
"chunk_size")) {
598 if (
auto intAttr = llvm::dyn_cast<IntegerAttr>(chunkSizeAttr))
599 chunkSize = intAttr.getInt();
603 VectorType maskTy = llvm::dyn_cast<VectorType>(mask.
getType());
604 VectorType offsetsTy = llvm::dyn_cast<VectorType>(offsets.
getType());
612 targetMaskShape.pop_back();
613 int64_t blockedChunkSize = targetShape->back();
614 int64_t numNewChunks = chunkSize / blockedChunkSize;
615 chunkSize = blockedChunkSize;
617 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
618 convertedOffsetTypes = getUnrolledTypes(offsetsTy, targetMaskShape);
621 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
623 pack(offsets, convertedOffsetTypes, targetMaskShape, loc, rewriter);
625 for (
auto maskVal : convertedMasksBase)
626 convertedMasks.append(numNewChunks, maskVal);
628 for (
auto [baseOffset, offsetType] :
629 llvm::zip(convertedOffsetsBase, convertedOffsetTypes)) {
630 for (
int64_t i = 0; i < numNewChunks; ++i) {
632 i * blockedChunkSize);
634 vector::BroadcastOp::create(rewriter, loc, offsetType, inc);
636 arith::AddIOp::create(rewriter, loc, baseOffset, incVec);
637 convertedOffsets.push_back(offsetVal);
641 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
643 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
645 convertedOffsetTypes = getUnrolledTypes(offsetsTy, *targetShape);
647 pack(offsets, convertedOffsetTypes, *targetShape, loc, rewriter);
651 getUnrolledTypes(valueTy, *targetShape);
653 pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
655 auto layout = op.getLayoutAttr();
657 layout = layout.dropInstData();
659 for (
auto [v, o, m] :
660 llvm::zip(convertedValues, convertedOffsets, convertedMasks)) {
661 xegpu::StoreScatterOp::create(rewriter, loc, v, op.getDest(), o, m,
663 op.getL1HintAttr(), op.getL2HintAttr(),
664 op.getL3HintAttr(), layout);
672struct UnrollLoadMatrixOp :
public UnrollPattern<xegpu::LoadMatrixOp> {
673 using UnrollPattern<xegpu::LoadMatrixOp>::UnrollPattern;
674 LogicalResult matchAndRewrite(xegpu::LoadMatrixOp op,
677 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
678 assert(valueTy &&
"the value type must be vector type!");
680 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
681 if (!targetShape || targetShape->size() != (
size_t)valueTy.getRank())
684 Type elemTy = valueTy.getElementType();
686 auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
688 VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
695 rewriter, loc, mixedOffsets,
697 offsetsList.push_back(adds);
701 layout = layout.dropInstData();
703 auto newOp = xegpu::LoadMatrixOp::create(
704 rewriter, op.getLoc(), newValueTy, op.getMemDesc(), offsets, layout);
705 newOps.push_back(newOp);
707 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
713struct UnrollStoreMatrixOp :
public UnrollPattern<xegpu::StoreMatrixOp> {
714 using UnrollPattern<xegpu::StoreMatrixOp>::UnrollPattern;
715 LogicalResult matchAndRewrite(xegpu::StoreMatrixOp op,
717 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
722 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getData().getType());
723 assert(valueTy &&
"the value type must be vector type!");
725 auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
728 getUnrolledTypes(valueTy, *targetShape);
730 pack(op.getData(), convertedValTypes, *targetShape, loc, rewriter);
737 rewriter, loc, mixedOffsets,
739 offsetsList.push_back(adds);
742 for (
auto [v, offsets] : llvm::zip_equal(convertedValues, offsetsList))
743 xegpu::StoreMatrixOp::create(rewriter, loc, v, op.getMemDesc(), offsets,
744 layout.dropInstData());
756struct UnrollConvertLayoutOp :
public UnrollPattern<xegpu::ConvertLayoutOp> {
757 using UnrollPattern<xegpu::ConvertLayoutOp>::UnrollPattern;
758 LogicalResult matchAndRewrite(xegpu::ConvertLayoutOp op,
761 Type valType = op.getType();
763 xegpu::DistributeLayoutAttr inputLayout = op.getInputLayoutAttr();
764 xegpu::DistributeLayoutAttr targetLayout = op.getTargetLayoutAttr();
765 if (!inputLayout || !targetLayout)
770 assert(!inputLayout.dropInstData() && !targetLayout.dropInstData() &&
771 "unexpected layout attributes for scalar type");
775 if (inputLayout.getEffectiveInstDataAsInt().empty() ||
776 targetLayout.getEffectiveInstDataAsInt().empty())
779 inputLayout = inputLayout.dropInstData();
780 targetLayout = targetLayout.dropInstData();
782 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
783 assert(valueTy &&
"the value type must be vector type!");
785 std::optional<SmallVector<int64_t>> targetShape =
getTargetShape(op);
786 if (!targetShape || targetShape->size() != (
size_t)valueTy.getRank())
789 Value newSource = op.getSource();
791 if (inputLayout && targetLayout) {
793 getUnrolledTypes(valueTy, *targetShape);
795 pack(op.getOperand(), convertedValTypes, *targetShape, loc, rewriter);
796 for (
auto [v, t] : llvm::zip(convertedValues, convertedValTypes)) {
797 auto newOp = xegpu::ConvertLayoutOp::create(rewriter, loc, t, v,
798 inputLayout, targetLayout);
799 newOps.push_back(newOp);
801 newSource = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
823struct UnrollMultiReductionOp
824 :
public UnrollPattern<vector::MultiDimReductionOp> {
828 : UnrollPattern<vector::MultiDimReductionOp>(context,
options, benefit) {}
830 LogicalResult matchAndRewrite(vector::MultiDimReductionOp reductionOp,
832 VectorType srcTy = reductionOp.getSourceVectorType();
834 int64_t srcRank = srcTy.getRank();
836 Location loc = reductionOp.getLoc();
837 Value source = reductionOp.getSource();
839 vector::CombiningKind kind = reductionOp.getKind();
842 auto resultType = dyn_cast<VectorType>(reductionOp.getDestType());
846 std::optional<SmallVector<int64_t>> targetShapeOpt =
848 if (!targetShapeOpt ||
849 static_cast<int64_t>(targetShapeOpt->size()) != srcRank)
855 for (
int64_t i = 0; i < srcRank; ++i) {
856 if (srcShape[i] % targetShape[i] != 0)
863 for (
int64_t i = 0; i < srcRank; ++i) {
864 if (reductionMask[i])
865 reducedDims.push_back(i);
867 keptDims.push_back(i);
874 numReducedTilesPerDim.push_back(srcShape[d] / targetShape[d]);
879 keptShape.push_back(srcShape[d]);
880 keptTileShape.push_back(targetShape[d]);
884 Value result = arith::ConstantOp::create(rewriter, loc, resultType,
897 for (
auto [idx, dim] : llvm::enumerate(keptDims))
898 baseOffsets[dim] = keptOffsets[idx];
914 for (
auto [idx, dim] : llvm::enumerate(reducedDims))
915 offsets[dim] = reducedTileIdx[idx] * targetShape[dim];
917 Value tile = vector::ExtractStridedSliceOp::create(
918 rewriter, loc, source, offsets, targetShape, strides);
919 tiles.push_back(
tile);
923 Value reduced = tiles[0];
924 for (
size_t i = 1; i < tiles.size(); ++i)
930 Value accSlice = vector::ExtractStridedSliceOp::create(
931 rewriter, loc,
acc, keptOffsets, keptTileShape, accStrides);
933 auto newReduction = vector::MultiDimReductionOp::create(
934 rewriter, loc, reduced, accSlice, reductionMask, kind);
938 result = vector::InsertStridedSliceOp::create(
939 rewriter, loc, newReduction,
result, keptOffsets, dstStrides);
952 .
add<UnrollCreateNdOp, UnrollPrefetchNdOp, UnrollLoadNdOp,
953 UnrollStoreNdOp, UnrollDpasOp, UnrollDpasMxOp, UnrollLoadMatrixOp,
954 UnrollStoreMatrixOp, UnrollLoadGatherOp, UnrollStoreScatterOp,
955 UnrollConvertLayoutOp, UnrollMultiReductionOp>(patterns.
getContext(),
static llvm::ManagedStatic< PassManagerOptions > options
static std::optional< SmallVector< int64_t > > getTargetShape(const vector::UnrollVectorOptions &options, Operation *op)
Return the target shape for unrolling for the given op.
DenseI64ArrayAttr getDenseI64ArrayAttr(ArrayRef< int64_t > values)
IntegerAttr getI64IntegerAttr(int64_t value)
StringAttr getStringAttr(const Twine &bytes)
TypedAttr getZeroAttr(Type type)
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
NamedAttribute represents a combination of a name and an Attribute value.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
This class represents a single result from folding an operation.
Operation is the basic unit of execution within MLIR.
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
MLIRContext * getContext() const
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&...args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Value makeArithReduction(OpBuilder &b, Location loc, CombiningKind kind, Value v1, Value acc, arith::FastMathFlagsAttr fastmath=nullptr, Value mask=nullptr)
Returns the result value of reducing two scalar/vector values with the corresponding arith operation.
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of the given shape from a set of values using vector.insert_strided_slice.
void populateXeGPUUnrollPatterns(RewritePatternSet &patterns, const UnrollOptions &options)
Collect a set of patterns to unroll xegpu operations to smaller shapes.
SmallVector< NamedAttribute > dropInstDataOnAttrs(ArrayRef< NamedAttribute > attrs)
Updates the NamedAttribute sequence by dropping inst-data information from any DistributeLayoutAttr f...
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with the same length.
Include the generated interface declarations.
OpFoldResult getAsIndexOpFoldResult(MLIRContext *ctx, int64_t val)
Convert int64_t to integer attributes of index type and return them as OpFoldResult.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
int64_t computeProduct(ArrayRef< int64_t > basis)
Self-explanatory: computes the product of the entries of basis.
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Options to control the XeGPU unrolling.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.