//===---- XeGPUBlocking.cpp ---- XeGPU Blocking Pass ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/XeGPU/Transforms/Passes.h"

#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/DebugLog.h"

namespace mlir {
namespace xegpu {
#define GEN_PASS_DEF_XEGPUBLOCKING
#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
} // namespace xegpu
} // namespace mlir

#define DEBUG_TYPE "xegpu-blocking"

using namespace mlir;

namespace {

// Resolve the unrealized conversion cast ops generated when doing SCF
// structural type conversion. These casts come in two forms: N:1 vector
// casts and 1:N vector casts. vector::insert_strided_slice ops are used
// for the first case, and vector::extract_strided_slice ops for the
// second.
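// As a hypothetical illustration, an N:1 cast such as
//   %big = builtin.unrealized_conversion_cast %t0, %t1
//        : vector<8x16xf32>, vector<8x16xf32> to vector<16x16xf32>
// is replaced by vector.insert_strided_slice ops that assemble %t0 and %t1
// into a single vector<16x16xf32>; the inverse 1:N cast is replaced by
// vector.extract_strided_slice ops.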
static void
resolveUnrealizedConversionCastOp(UnrealizedConversionCastOp castOp) {
  ValueRange inputs = castOp.getInputs();
  ValueRange outputs = castOp.getOutputs();

  auto hasIdenticalVectorTypes = [](ValueRange values) {
    auto types = values.getTypes();
    return llvm::all_of(types, [&](Type type) {
      return isa<VectorType>(type) && type == types.front();
    });
  };

  // We are only interested in the case where all inputs and all outputs have
  // identical VectorTypes.
  if (!hasIdenticalVectorTypes(inputs) || !hasIdenticalVectorTypes(outputs)) {
    LDBG() << "skip unrealized conversion cast op not emulating pack/unpack.";
    return;
  }

  VectorType outputTy = dyn_cast<VectorType>(outputs[0].getType());
  OpBuilder builder(castOp);
  if (inputs.size() > 1 && outputs.size() == 1) {
    // The castOp is emulating an unpack op.
    ArrayRef<int64_t> shape = outputTy.getShape();
    Value result = xegpu::createVectorWithShapeFromValues(
        builder, castOp.getLoc(), inputs, shape);
    castOp->replaceAllUsesWith(ValueRange(result));
    castOp->erase();
  } else if (castOp.getNumResults() > 1 && castOp.getNumOperands() == 1) {
    // The castOp is emulating a pack op.
    ArrayRef<int64_t> tileShape = outputTy.getShape();
    SmallVector<Value> results = xegpu::extractVectorsWithShapeFromValue(
        builder, castOp.getLoc(), inputs[0], tileShape);
    castOp->replaceAllUsesWith(results);
    castOp->erase();
  }
}

// This pattern lowers ConvertLayoutOp by removing the inst_data field from
// the layout attributes. Since producer and consumer operations each handle
// data partitioning based on their own inst_data, while preserving the
// original input and output shapes, ConvertLayoutOp does not need to manage
// inst_data itself.
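// As a hypothetical illustration, a layout such as
//   #xegpu.layout<inst_data = [8, 16], lane_layout = [1, 16], lane_data = [1, 1]>
// becomes
//   #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
// on both the input and the target side of the op.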
struct ConvertLayoutOpPattern
    : public OpRewritePattern<xegpu::ConvertLayoutOp> {
  using OpRewritePattern<xegpu::ConvertLayoutOp>::OpRewritePattern;
  LogicalResult matchAndRewrite(xegpu::ConvertLayoutOp op,
                                PatternRewriter &rewriter) const override {
    xegpu::DistributeLayoutAttr inputLayout = op.getInputLayoutAttr();
    xegpu::DistributeLayoutAttr targetLayout = op.getTargetLayoutAttr();
    if (inputLayout.getEffectiveInstDataAsInt().empty() ||
        targetLayout.getEffectiveInstDataAsInt().empty())
      return rewriter.notifyMatchFailure(op, "Not a target ConvertLayoutOp.");

    inputLayout = inputLayout.dropInstData();
    targetLayout = targetLayout.dropInstData();
    auto newOp = rewriter.createOrFold<xegpu::ConvertLayoutOp>(
        op.getLoc(), op.getType(), op.getSource(), inputLayout, targetLayout);
    rewriter.replaceOp(op, newOp);
    return success();
  }
};

//===------------------------------------------------------------------------===//
// The XeGPUBlockingPass leverages the unroll patterns for XeGPU and Vector ops
// to partition operations that process large shapes into multiple operations
// on smaller shapes, as specified by the inst_data in the layout attribute.
// This enables each resulting operation to be efficiently mapped to a hardware
// instruction.
//===------------------------------------------------------------------------===//
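
// As a hypothetical illustration, a 32x32 load whose layout carries
// inst_data = [8, 16]:
//   %1 = xegpu.load_nd %0
//        : !xegpu.tensor_desc<32x32xf16, #xegpu.layout<inst_data = [8, 16]>>
//        -> vector<32x32xf16>
// is rewritten into eight 8x16 loads, each of which can map directly to a
// single hardware instruction.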

class XeGPUBlockingPass final
    : public xegpu::impl::XeGPUBlockingBase<XeGPUBlockingPass> {
public:
  void runOnOperation() override;

private:
  // Get the tile shape for a given OpOperand or OpResult by examining the
  // corresponding layout attribute. If the layout is not present or is not a
  // subgroup-level layout, it returns std::nullopt.
  template <typename T,
            typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
                                        std::is_same_v<T, OpResult>>>
  std::optional<SmallVector<int64_t>>
  getTileShape(const T &operandOrResult) const;

  // Get the tile shape for a given operation.
  std::optional<SmallVector<int64_t>> getTileShape(Operation *op) const;

  // Determine if the operation requires unrolling. Return false if all
  // operands and results have tile shapes identical to their original types;
  // otherwise, return true.
  bool needsUnroll(Operation *op) const;
};
} // namespace

template <typename T, typename>
std::optional<SmallVector<int64_t>>
XeGPUBlockingPass::getTileShape(const T &operandOrResult) const {
  Value value;
  if constexpr (std::is_same_v<T, OpOperand>)
    value = operandOrResult.get();
  else
    value = (Value)operandOrResult;

  xegpu::DistributeLayoutAttr layout =
      xegpu::getDistributeLayoutAttr(operandOrResult);
  if (layout && layout.isForSubgroup()) {
    if (!layout.getEffectiveInstDataAsInt().empty()) {
      SmallVector<int64_t> instData = layout.getEffectiveInstDataAsInt();
      // Remove leading unit dimensions from inst_data. For example, if the
      // inst_data is [1, 1, 32], [32] is passed as the unroll/blocking size.
      // Skip this for xegpu nd ops since their shapes will be 2D.
      // TODO: For vector ops, experiment with the upstream patterns that
      // remove leading unit dims,
      // populateCastAwayVectorLeadingOneDimPatterns.
      Operation *definingOp = value.getDefiningOp();
      bool skipLeadingUnitDimRemoval =
          definingOp &&
          (isa<xegpu::CreateNdDescOp, xegpu::LoadNdOp, xegpu::DpasOp,
               xegpu::StoreNdOp, xegpu::PrefetchNdOp>(definingOp));
      if (!skipLeadingUnitDimRemoval) {
        auto it = llvm::find_if(instData, [](auto val) { return val != 1; });
        instData.erase(instData.begin(), it);
      }
      return instData;
    }

    if (auto type = dyn_cast<ShapedType>(value.getType()))
      return llvm::to_vector(type.getShape());
  }
  LDBG() << "failed to getTileShape for: " << value;
  return std::nullopt;
}

std::optional<SmallVector<int64_t>>
XeGPUBlockingPass::getTileShape(Operation *op) const {
  if (isa<xegpu::CreateNdDescOp, xegpu::UpdateNdOffsetOp, xegpu::CreateDescOp,
          xegpu::UpdateOffsetOp, xegpu::LoadMatrixOp>(op))
    return getTileShape(op->getOpResult(0));
  if (isa<xegpu::PrefetchNdOp, xegpu::LoadNdOp, xegpu::PrefetchOp,
          xegpu::StoreMatrixOp>(op))
    return getTileShape(op->getOpOperand(0));
  if (isa<xegpu::StoreNdOp>(op))
    return getTileShape(op->getOpOperand(1));

  // Handle LoadGatherOp and StoreScatterOp (with and without offsets).
  if (auto loadGatherOp = dyn_cast<xegpu::LoadGatherOp>(op)) {
    if (loadGatherOp.getOffsets())
      return getTileShape(loadGatherOp->getOpResult(0));
    else
      return getTileShape(loadGatherOp->getOpOperand(0));
  }

  if (auto storeScatterOp = dyn_cast<xegpu::StoreScatterOp>(op))
    return getTileShape(storeScatterOp.getOffsets()
                            ? storeScatterOp->getOpOperand(0)
                            : storeScatterOp->getOpOperand(1));

  if (isa<xegpu::DpasOp>(op)) {
    std::optional<SmallVector<int64_t>> aTile =
        getTileShape(op->getOpOperand(0));
    std::optional<SmallVector<int64_t>> bTile =
        getTileShape(op->getOpOperand(1));

    if (!aTile || aTile->size() != 2 || !bTile || bTile->size() != 2)
      return std::nullopt;

    // Semantic check for A and B: the shared K dimension must match.
    if ((*aTile)[1] != (*bTile)[0])
      return std::nullopt;

    // Semantic check for C: its tile must be MxN.
    if (op->getNumOperands() == 3) {
      std::optional<SmallVector<int64_t>> cTile =
          getTileShape(op->getOpOperand(2));
      int64_t expectedCTile[2] = {(*aTile)[0], (*bTile)[1]};
      if (!cTile || !llvm::equal(*cTile, expectedCTile))
        return std::nullopt;
    }

    return SmallVector<int64_t>({(*aTile)[0], (*aTile)[1], (*bTile)[1]});
  }
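  // As an illustrative example: with aTile = [8, 16] and bTile = [16, 16],
  // the checks above pass (the shared K dimension is 16), and the returned
  // blocking shape is [M, K, N] = [8, 16, 16].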

  if (OpTrait::hasElementwiseMappableTraits(op) && op->getNumResults() == 1)
    return getTileShape(op->getOpResult(0));

  if (isa<vector::MultiDimReductionOp>(op))
    return getTileShape(op->getOpOperand(0));

  if (isa<vector::TransposeOp, vector::BroadcastOp>(op))
    return getTileShape(op->getOpResult(0));

  return std::nullopt;
}

bool XeGPUBlockingPass::needsUnroll(Operation *op) const {
  // Skip the op if any of its operands or results has a workgroup-level
  // layout.
  bool hasWgLayoutOperands =
      llvm::any_of(op->getOpOperands(), [](OpOperand &opr) {
        xegpu::DistributeLayoutAttr layout =
            xegpu::getDistributeLayoutAttr(opr);
        return layout && layout.isForWorkgroup();
      });
  bool hasWgLayoutResults =
      llvm::any_of(op->getOpResults(), [](OpResult result) {
        xegpu::DistributeLayoutAttr layout =
            xegpu::getDistributeLayoutAttr(result);
        return layout && layout.isForWorkgroup();
      });
  if (hasWgLayoutOperands || hasWgLayoutResults) {
    LDBG() << "skip unrolling for op with workgroup level layout: " << *op;
    return false;
  }

  auto isUnrollable = [](Value value, ArrayRef<int64_t> tileShape) {
    Type valTy = value.getType();
    if (auto tdescTy = dyn_cast<xegpu::TensorDescType>(valTy)) {
      xegpu::DistributeLayoutAttr layout = tdescTy.getLayoutAttr();
      return layout && !layout.getEffectiveInstDataAsInt().empty();
    }
    auto shapedType = dyn_cast<ShapedType>(valTy);
    return shapedType && !llvm::equal(tileShape, shapedType.getShape());
  };

  bool hasUnrollableOperands =
      llvm::any_of(op->getOpOperands(), [&](OpOperand &opr) {
        std::optional<SmallVector<int64_t>> tileShape = getTileShape(opr);
        return tileShape.has_value() && isUnrollable(opr.get(), *tileShape);
      });
  bool hasUnrollableResults =
      llvm::any_of(op->getOpResults(), [&](OpResult result) {
        std::optional<SmallVector<int64_t>> tileShape = getTileShape(result);
        return tileShape.has_value() && isUnrollable(result, *tileShape);
      });
  return hasUnrollableOperands || hasUnrollableResults;
}

void XeGPUBlockingPass::runOnOperation() {
  MLIRContext *ctx = &getContext();
  Operation *op = getOperation();

  // Preserve the LayoutAttr of each operand in the owner's DictionaryAttr.
  // This ensures that the LayoutAttr remains accessible even if the defining
  // operation is replaced.
  xegpu::setDistributeLayoutAttrs(
      op, [](Value v) { return xegpu::getDistributeLayoutAttr(v); });

  auto getTileShapeAndCount = [](llvm::ArrayRef<int64_t> shape,
                                 xegpu::LayoutAttr layout) {
    int count = 1;
    SmallVector<int64_t> tileShape(shape);
    if (layout && layout.getInstData()) {
      DenseI32ArrayAttr instData = layout.getInstData();
      tileShape = llvm::to_vector_of<int64_t>(instData.asArrayRef());
      count = computeProduct(shape) / computeProduct(tileShape);
    }
    return std::make_pair(tileShape, count);
  };
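
  // For example (illustrative numbers), a shape of [32, 64] with
  // inst_data = [8, 16] yields the tile shape [8, 16] and a count of
  // (32 * 64) / (8 * 16) = 16.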

  // Perform type conversion for SCF control flow ops.
  TypeConverter converter;
  converter.addConversion([](Type type) -> Type { return type; });
  converter.addConversion(
      [&](RankedTensorType type,
          SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
        Type elemTy = type.getElementType();
        ArrayRef<int64_t> shape = type.getShape();

        auto layout =
            llvm::dyn_cast_if_present<xegpu::LayoutAttr>(type.getEncoding());
        if (layout && layout.isForWorkgroup())
          return failure();

        int count;
        SmallVector<int64_t> subShape;
        std::tie(subShape, count) = getTileShapeAndCount(shape, layout);
        auto newTy = VectorType::get(subShape, elemTy);
        result.append(count, newTy);
        return success();
      });
  converter.addConversion(
      [&](xegpu::TensorDescType type,
          SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
        Type elemTy = type.getElementType();
        ArrayRef<int64_t> shape = type.getShape();

        xegpu::LayoutAttr layout = type.getLayoutAttr();
        if (layout && layout.isForWorkgroup())
          return failure();

        int count;
        SmallVector<int64_t> subShape;
        std::tie(subShape, count) = getTileShapeAndCount(shape, layout);

        if (layout)
          layout = layout.dropInstData();

        auto newTy = xegpu::TensorDescType::get(
            type.getContext(), subShape, elemTy, type.getEncoding(), layout);
        result.append(count, newTy);
        return success();
      });

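  // As an illustrative example, a !xegpu.tensor_desc<32x32xf16,
  // #xegpu.layout<inst_data = [16, 16]>> is converted into four
  // !xegpu.tensor_desc<16x16xf16> values; the SCF structural type conversion
  // threads them through loop regions via unrealized_conversion_cast ops,
  // which are resolved at the end of this pass.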
  xegpu::doSCFStructuralTypeConversionWithTensorType(op, converter);

  // Remove leading unit dimensions from vector ops and then do the
  // unrolling.
  {
    RewritePatternSet patterns(ctx);
    vector::populateCastAwayVectorLeadingOneDimPatterns(patterns);
    (void)applyPatternsGreedily(op, std::move(patterns));
  }
  xegpu::UnrollOptions options;
  options.setFilterConstraint(
      [&](Operation *op) -> LogicalResult { return success(needsUnroll(op)); });

  options.setNativeShapeFn([&](Operation *op) { return getTileShape(op); });

  options.setUnrolledTypesFn([&](ShapedType type, ArrayRef<int64_t> tileShape,
                                 bool returnSingleType = false) {
    Type elemTy = type.getElementType();
    Type newTy;

    if (auto tdescTy = dyn_cast<xegpu::TensorDescType>(type)) {
      Attribute encoding = tdescTy.getEncoding();
      // If the encoding is a ScatterTensorDescAttr, we may need to adjust
      // the chunk size based on the inst_data.
      if (tdescTy.isScattered()) {
        int64_t chunkSize = tdescTy.getChunkSizeAsInt();

        if (chunkSize > 1) {
          int64_t blockedChunkSize = chunkSize;
          auto instData = tdescTy.getLayoutAttr().getInstData();
          if (!instData.empty())
            blockedChunkSize = instData.asArrayRef().back();

          // Create a new encoding with the adjusted chunk_size.
          auto newEncoding = xegpu::ScatterTensorDescAttr::get(
              ctx, tdescTy.getMemorySpace(), blockedChunkSize);
          encoding = newEncoding;
        }
      }

      newTy =
          xegpu::TensorDescType::get(ctx, tileShape, elemTy, encoding,
                                     tdescTy.getLayoutAttr().dropInstData());
    } else {
      newTy = VectorType::get(tileShape, elemTy);
    }

    if (returnSingleType)
      return SmallVector<Type>{newTy};
    std::optional<SmallVector<int64_t>> ratio =
        computeShapeRatio(type.getShape(), tileShape);
    assert(ratio && "The shape of the type must be a multiple of tileShape.");
    return SmallVector<Type>(computeProduct(*ratio), newTy);
  });
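
  // For example (illustrative numbers), unrolling a vector<32x32xf16> with
  // tileShape [8, 16] produces computeProduct([4, 2]) = 8 copies of
  // vector<8x16xf16>.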

  RewritePatternSet patterns(ctx);
  patterns.add<ConvertLayoutOpPattern>(ctx);

  vector::UnrollVectorOptions vectorOptions;
  vectorOptions.setNativeShapeFn(options.nativeShape);

  xegpu::populateXeGPUUnrollPatterns(patterns, options);
  vector::populateVectorUnrollPatterns(patterns, vectorOptions);

  (void)applyPatternsGreedily(op, std::move(patterns));

  op->walk([](Operation *op) {
    // Remove the layout attributes cached per operand.
    for (OpOperand &opr : op->getOpOperands()) {
      std::string name = xegpu::getLayoutName(opr);
      if (op->hasAttrOfType<xegpu::LayoutAttr>(name))
        op->removeAttr(name);
    }

    // Update the layout attributes per result.
    for (OpResult result : op->getOpResults()) {
      std::string name = xegpu::getLayoutName(result);
      if (auto layout = op->getAttrOfType<xegpu::LayoutAttr>(name)) {
        op->removeAttr(name);
        if (!isa<LoopLikeOpInterface>(op))
          xegpu::setDistributeLayoutAttr(result, layout.dropInstData());
      }
    }

    // Resolve unrealized conversion cast ops emulating pack/unpack.
    if (auto castOp = dyn_cast<UnrealizedConversionCastOp>(op))
      resolveUnrealizedConversionCastOp(castOp);
  });
}