// MLIR 21.0.0git — Doxygen source listing of LinalgTransformOps.cpp.
// (Page furniture from the documentation generator, kept as a comment.)
1 //===- LinalgTransformOps.cpp - Implementation of Linalg transform ops ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 
12 
38 #include "mlir/IR/PatternMatch.h"
39 #include "mlir/IR/TypeUtilities.h"
41 #include "mlir/Support/LLVM.h"
42 #include "mlir/Support/TypeID.h"
44 #include "llvm/ADT/STLExtras.h"
45 #include "llvm/ADT/ScopeExit.h"
46 #include "llvm/ADT/TypeSwitch.h"
47 #include "llvm/Support/Debug.h"
48 #include "llvm/Support/LogicalResult.h"
49 #include <type_traits>
50 
51 using namespace mlir;
52 using namespace mlir::linalg;
53 using namespace mlir::transform;
54 
55 #define DEBUG_TYPE "linalg-transforms"
56 #define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
57 #define DBGSNL() (llvm::dbgs() << "\n")
58 #define LDBG(X) LLVM_DEBUG(DBGS() << (X) << "\n")
59 
60 /// Attempts to apply the pattern specified as template argument to the given
61 /// operation. The pattern is expected to have a `returningMatchAndRewrite`
62 /// function that returns the "main" result or failure. Returns failure if the
63 /// pattern failed to apply. Extra arguments are forwarded to the pattern
64 /// constructor.
65 template <typename PatternTy, typename... Args>
66 static FailureOr<LinalgOp> tryApply(Operation *operation, Args &&...args) {
67  // Check if the given operation has the type expected by the pattern.
68  using OpTy = typename llvm::function_traits<
69  decltype(&PatternTy::returningMatchAndRewrite)>::template arg_t<0>;
70  auto op = dyn_cast<OpTy>(operation);
71  if (!op)
72  return failure();
73 
74  // Apply the pattern directly to the op.
75  PatternTy pattern(operation->getContext(), std::forward<Args>(args)...);
76  // We want to discourage direct use of PatternRewriter in APIs but In this
77  // very specific case, an IRRewriter is not enough.
78  struct TrivialPatternRewriter : public PatternRewriter {
79  public:
80  explicit TrivialPatternRewriter(MLIRContext *context)
81  : PatternRewriter(context) {}
82  };
83  TrivialPatternRewriter rewriter(operation->getContext());
84  rewriter.setInsertionPoint(operation);
85  auto result = pattern.returningMatchAndRewrite(op, rewriter);
86  if (failed(result))
87  return failure();
88  return cast<LinalgOp>(result->getOperation());
89 }
90 
91 /// Assuming that `ofr` is an index attr or a param of index type
92 /// or a transform dialect handle mapped to exactly one op
93 /// with one index result, return that value.
// NOTE(review): this extracted listing elides several original lines of this
// function (the signature line introducing `ofrs` and the out-vector
// `result`, the `diag` declarations ahead of emitSilenceableError, and the
// trailing success return). Comments below annotate only the visible code.
95  transform::TransformState &state, TransformOpInterface transformOp,
97  for (OpFoldResult ofr : ofrs) {
// Case 1: a static attribute. It must be an IntegerAttr and is forwarded
// unchanged into `result`.
98  if (auto attr = dyn_cast<Attribute>(ofr)) {
99  if (!isa<IntegerAttr>(attr))
100  return transformOp.emitDefiniteFailure() << "expected IntegerAttr";
101  result.push_back(ofr);
102  continue;
103  }
104 
// Case 2: a transform dialect param. Exactly one attribute must be
// associated with it; that attribute is forwarded.
105  Value transformValue = cast<Value>(ofr);
106  if (isa<TransformParamTypeInterface>(transformValue.getType())) {
107  ArrayRef<Attribute> params = state.getParams(transformValue);
108  if (params.size() != 1)
109  return transformOp.emitDefiniteFailure()
110  << "requires exactly one parameter associated";
111  result.push_back(params[0]);
112  continue;
113  }
114 
// Case 3: a transform handle. It must map to exactly one payload op ...
115  auto payloadOps = state.getPayloadOps(transformValue);
116  if (!llvm::hasSingleElement(payloadOps)) {
118  transformOp.emitSilenceableError()
119  << "handle must be mapped to exactly one payload op";
120  diag.attachNote(transformValue.getLoc())
121  << "mapped to " << llvm::range_size(payloadOps) << " payload ops";
122  return diag;
123  }
124 
// ... and that payload op must produce a single index-typed result, which is
// what gets forwarded into `result`.
125  Operation *op = *payloadOps.begin();
126  if (op->getNumResults() != 1 || !op->getResult(0).getType().isIndex()) {
128  transformOp.emitSilenceableError()
129  << "payload op must have exactly 1 index result";
130  diag.attachNote(op->getLoc())
131  << "has " << op->getNumResults() << " results";
132  return diag;
133  }
134  result.push_back(op->getResult(0));
135  }
136 
138 }
139 
140 // Given a list of params that are index attrs or a list of OpFoldResults
141 // that are either index attrs or op handles, return a list of OpFoldResults
142 // of index attrs or a list of OpFoldResults where all op handles are
143 // replaced with the first (and only) OpResult of that payload op.
144 // (There must be exactly one parameter associated with the AnyParamType or
145 // one mapped payload op which must have exactly one index result.)
// NOTE(review): this extracted listing elides the signature line and two
// return/diag lines of this function; annotations below cover only what is
// visible.
147  transform::TransformState &state, TransformOpInterface transformOp,
148  SmallVector<OpFoldResult> &result, Value packedHandle) {
// Param case: every associated attribute must be an IntegerAttr; each one is
// forwarded into `result`. (An early success return appears elided after the
// closing brace of this branch.)
149  if (isa<TransformParamTypeInterface>(packedHandle.getType())) {
150  ArrayRef<Attribute> params = state.getParams(packedHandle);
151  for (auto param : params) {
152  if (!isa<IntegerAttr>(param))
153  return transformOp.emitDefiniteFailure()
154  << "expected the parameter to be associated with an integer "
155  "attribute";
156  result.push_back(param);
157  }
159  }
160 
// Handle case: every mapped payload op must have exactly one index-typed
// result, which is forwarded into `result`.
161  for (Operation *op : state.getPayloadOps(packedHandle)) {
162  if (op->getNumResults() != 1 || !op->getResult(0).getType().isIndex()) {
164  transformOp.emitSilenceableError()
165  << "payload op must have exactly 1 index result";
166  diag.attachNote(op->getLoc())
167  << "has " << op->getNumResults() << " results";
168  return diag;
169  }
170  result.push_back(op->getResult(0));
171  }
172 
174 }
175 
176 /// When possible, converts each `OpFoldResult` in `mixedResult` to
177 /// an integer if the value can be statically inferred. If a result
178 /// is a `Value` then it must be either a `ParamType` or a handle
179 /// to an a constant like op.
// NOTE(review): the signature line and the final return are elided in this
// extracted listing; the reified integers accumulate into `reified`.
181  TransformState &state, TransformOpInterface &transformOp,
182  ArrayRef<OpFoldResult> mixedResults, SmallVectorImpl<int64_t> &reified) {
183  for (OpFoldResult paramOrHandle : mixedResults) {
// Case 1: a static attribute — extract the integer directly.
184  if (auto attr = dyn_cast<Attribute>(paramOrHandle)) {
185  reified.push_back(cast<IntegerAttr>(attr).getInt());
186  continue;
// Case 2: a transform param — it must carry exactly one IntegerAttr.
187  } else if (isa<ParamType>(cast<Value>(paramOrHandle).getType())) {
188  ArrayRef<Attribute> params = state.getParams(cast<Value>(paramOrHandle));
189  if (params.size() != 1)
190  return transformOp.emitSilenceableError() << "expected a single param";
191  reified.push_back(
192  cast<IntegerAttr>(params.front()).getValue().getSExtValue());
193  continue;
194  }
195 
// Case 3: an op handle — it must map to exactly one payload op that has a
// single index-typed result produced by a constant-like op.
196  Value handle = cast<Value>(paramOrHandle);
197  if (!isa<TransformHandleTypeInterface>(handle.getType()))
198  return transformOp.emitSilenceableError() << "unexpected value handle";
199  auto payload = state.getPayloadOps(handle);
200  if (!llvm::hasSingleElement(payload))
201  return transformOp.emitSilenceableError()
202  << "requires param or handle that is mapped to 1 payload op";
203 
204  Operation *paramOrHandlePayloadOp = *payload.begin();
205  if (paramOrHandlePayloadOp->getNumResults() != 1 ||
206  !paramOrHandlePayloadOp->getResult(0).getType().isIndex()) {
207  return transformOp.emitSilenceableError()
208  << "requires param or handle to be result of op with 1 index "
209  "result";
210  }
211 
// matchPattern + m_Constant extracts the constant's integer attribute.
212  IntegerAttr attr;
213  if (!matchPattern(paramOrHandlePayloadOp->getResult(0), m_Constant(&attr)))
214  return transformOp.emitSilenceableError()
215  << "requires param or handle to be the result of a constant like "
216  "op";
217 
218  reified.push_back(attr.getInt());
219  }
221 }
222 
223 //===----------------------------------------------------------------------===//
224 // Apply...PatternsOp
225 //===----------------------------------------------------------------------===//
226 
// NOTE(review): the parameter lists and bodies of these populatePatterns
// hooks are elided in this extracted listing — only the signatures' first
// lines (and one stray statement fragment) survive. Nothing about the
// elided bodies is asserted here; consult the full source before editing.
227 void transform::ApplyEraseUnnecessaryInputsPatternsOp::populatePatterns(
230 }
231 
232 void transform::ApplyDecomposeTensorPackUnpackPatternsOp::populatePatterns(
235 }
236 
237 void transform::ApplyDecomposeTensorPadPatternsOp::populatePatterns(
240 }
241 
242 void transform::ApplyFoldUnitExtentDimsViaReshapesPatternsOp::populatePatterns(
246 }
247 
248 void transform::ApplyFoldUnitExtentDimsViaSlicesPatternsOp::populatePatterns(
251  options.rankReductionStrategy =
254 }
255 
256 void transform::ApplyTilingCanonicalizationPatternsOp::populatePatterns(
259 }
260 
261 void transform::ApplyFoldAddIntoDestPatternsOp::populatePatterns(
264 }
265 
266 void transform::ApplyPadVectorizationPatternsOp::populatePatterns(
269 }
270 
271 void transform::ApplyFoldIntoPackAndUnpackPatternsOp::populatePatterns(
274 }
275 
276 void transform::ApplyFoldPackUnpackIntoEmptyPatternsOp::populatePatterns(
279 }
280 
281 //===----------------------------------------------------------------------===//
282 // BufferizeToAllocationOp
283 //===----------------------------------------------------------------------===//
284 
285 void transform::BufferizeToAllocationOp::build(OpBuilder &b,
286  OperationState &result,
287  Value target,
288  Attribute memorySpace) {
289  SmallVector<Type> resultTypes;
290  resultTypes.push_back(b.getType<transform::AnyValueType>());
291  resultTypes.push_back(b.getType<transform::AnyOpType>());
292  return build(b, result,
293  /*resultTypes=*/resultTypes,
294  /*target=*/target,
295  /*memorySpace=*/memorySpace);
296 }
297 
298 void transform::BufferizeToAllocationOp::build(OpBuilder &b,
299  OperationState &result,
300  Value target,
301  int64_t memorySpace) {
302  SmallVector<Type> resultTypes;
303  resultTypes.push_back(b.getType<transform::AnyValueType>());
304  resultTypes.push_back(b.getType<transform::AnyOpType>());
305  return build(b, result,
306  /*resultTypes=*/resultTypes,
307  /*target=*/target,
308  /*memorySpace=*/b.getI64IntegerAttr(memorySpace));
309 }
310 
311 namespace {
// Rewriter listener that records every op created while it is attached and
// forgets ops that get erased again. Used to populate the `new_ops` result
// of BufferizeToAllocationOp.
// NOTE(review): one line (original 314) is elided in this extracted listing —
// presumably a constructor or inherited-constructor declaration; confirm
// against the full source.
312 class NewOpsListener : public RewriterBase::ForwardingListener {
313 public:
315 
// Returns the surviving newly created ops as a vector snapshot.
316  SmallVector<Operation *> getNewOps() const {
317  return SmallVector<Operation *>(newOps.begin(), newOps.end());
318  }
319 
320 private:
321  void notifyOperationInserted(Operation *op,
322  OpBuilder::InsertPoint previous) override {
323  ForwardingListener::notifyOperationInserted(op, previous);
324  // We only care about newly created ops.
325  if (previous.isSet())
326  return;
327  auto inserted = newOps.insert(op);
328  (void)inserted;
329  assert(inserted.second && "expected newly created op");
330  }
331 
// On erasure, drop the erased op and everything nested inside it from the
// tracked set so getNewOps() never returns dangling pointers.
332  void notifyOperationErased(Operation *op) override {
333  ForwardingListener::notifyOperationErased(op);
334  op->walk([&](Operation *op) { newOps.erase(op); });
335  }
336 
337  DenseSet<Operation *> newOps;
338 };
339 } // namespace
340 
// NOTE(review): this extracted listing elides the parameter lines, the
// `options` declaration, the right-hand sides of the memcpyOp/allocOp
// assignments, and the final success return. Annotations below cover only
// the visible code.
341 DiagnosedSilenceableFailure transform::BufferizeToAllocationOp::apply(
344  // Attach listener to keep track of newly created ops.
345  OpBuilder::Listener *previousListener = rewriter.getListener();
346  auto resetListener =
347  llvm::make_scope_exit([&]() { rewriter.setListener(previousListener); });
348  NewOpsListener newOpsListener(previousListener);
349  rewriter.setListener(&newOpsListener);
350 
// Translate the string attributes into bufferization options. The verifier
// guarantees only the three memcpy and two alloc spellings below can occur,
// hence llvm_unreachable on the fallback branches.
352  if (getMemcpyOp() == "bufferization.materialize_in_destination") {
355  } else if (getMemcpyOp() == "memref.copy") {
356  options.memcpyOp =
358  } else if (getMemcpyOp() == "linalg.copy") {
359  options.memcpyOp =
361  } else {
362  llvm_unreachable("invalid memcpy op");
363  }
364  if (getAllocOp() == "memref.alloc") {
365  options.allocOp =
367  } else if (getAllocOp() == "memref.alloca") {
368  options.allocOp =
370  } else {
371  llvm_unreachable("invalid alloc op");
372  }
373  options.bufferizeDestinationOnly = getBufferizeDestinationOnly();
374  options.emitDealloc = getEmitDealloc();
375 
376  // Bufferize ops.
377  Attribute memorySpace =
378  getMemorySpace().has_value() ? getMemorySpace().value() : Attribute();
379  SmallVector<Value> allocatedBuffers;
380  for (Operation *op : state.getPayloadOps(getTarget())) {
381  Value buffer =
382  linalg::bufferizeToAllocation(rewriter, options, op, memorySpace);
// A null buffer signals failure for this payload op; report it silenceably
// with a note pointing at the offending op.
383  if (!buffer) {
384  DiagnosedSilenceableFailure diag = emitSilenceableError()
385  << "failed to bufferize operation";
386  diag.attachNote(op->getLoc()) << "target payload op";
387  return diag;
388  }
389  allocatedBuffers.push_back(buffer);
390  }
391 
392  // Set results.
393  results.setValues(cast<OpResult>(getAllocatedBuffer()), allocatedBuffers);
394  results.set(cast<OpResult>(getNewOps()), newOpsListener.getNewOps());
396 }
397 
// Declares the transform-dialect side effects of BufferizeToAllocationOp:
// the target handle is consumed unless only the destination is bufferized
// (in which case the payload op survives and the handle stays valid).
// NOTE(review): the effects-parameter line (original 399) is elided in this
// extracted listing.
398 void transform::BufferizeToAllocationOp::getEffects(
400  if (getBufferizeDestinationOnly()) {
401  // The destination is replaced with a newly allocated buffer, but the op
402  // itself remains in place.
403  onlyReadsHandle(getTargetMutable(), effects);
404  } else {
405  consumesHandle(getTargetMutable(), effects);
406  }
407  producesHandle(getOperation()->getOpResults(), effects);
408  modifiesPayload(effects);
409 }
410 
// Verifier body: restricts memcpy_op / alloc_op to the spellings handled in
// apply() above.
// NOTE(review): the `verify()` signature line (original 411) is elided in
// this extracted listing.
412  if (getMemcpyOp() != "bufferization.materialize_in_destination" &&
413  getMemcpyOp() != "memref.copy" && getMemcpyOp() != "linalg.copy")
414  return emitOpError() << "unsupported memcpy op";
415  if (getAllocOp() != "memref.alloc" && getAllocOp() != "memref.alloca")
416  return emitOpError() << "unsupported alloc op";
417  return success();
418 }
419 
420 //===----------------------------------------------------------------------===//
421 // DecomposeOp
422 //===----------------------------------------------------------------------===//
423 
// Tries each registered conv/pooling downscaling pattern in turn via
// tryApply; the first one that succeeds records the rewritten op and returns
// success. If none applies, a default silenceable failure is emitted.
// NOTE(review): this extracted listing elides the return-type line, one
// parameter line, and two DOWNSCALE_NORMAL entries (originals 450-451).
425 transform::DecomposeOp::applyToOne(transform::TransformRewriter &rewriter,
426  LinalgOp target,
428  transform::TransformState &state) {
// DOWNSCALE attempts one pattern and early-returns on success.
429 #define DOWNSCALE(trans) \
430  { \
431  FailureOr<LinalgOp> res = tryApply<trans>(target); \
432  if (succeeded(res)) { \
433  results.push_back(*res); \
434  return DiagnosedSilenceableFailure::success(); \
435  } \
436  }
437 
438 #define DOWNSCALE_CALL(a, b) DownscaleSizeOneWindowed2DConvolution<a, b>
439 #define DOWNSCALE_NORMAL(a, b) DOWNSCALE(DOWNSCALE_CALL(a, b))
440 
// Each entry maps a 2-D conv/pooling op to its 1-D counterpart.
441  DOWNSCALE_NORMAL(Conv2DNhwcHwcfOp, Conv1DNwcWcfOp)
442  DOWNSCALE_NORMAL(Conv2DNchwFchwOp, Conv1DNcwFcwOp)
443  DOWNSCALE_NORMAL(PoolingNhwcSumOp, PoolingNwcSumOp)
444  DOWNSCALE_NORMAL(PoolingNchwSumOp, PoolingNcwSumOp)
445  DOWNSCALE_NORMAL(PoolingNhwcMaxOp, PoolingNwcMaxOp)
446  DOWNSCALE_NORMAL(PoolingNhwcMaxUnsignedOp, PoolingNwcMaxUnsignedOp)
447  DOWNSCALE_NORMAL(PoolingNhwcMinOp, PoolingNwcMinOp)
448  DOWNSCALE_NORMAL(PoolingNhwcMinUnsignedOp, PoolingNwcMinUnsignedOp)
449  DOWNSCALE_NORMAL(PoolingNchwMaxOp, PoolingNcwMaxOp)
452 #undef DOWNSCALE_NORMAL
453 #undef DOWNSCALE_CALL
454 #undef DOWNSCALE
455  return emitDefaultSilenceableFailure(target);
456 }
457 
458 //===----------------------------------------------------------------------===//
459 // DecomposeInterfaceOp
460 //===----------------------------------------------------------------------===//
461 
462 // Decompose the target operation if it implements the AggregatedOpInterface.
463 // Push the decomposed operations (the ones that replaces the values produced by
464 // \p target) in the `results`.
// NOTE(review): this extracted listing elides one parameter line (original
// 467, presumably the ApplyToEachResultList &results — confirm) and the
// final success return (original 487).
465 DiagnosedSilenceableFailure transform::DecomposeInterfaceOp::applyToOne(
466  transform::TransformRewriter &rewriter, Operation *target,
468  transform::TransformState &state) {
469  auto decomposableOp = dyn_cast<AggregatedOpInterface>(target);
470  if (!decomposableOp) {
// failed(...) is used only to consume the LogicalResult returned by
// notifyMatchFailure; its value is intentionally unused.
471  failed(rewriter.notifyMatchFailure(target,
472  "payload is not a decomposable op"));
473  return emitDefaultSilenceableFailure(target);
474  }
475 
476  FailureOr<SmallVector<Value>> maybeNewResults =
477  decomposableOp.decomposeOperation(rewriter);
478  if (failed(maybeNewResults))
479  return emitDefaultSilenceableFailure(target);
480 
// Replace the aggregate op and report each defining op of the replacement
// values back as a result handle (block arguments have no defining op and
// are skipped).
481  rewriter.replaceOp(decomposableOp, *maybeNewResults);
482  for (Value val : *maybeNewResults) {
483  Operation *definition = val.getDefiningOp();
484  if (definition)
485  results.push_back(definition);
486  }
488 }
489 
490 //===----------------------------------------------------------------------===//
491 // EliminateLinalgOpAnchoredEmptyTensorsOp
492 //===----------------------------------------------------------------------===//
493 
// Effects: the target handle is only read (payload ops stay mapped), but the
// payload IR is modified.
// NOTE(review): the effects-parameter line (original 495) is elided in this
// extracted listing.
494 void transform::EliminateLinalgOpAnchoredEmptyTensorsOp::getEffects(
496  onlyReadsHandle(getTargetMutable(), effects);
497  modifiesPayload(effects);
498 }
499 
// For each payload op: run one-shot bufferization analysis, then eliminate
// linalg-anchored tensor.empty ops; either step failing yields a silenceable
// failure anchored at the payload op.
// NOTE(review): this extracted listing elides the return-type line, the
// options/analysis-state declarations, the elimination call line, and the
// final success return.
501 transform::EliminateLinalgOpAnchoredEmptyTensorsOp::apply(
502  transform::TransformRewriter &rewriter, TransformResults &transformResults,
503  TransformState &state) {
505  options.allowReturnAllocsFromLoops = true;
506 
507  for (Operation *target : state.getPayloadOps(getTarget())) {
509  if (failed(analyzeOp(target, state)))
510  return mlir::emitSilenceableFailure(target->getLoc())
511  << "failed to analyze op";
513  rewriter, target, state)))
514  return mlir::emitSilenceableFailure(target->getLoc())
515  << "failed to eliminate LinalgOp anchored tensor.empty ops";
516  }
518 }
519 
520 //===----------------------------------------------------------------------===//
521 // FuseOp
522 //===----------------------------------------------------------------------===//
523 
524 /// Apply a tiling transformation to all payload ops and store both the
525 /// tiled operation as well as the created tile loops.
526 template <typename Range>
527 static LogicalResult applyTilingToAll(
528  RewriterBase &rewriter, Operation *transformOp, Range &&payloadOps,
529  unsigned numLoops, transform::TransformResults &transformResults,
530  function_ref<FailureOr<scf::SCFTileAndFuseResult>(TilingInterface)>
531  applyFn) {
532  SmallVector<Operation *> tiledLinalgOps;
533  SmallVector<SmallVector<Operation *>> loopOps(numLoops);
534 
535  for (Operation *target : payloadOps) {
536  auto tilingInterfaceOp = dyn_cast<TilingInterface>(target);
537  if (!tilingInterfaceOp)
538  return transformOp->emitError("only TilingInterface ops are supported");
539 
540  rewriter.setInsertionPoint(target);
541  FailureOr<scf::SCFTileAndFuseResult> tiledResults =
542  applyFn(tilingInterfaceOp);
543  if (failed(tiledResults))
544  return failure();
545 
546  // Perform the replacement of tiled and fused values.
547  SmallVector<Operation *> opsToReplace{target};
548  llvm::append_range(opsToReplace, tiledResults->fusedProducers);
549  for (Operation *toReplace : opsToReplace) {
550  for (OpResult res : toReplace->getResults())
551  if (auto replacement = tiledResults->replacements.lookup(res))
552  rewriter.replaceAllUsesWith(res, replacement);
553  if (toReplace->use_empty()) {
554  rewriter.eraseOp(toReplace);
555  }
556  }
557 
558  // Report back the relevant handles to the transform op.
559  tiledLinalgOps.push_back(tiledResults->tiledAndFusedOps.front());
560  assert(tiledResults->loops.size() == numLoops &&
561  "Mismatched number of loops, tile and fuse transform should have "
562  "failed");
563  for (unsigned int i = 0; i < numLoops; ++i)
564  loopOps[i].push_back(tiledResults->loops[i]);
565  }
566 
567  transformResults.set(transformOp->getOpResult(0), tiledLinalgOps);
568  for (unsigned int i = 0; i < numLoops; ++i)
569  transformResults.set(transformOp->getOpResult(i + 1), loopOps[i]);
570 
571  return success();
572 }
573 
// Tiles every payload op with the static tile sizes / interchange from the
// op's attributes and fuses producers via tileConsumerAndFuseProducersUsingSCF,
// delegating replacement bookkeeping to applyTilingToAll.
// NOTE(review): this extracted listing elides the return-type line, the
// TransformState parameter line, and two pattern-population lines inside the
// apply_cleanup branch (originals 595-596).
575 transform::FuseOp::apply(transform::TransformRewriter &rewriter,
576  mlir::transform::TransformResults &transformResults,
578  SmallVector<int64_t> tileSizes =
579  extractFromIntegerArrayAttr<int64_t>(getTileSizes());
580  SmallVector<int64_t> tileInterchange =
581  extractFromIntegerArrayAttr<int64_t>(getTileInterchange());
582 
583  scf::SCFTilingOptions tilingOptions;
584  tilingOptions.interchangeVector = tileInterchange;
585  SmallVector<OpFoldResult> tileSizesOfr =
586  getAsIndexOpFoldResult(rewriter.getContext(), tileSizes);
587  tilingOptions = tilingOptions.setTileSizes(tileSizesOfr);
588  scf::SCFTileAndFuseOptions tileAndFuseOptions;
589  tileAndFuseOptions.tilingOptions = tilingOptions;
590 
// Optionally register cleanup canonicalization patterns that run between
// fusion steps.
591  if (getApplyCleanup()) {
592  MLIRContext *context = rewriter.getContext();
593  RewritePatternSet patterns(context);
594  tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, context);
597  tileAndFuseOptions.cleanupPatterns = std::move(patterns);
598  }
599 
// A tile size of 0 means "do not tile this dimension", so the number of
// generated loops is the count of non-zero sizes.
600  LogicalResult result = applyTilingToAll(
601  rewriter, getOperation(), state.getPayloadOps(getTarget()),
602  tileSizes.size() - llvm::count(tileSizes, 0), transformResults,
603  [&](TilingInterface tilingInterfaceOp)
604  -> FailureOr<scf::SCFTileAndFuseResult> {
605  return tileConsumerAndFuseProducersUsingSCF(rewriter, tilingInterfaceOp,
606  tileAndFuseOptions);
607  });
608  return failed(result) ? DiagnosedSilenceableFailure::definiteFailure()
609  : DiagnosedSilenceableFailure::success();
610 }
611 
612 LogicalResult transform::FuseOp::verify() {
613  SmallVector<int64_t> permutation =
614  extractFromIntegerArrayAttr<int64_t>(getTileInterchange());
615  auto sequence = llvm::to_vector(llvm::seq<int64_t>(0, permutation.size()));
616  if (!std::is_permutation(sequence.begin(), sequence.end(),
617  permutation.begin(), permutation.end())) {
618  return emitOpError() << "expects interchange to be a permutation, found "
619  << getTileInterchange();
620  }
621 
622  SmallVector<int64_t> sizes =
623  extractFromIntegerArrayAttr<int64_t>(getTileSizes());
624  size_t numExpectedLoops = sizes.size() - llvm::count(sizes, 0);
625  if (numExpectedLoops != getNumResults() - 1)
626  return emitOpError() << "expects " << numExpectedLoops << " loop results";
627 
628  return success();
629 }
630 
631 //===----------------------------------------------------------------------===//
632 // FuseIntoContainingOp
633 //===----------------------------------------------------------------------===//
634 
635 void transform::FuseIntoContainingOp::build(OpBuilder &builder,
636  OperationState &result,
637  Value producerOp,
638  Value containingOp) {
639  result.addOperands({producerOp, containingOp});
640  auto resultType = transform::AnyOpType::get(builder.getContext());
641  result.addTypes({resultType, resultType});
642 }
643 
644 /// Add new operands to the forall op for users of the producerOp
645 /// that are dominated by the containing scf.forall op.
// NOTE(review): the function's signature line (original 646, introducing
// `replaceForAllWithNewSignature` — see the call site in the fusion helper
// below) is elided in this extracted listing.
647  RewriterBase &rewriter, Diagnostic &diag, Operation *producerOp,
648  Operation *containingOp, TilingResult &tileAndFuseResult,
649  int64_t resultNumber, SmallVector<OpFoldResult> &offsets,
650  SmallVector<OpFoldResult> &sizes) {
651 
652  // Count number of users not including the containing op
653  SetVector<Operation *> dominatedUsers;
654  DominanceInfo domInfo(containingOp);
655  for (Operation *user : producerOp->getResult(resultNumber).getUsers()) {
656  if (!containingOp->isAncestor(user) &&
657  (domInfo.dominates(containingOp, user))) {
658  dominatedUsers.insert(user);
659  }
660  }
// Nothing outside the containing op needs the value: no signature change.
661  if (dominatedUsers.empty())
662  return nullptr;
663 
664  // Create new scf.forall op
665  auto forallOp = cast<scf::ForallOp>(containingOp);
666  OpBuilder::InsertionGuard g(rewriter);
667  rewriter.setInsertionPoint(forallOp);
668 
669  // Get new output
670  Location loc = forallOp.getLoc();
// Only linalg.generic producers are supported for appending an output.
671  auto genericOp = dyn_cast<linalg::GenericOp>(producerOp);
672  if (!genericOp)
673  return nullptr;
674  SmallVector<Value> outputs = genericOp.getOutputs();
675  SmallVector<Value> newOuts(forallOp.getOutputs());
676  newOuts.push_back(outputs[resultNumber]);
677 
678  // Create new scf.forall op
679  auto newforallOp = rewriter.create<scf::ForallOp>(
680  loc, forallOp.getMixedLowerBound(), forallOp.getMixedUpperBound(),
681  forallOp.getMixedStep(), newOuts, forallOp.getMapping());
// Steal the old body wholesale rather than cloning it.
682  rewriter.eraseBlock(newforallOp.getBody());
683  newforallOp.getRegion().takeBody(forallOp.getRegion());
684 
685  // Add additional block argument for new value being returned
686  // and replaces all uses of the new output with corresponding bbArg
687  // inside the scf.forall to enable fusion into this new scf.forall.
688  newforallOp.getBody()->addArgument(newOuts.back().getType(),
689  newOuts.back().getLoc());
690  auto bbArgs = newforallOp.getBody()->getArguments();
691  rewriter.replaceUsesWithIf(newOuts.back(), bbArgs.back(),
692  [&](OpOperand &use) {
693  Operation *op = use.getOwner();
694  return newforallOp->isProperAncestor(op);
695  });
696 
697  // Fix terminator
698  scf::InParallelOp terminatorOp = newforallOp.getTerminator();
699  SmallVector<Operation *> yieldingOps = llvm::to_vector<4>(llvm::map_range(
700  terminatorOp.getYieldingOps(), [](Operation &op) { return &op; }));
701  Operation *firstYieldOp = yieldingOps.front();
702  rewriter.setInsertionPoint(firstYieldOp);
// Yield the tiled value into the newly appended shared output using the
// same offsets/sizes as the extract slice, with unit strides.
703  Value src = tileAndFuseResult.tiledValues[0];
704  Value dst = newforallOp.getRegionIterArgs().back();
705  SmallVector<OpFoldResult> strides(offsets.size(), rewriter.getIndexAttr(1));
706  rewriter.create<tensor::ParallelInsertSliceOp>(firstYieldOp->getLoc(), src,
707  dst, offsets, sizes, strides);
708 
// Rewire all old forall results to the new forall, and point dominated users
// of the producer's result at the newly appended forall result.
709  for (auto result : llvm::enumerate(forallOp.getResults())) {
710  rewriter.replaceAllUsesWith(result.value(),
711  newforallOp->getResult(result.index()));
712  }
713  rewriter.replaceUsesWithIf(producerOp->getResult(resultNumber),
714  newforallOp->getResults().back(),
715  [&](OpOperand &use) {
716  Operation *user = use.getOwner();
717  return dominatedUsers.contains(user);
718  });
719  return newforallOp;
720 }
721 
722 /// Given two operands coming from a loop iter arg, 'src' and 'dst', return true
723 /// if the operand 'src' is equal to 'dst' or equal to a iter arg present in a
724 /// outer loop. To determine the second condition, this function iterates
725 /// using a worklist over the enclosing loops, trying to find 'src' in any of
726 /// the parent loop's iter args.
727 static bool sameOrEquivalentIterArg(Value src, Value dst) {
728  // Stack like vector containing possible iterArgs candidates. The first one
729  // is dst, and we will transverse the IR from there.
730  SmallVector<Value> destWorklist;
731  destWorklist.push_back(dst);
732 
733  while (!destWorklist.empty()) {
734  Value currentDst = destWorklist.pop_back_val();
735 
736  // We have found the same operand in some iter arg in the loop structure,
737  // so src and dst are equivalent.
738  if (src == currentDst)
739  return true;
740 
741  // The operands are not equivalent, look for enclosing loops over
742  // currentDst.
743  auto bbArg = dyn_cast<BlockArgument>(currentDst);
744  if (!bbArg)
745  continue;
746 
747  Block *parentBlock = bbArg.getOwner();
748  assert(parentBlock && "unlinked block argument");
749 
750  Operation *parentOp = parentBlock->getParentOp();
751  assert(parentOp && "expected block argument with parent operation");
752 
753  // Check if parent is loop-like. If it's not, do not add it to the worklist.
754  auto parentLoop = dyn_cast<LoopLikeOpInterface>(parentOp);
755  if (!parentLoop)
756  continue;
757 
758  for (auto innerIterArg : parentLoop.getRegionIterArgs()) {
759  // No need to check for null as innerIterArg is tied to parentLoop.
760  OpOperand *operand = parentLoop.getTiedLoopInit(innerIterArg);
761  Value loopBlockArgument =
762  parentLoop->getOperand(operand->getOperandNumber());
763  destWorklist.push_back(loopBlockArgument);
764  }
765  }
766 
767  return false;
768 }
769 
770 /// Find the first "extract" user of `producerOp` and tile it right before its
771 /// use. The tiled op is fused under the `containingOp`.
772 /// Return this fused op on success or nullptr if anything fails.
773 /// If tiled op has uses that are dominated by `containingOp`, return
774 /// a new `containingOp` with results of the fused op appended to
775 /// results of the `containingOp` or nullptr if there are no dominated uses.
// NOTE(review): the line carrying this function's name and leading
// parameters (original 777) is elided in this extracted listing.
776 static std::tuple<SmallVector<Operation *>, Operation *>
778  Operation *producerOp, Operation *containingOp) {
779  LLVM_DEBUG(DBGS() << "Try to fuse a direct extract use\n");
780  auto tileableProducer = dyn_cast<TilingInterface>(producerOp);
781  if (!tileableProducer) {
782  diag.attachNote(producerOp->getLoc())
783  << "producer is not a TileableInterface: " << *producerOp;
784  return {};
785  }
786 
787  // Search the producer slices accessed within the containing operation.
788  // TODO: Generalize to more extract/insert/parallel_insert triples, maybe
789  // evolve into an interface.
790  auto it = llvm::find_if(tileableProducer->getUsers(), [&](Operation *user) {
791  auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(user);
792  return sliceOp && containingOp->isProperAncestor(sliceOp);
793  });
794 
795  // Find a fusion opportunity.
796  if (it == tileableProducer->getUsers().end()) {
797  diag.attachNote(tileableProducer->getLoc())
798  << "could not find fusion opportunity for: " << *tileableProducer;
799  return {};
800  }
801  auto sliceOpToTile = cast<tensor::ExtractSliceOp>(*it);
802 
803  // Try to fuse the producer in-place.
804  OpBuilder::InsertionGuard guard(rewriter);
805  rewriter.setInsertionPoint(sliceOpToTile);
806 
807  // Clone the producer inside the consumer and try to update the producer init
808  // operands using the loop bbArgs if applicable. More precisely, if the bbArg
809  // of the container loop points to a value that it is used by the consumer op,
810  // then, instead of using such value on the consumer, use the value coming
811  // from the bbArg instead. This allows to reuse the output tensor (instead of
812  // creating a new one) of the container when both producer and container write
813  // to the same output.
814  if (LoopLikeOpInterface containerLoop =
815  dyn_cast<LoopLikeOpInterface>(sliceOpToTile->getParentOp())) {
816  Operation *clone = rewriter.clone(*producerOp);
817  rewriter.modifyOpInPlace(clone, [&]() {
818  // Iterate over the outputs of the producer and over the loop bbArgs and
819  // check if any bbArg points to the same value as the producer output. In
820  // such case, make the producer output point to the bbArg directly.
821  for (OpOperand &initOperandPtr :
822  cast<DestinationStyleOpInterface>(clone).getDpsInitsMutable()) {
823  Value producerOperand =
824  clone->getOperand(initOperandPtr.getOperandNumber());
825  for (BlockArgument containerIterArg :
826  containerLoop.getRegionIterArgs()) {
827  OpOperand *bbArg = containerLoop.getTiedLoopInit(containerIterArg);
828  Value consumerOperand =
829  containerLoop->getOperand(bbArg->getOperandNumber());
830  // The producer has the same init as the loop bbArg, use it.
831  if (sameOrEquivalentIterArg(producerOperand, consumerOperand)) {
832  initOperandPtr.set(containerIterArg);
833  }
834  }
835  }
836  });
837 
// From here on, tiling operates on the (possibly rewired) clone.
838  tileableProducer = dyn_cast<TilingInterface>(clone);
839  }
840 
841  // Tile the producer.
842  int64_t resultNumber =
843  cast<OpResult>(sliceOpToTile.getSource()).getResultNumber();
844  LLVM_DEBUG(DBGS() << "resultNumber: " << resultNumber << "\n");
845 
// Tile exactly the region the extract_slice reads.
846  SmallVector<OpFoldResult> offsets = sliceOpToTile.getMixedOffsets();
847  SmallVector<OpFoldResult> sizes = sliceOpToTile.getMixedSizes();
848 
849  FailureOr<TilingResult> tileAndFuseResult =
850  tileableProducer.generateResultTileValue(rewriter, resultNumber, offsets,
851  sizes);
852 
853  if (failed(tileAndFuseResult)) {
854  diag.attachNote(tileableProducer->getLoc())
855  << "failed to tile producer op: " << *tileableProducer;
856  return {};
857  }
858 
859 #ifndef NDEBUG
860  for (auto *tiledOp : tileAndFuseResult->tiledOps) {
861  LLVM_DEBUG(DBGS() << "tiledProducer: " << *tiledOp << "\n");
862  }
863 #endif
864 
865  // Replace the extract op.
866  auto maybeRankReduced = tensor::ExtractSliceOp::rankReduceIfNeeded(
867  rewriter, sliceOpToTile->getLoc(), tileAndFuseResult->tiledValues[0],
868  cast<RankedTensorType>(sliceOpToTile->getResult(0).getType()).getShape());
869  if (failed(maybeRankReduced)) {
870  diag.attachNote(producerOp->getLoc())
871  << "shape types don't match (missing canonicalization?):\nTiledOp: "
872  << tileAndFuseResult->tiledValues[0]
873  << "\nSliceOp: " << sliceOpToTile.getOperation() << '\n';
874  return {};
875  }
876  rewriter.replaceOp(sliceOpToTile, *maybeRankReduced);
877 
878  // Add new outputs to containing op, if required
879  Operation *newContainingOp = replaceForAllWithNewSignature(
880  rewriter, diag, producerOp, containingOp, *tileAndFuseResult,
881  resultNumber, offsets, sizes);
882 
883  // Cleanup clone.
884  if (dyn_cast<LoopLikeOpInterface>(containingOp))
885  rewriter.eraseOp(tileableProducer);
886 
887  return std::make_tuple(tileAndFuseResult->tiledOps, newContainingOp);
888 }
889 
890 /// First, find the first "scf::ForallOp" user of `producerOp` and ensure
891 /// it is exactly the `containingOp`, otherwise bail.
892 /// Then, find the first "extract" user of the tied block argument and tile it
893 /// right before its "extract" use. The tiled op is fused under the
894 /// `containingOp`.
895 /// Return this fused op on success or nullptr if anything fails.
898  RewriterBase &rewriter, Diagnostic &diag, Operation *producerOp,
899  Operation *containingOp) {
900  LLVM_DEBUG(DBGS() << "Try to fuse an extract use through block argument\n");
901 
902  auto tileableProducer = dyn_cast<TilingInterface>(producerOp);
903  if (!tileableProducer) {
904  diag.attachNote(producerOp->getLoc())
905  << "producer is not a TileableInterface: " << *producerOp;
906  return {};
907  }
908 
909  // Search the first use by a "scf::ForallOp" user.
910  scf::ForallOp forallOp;
911  auto itProducerUses =
912  llvm::find_if(tileableProducer->getUses(), [&](OpOperand &use) {
913  forallOp = dyn_cast<scf::ForallOp>(use.getOwner());
914  return forallOp;
915  });
916  // If it's not from the containing op, return.
917  if (!forallOp || forallOp != containingOp) {
918  diag.attachNote(tileableProducer->getLoc())
919  << "could not find a use by the containing op: " << *tileableProducer;
920  return {};
921  }
922 
923  // Search the producer slices accessed within the containing
924  // operation.
925  // TODO: Generalize to more extract/insert/parallel_insert triples.
926  // Maybe evolve into an interface.
927  OpOperand *pUse = &(*itProducerUses);
928  BlockArgument bbArg = forallOp.getTiedBlockArgument(pUse);
929 
930  // Search the producer slices accessed within the containing operation.
931  // TODO: Generalize to more extract/insert/parallel_insert triples, maybe
932  // evolve into an interface.
933  auto itBBArgUsers = llvm::find_if(bbArg.getUsers(), [&](Operation *user) {
934  auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(user);
935  return sliceOp && containingOp->isProperAncestor(sliceOp);
936  });
937 
938  // Find a fusion opportunity.
939  if (itBBArgUsers == bbArg.getUsers().end()) {
940  diag.attachNote(containingOp->getLoc())
941  << "could not find fusion opportunity for bbArg: " << bbArg;
942  return {};
943  }
944  auto sliceOpToTile = cast<tensor::ExtractSliceOp>(*itBBArgUsers);
945 
946  // Try to fuse the producer in-place.
947  OpBuilder::InsertionGuard guard(rewriter);
948  rewriter.setInsertionPoint(sliceOpToTile);
949 
950  // Replace the use in the tileableProducer before tiling: clone, replace and
951  // then tile.
952  int64_t resultNumber = cast<OpResult>(pUse->get()).getResultNumber();
953  LLVM_DEBUG(DBGS() << "resultNumber: " << resultNumber << "\n");
954 
955  // Gather destination tensors.
956  SmallVector<Value> destinationTensors;
958  rewriter, tileableProducer->getLoc(), tileableProducer,
959  destinationTensors))) {
960  diag.attachNote(tileableProducer->getLoc())
961  << "failed to get destination tensors for: " << *tileableProducer;
962  return {};
963  }
964 
965  IRMapping bvm;
966  bvm.map(destinationTensors[resultNumber], bbArg);
967  auto tileableProducerClone =
968  cast<TilingInterface>(rewriter.clone(*tileableProducer, bvm));
969  auto scopeGuard =
970  llvm::make_scope_exit([&]() { rewriter.eraseOp(tileableProducerClone); });
971 
972  // Tile the producer.
973  FailureOr<TilingResult> tileAndFuseResult =
974  tileableProducerClone.generateResultTileValue(
975  rewriter, resultNumber, sliceOpToTile.getMixedOffsets(),
976  sliceOpToTile.getMixedSizes());
977  if (failed(tileAndFuseResult)) {
978  diag.attachNote(tileableProducer->getLoc())
979  << "failed to tile producer op: " << *tileableProducer;
980  return {};
981  }
982 
983  // Replace the extract op.
984  auto maybeRankReduced = tensor::ExtractSliceOp::rankReduceIfNeeded(
985  rewriter, sliceOpToTile->getLoc(), tileAndFuseResult->tiledValues[0],
986  cast<RankedTensorType>(sliceOpToTile->getResult(0).getType()).getShape());
987  assert(succeeded(maybeRankReduced) && "unexpected shape");
988  rewriter.replaceOp(sliceOpToTile, *maybeRankReduced);
989 
990  // Replace the use in containingOp.
991  rewriter.modifyOpInPlace(containingOp, [&]() {
992  containingOp->setOperand(pUse->getOperandNumber(),
993  destinationTensors.front());
994  });
995 
996  return tileAndFuseResult->tiledOps;
997 }
998 
1000  Operation *producerOp,
1001  Operation *containingOp) {
1002  LLVM_DEBUG(DBGS() << "Try to fuse an use by cloning\n");
1003 
1004  // Gather all uses inside the containing op.
1006  for (OpResult result : producerOp->getOpResults()) {
1007  for (OpOperand &use : result.getUses()) {
1008  if (containingOp->isProperAncestor(use.getOwner())) {
1009  uses.push_back(&use);
1010  continue;
1011  }
1012  // Cannot clone and fuse if the use is by the containing op itself: fail
1013  // immediately.
1014  if (containingOp == use.getOwner()) {
1015  diag.attachNote(producerOp->getLoc())
1016  << "producer op use by containing op cannot be fused by cloning";
1017  return nullptr;
1018  }
1019  }
1020  }
1021 
1022  // Check for a non-empty list of fusion opportunities.
1023  if (uses.empty()) {
1024  diag.attachNote(producerOp->getLoc()) << "no fusion opportunity by cloning";
1025  return nullptr;
1026  }
1027 
1028  // Clone and fuse inside the containing op.
1029  Operation *fusedOp = nullptr;
1030  OpOperand *use = uses.front();
1031  // Parallel insert slice is not a valid clone destination.
1032  // TODO: Generalize to other type of ops.
1033  assert(!isa<tensor::ParallelInsertSliceOp>(use->getOwner()) &&
1034  "Parallel insert slice is not a valid clone destination");
1035  unsigned resultNumber = cast<OpResult>(use->get()).getResultNumber();
1036  LLVM_DEBUG(DBGS() << "resultNumber: " << resultNumber << "\n");
1037 
1038  OpBuilder::InsertionGuard guard(rewriter);
1039  rewriter.setInsertionPoint(use->getOwner());
1040  fusedOp = rewriter.clone(*producerOp);
1041  rewriter.modifyOpInPlace(
1042  use->getOwner(), [&] { use->set(fusedOp->getOpResult(resultNumber)); });
1043 
1044  return fusedOp;
1045 }
1046 
1047 bool transform::FuseIntoContainingOp::allowsRepeatedHandleOperands() {
1048  // Allow repeated handles since we are fusing everything anyway.
1049  return true;
1050 }
1051 
1053 transform::FuseIntoContainingOp::apply(transform::TransformRewriter &rewriter,
1054  transform::TransformResults &results,
1055  transform::TransformState &state) {
1056  SmallVector<Operation *> fusedOps;
1057  auto producerOps = state.getPayloadOps(getProducerOp());
1058  auto containingOps = state.getPayloadOps(getContainingOp());
1059  if (!llvm::hasSingleElement(containingOps)) {
1060  return emitDefiniteFailure()
1061  << "requires exactly one containing_op handle (got "
1062  << llvm::range_size(containingOps) << ")";
1063  }
1064  Operation *containingOp = *containingOps.begin();
1065 
1066  // If nothing to fuse, propagate success.
1067  if (std::empty(producerOps)) {
1068  results.set(cast<OpResult>(getFusedOp()), SmallVector<mlir::Operation *>{});
1069  results.set(cast<OpResult>(getNewContainingOp()), {containingOp});
1071  }
1072 
1073  // Helper function to find the next producer that should be fused. Take any
1074  // producer that has a use inside the containing op.
1075  SetVector<Operation *> remainingProducers(llvm::from_range, producerOps);
1076  auto getNextProducer = [&]() -> FailureOr<Operation *> {
1077  for (const auto &it : enumerate(remainingProducers)) {
1078  Operation *producerOp = it.value();
1079  // The containing op may be a user of producerOp: use isAncestor.
1080  int64_t numUsesInContainingOp =
1081  llvm::count_if(producerOp->getUsers(), [&](Operation *op) {
1082  return containingOp->isAncestor(op);
1083  });
1084  // TODO: When resolving the TODO below (no duplicate ops), take an op
1085  // that has no use among the remaining producers. This is a topological
1086  // sorting.
1087  if (numUsesInContainingOp > 0) {
1088  if (numUsesInContainingOp == 1)
1089  remainingProducers.erase(remainingProducers.begin() + it.index());
1090  return producerOp;
1091  }
1092  }
1093  return failure();
1094  };
1095 
1096  while (!remainingProducers.empty()) {
1097  auto nextProducer = getNextProducer();
1098  if (failed(nextProducer)) {
1099  auto diag = mlir::emitSilenceableFailure(getLoc())
1100  << "could not find next producer to fuse into container";
1101  diag.attachNote(containingOp->getLoc()) << "containing op";
1102  return diag;
1103  }
1104 
1105  Operation *producerOp = *nextProducer;
1106 
1107  // Default diagnostic, to be complemented with more failure information.
1109  diag << "could not fuse " << *producerOp << " into " << *containingOp;
1110 
1111  // TODO: If there are multiple uses of the producer in the containing op,
1112  // we currently tile/clone the op multiple times (once per use). In some
1113  // cases, we can tile/clone once and reuse the value for each use.
1114  // Futhermore, producers should then be traversed according to a
1115  // topological sorting.
1116  auto [tiledOps, newContainingOp] =
1117  tileAndFuseFirstExtractUse(rewriter, diag, producerOp, containingOp);
1118  if (!tiledOps.empty()) {
1119  LLVM_DEBUG(DBGS() << "\nFused a direct extract use\n" << *containingOp);
1120  fusedOps.append(tiledOps);
1121  if (newContainingOp) {
1122  // Update handles associated with the containing op so we don't need to
1123  // invalidate them. This is a hack to support better composability
1124  // between tiling and fusion while a proper mechanism is being
1125  // investigated.
1126  //
1127  // DO NOT replicate this elsewhere unless you understand what you are
1128  // doing.
1129  LogicalResult replacementStatus =
1130  rewriter.notifyPayloadOperationReplaced(containingOp,
1131  newContainingOp);
1132  (void)replacementStatus;
1133  assert(succeeded(replacementStatus) &&
1134  "unable to update transform state mapping");
1135  rewriter.eraseOp(containingOp);
1136  containingOp = newContainingOp;
1137  }
1138  continue;
1139  }
1140 
1141  SmallVector<Operation *> tiledContainingOpOperand =
1143  rewriter, diag, producerOp, containingOp);
1144  if (!tiledContainingOpOperand.empty()) {
1145  LLVM_DEBUG(DBGS() << "\nFused an extract use through block argument\n"
1146  << *containingOp);
1147  fusedOps.append(tiledContainingOpOperand);
1148  continue;
1149  }
1150 
1151  Operation *cloned =
1152  cloneAndFuseFirstUse(rewriter, diag, producerOp, containingOp);
1153  if (cloned) {
1154  LLVM_DEBUG(DBGS() << "\nFused an use by cloning\n" << *containingOp);
1155  fusedOps.push_back(cloned);
1156  continue;
1157  }
1159  }
1160 
1161  results.set(cast<OpResult>(getFusedOp()), fusedOps);
1162  results.set(cast<OpResult>(getNewContainingOp()), {containingOp});
1164 }
1165 
1166 void transform::FuseIntoContainingOp::getEffects(
1168  consumesHandle(getProducerOpMutable(), effects);
1169  onlyReadsHandle(getContainingOpMutable(), effects);
1170  producesHandle(getOperation()->getOpResults(), effects);
1171  modifiesPayload(effects);
1172 }
1173 
1174 //===----------------------------------------------------------------------===//
1175 // GeneralizeOp
1176 //===----------------------------------------------------------------------===//
1177 
1179 transform::GeneralizeOp::applyToOne(transform::TransformRewriter &rewriter,
1180  LinalgOp target,
1182  transform::TransformState &state) {
1183  // Exit early if no transformation is needed.
1184  if (isa<GenericOp>(target)) {
1185  results.push_back(target);
1187  }
1188  rewriter.setInsertionPoint(target);
1189  FailureOr<LinalgOp> generic = generalizeNamedOp(rewriter, target);
1190  if (succeeded(generic)) {
1191  results.push_back(generic->getOperation());
1193  }
1194  return emitDefaultSilenceableFailure(target);
1195 }
1196 
1197 //===----------------------------------------------------------------------===//
1198 // SpecializeOp
//===----------------------------------------------------------------------===//
1200 
1202 transform::SpecializeOp::applyToOne(transform::TransformRewriter &rewriter,
1203  LinalgOp target,
1205  transform::TransformState &state) {
1206  // Exit early if the operation is not a generic.
1207  if (!isa<GenericOp>(target)) {
1208  results.push_back(target);
1210  }
1211  rewriter.setInsertionPoint(target);
1212  FailureOr<LinalgOp> named =
1213  specializeGenericOp(rewriter, cast<GenericOp>(target));
1214  if (succeeded(named)) {
1215  results.push_back(named->getOperation());
1217  }
1218  return emitDefaultSilenceableFailure(target);
1219 }
1220 
1221 //===----------------------------------------------------------------------===//
1222 // InterchangeOp
1223 //===----------------------------------------------------------------------===//
1224 
1226 transform::InterchangeOp::applyToOne(transform::TransformRewriter &rewriter,
1227  GenericOp target,
1229  transform::TransformState &state) {
1230  ArrayRef<int64_t> interchangeVector = getIteratorInterchange();
1231  // Exit early if no transformation is needed.
1232  if (interchangeVector.empty()) {
1233  results.push_back(target);
1235  }
1236 
1237  unsigned numLoops = cast<LinalgOp>(target.getOperation()).getNumLoops();
1238  if (interchangeVector.size() != numLoops) {
1239  return emitSilenceableError()
1240  << getIteratorInterchangeAttrName() << " has length ("
1241  << interchangeVector.size()
1242  << ") different from the number of loops in the target operation ("
1243  << numLoops << ")";
1244  }
1245  FailureOr<GenericOp> res = interchangeGenericOp(
1246  rewriter, target, SmallVector<unsigned>(interchangeVector));
1247  if (failed(res))
1248  return emitDefiniteFailure() << "failed to apply";
1249  results.push_back(res->getOperation());
1251 }
1252 
1253 LogicalResult transform::InterchangeOp::verify() {
1254  ArrayRef<int64_t> permutation = getIteratorInterchange();
1255  auto sequence = llvm::to_vector(llvm::seq<int64_t>(0, permutation.size()));
1256  if (!std::is_permutation(sequence.begin(), sequence.end(),
1257  permutation.begin(), permutation.end())) {
1258  return emitOpError()
1259  << "expects iterator_interchange to be a permutation, found "
1260  << getIteratorInterchange();
1261  }
1262  return success();
1263 }
1264 
1265 //===----------------------------------------------------------------------===//
1266 // LinalgCopyToMemrefOp
1267 //===----------------------------------------------------------------------===//
1268 
1269 DiagnosedSilenceableFailure transform::LinalgCopyToMemrefOp::applyToOne(
1270  transform::TransformRewriter &rewriter, Operation *targetOp,
1272  transform::TransformState &state) {
1273 
1274  // Check if the target can be converted.
1275  if (!isa<linalg::CopyOp>(targetOp)) {
1277  emitSilenceableError() << "only linalg.copy target ops are supported";
1278  diag.attachNote(targetOp->getLoc()) << "target op";
1279  return diag;
1280  }
1281 
1282  auto copyOp = dyn_cast<linalg::CopyOp>(targetOp);
1283  if (!copyOp.hasPureBufferSemantics()) {
1285  emitSilenceableError()
1286  << "cannot transform a linalg.copy on tensors into a memref.copy";
1287  diag.attachNote(targetOp->getLoc()) << "target op";
1288  return diag;
1289  }
1290 
1291  SmallVector<Value> inputs = copyOp.getInputs();
1292  SmallVector<Value> outputs = copyOp.getOutputs();
1293  assert(inputs.size() == 1 && "expected linalg copy op with one input");
1294  assert(outputs.size() == 1 && "expected memref copy op with one output");
1295  Value input = inputs.front();
1296  Value output = outputs.front();
1297 
1298  // linalg.copy supports different element types on source/dest whereas
1299  // memref.copy does not, so we must check that the source and dest types can
1300  // be handled by memref.copy and otherwise reject the transformation.
1301  if (!isa<ShapedType>(input.getType())) {
1303  emitSilenceableError()
1304  << "cannot transform a linalg.copy which input has no shape";
1305  diag.attachNote(targetOp->getLoc()) << "target op";
1306  return diag;
1307  }
1308 
1309  // linalg.copy destination must be a shaped type.
1310  assert(isa<ShapedType>(output.getType()));
1311 
1312  if (cast<ShapedType>(input.getType()).getElementType() !=
1313  cast<ShapedType>(output.getType()).getElementType()) {
1315  emitSilenceableError()
1316  << "cannot transform a linalg.copy with different source and "
1317  "destination element types ";
1318  diag.attachNote(targetOp->getLoc()) << "target op";
1319  return diag;
1320  }
1321 
1322  // Target can be converted, do it.
1323  auto memrefCopyOp =
1324  rewriter.replaceOpWithNewOp<memref::CopyOp>(targetOp, input, output);
1325 
1326  results.push_back(memrefCopyOp);
1328 }
1329 
1330 //===----------------------------------------------------------------------===//
1331 // LowerPackOp
1332 //===----------------------------------------------------------------------===//
1333 
1334 DiagnosedSilenceableFailure transform::LowerPackOp::applyToOne(
1335  transform::TransformRewriter &rewriter, linalg::PackOp target,
1336  transform::ApplyToEachResultList &transformResults,
1337  transform::TransformState &state) {
1338  rewriter.setInsertionPoint(target);
1339  bool lowerPadLikeWithInsertSlice = getLowerPadLikeWithInsertSlice();
1340  FailureOr<LowerPackResult> res =
1341  lowerPack(rewriter, target, lowerPadLikeWithInsertSlice);
1342  if (failed(res)) {
1343  return mlir::emitSilenceableFailure(target->getLoc())
1344  << "cannot lower to pad + expand + transpose";
1345  }
1346  transformResults.push_back(res->padOp);
1347  transformResults.push_back(res->expandShapeOp);
1348  transformResults.push_back(res->transposeOp);
1350 }
1351 
1352 //===----------------------------------------------------------------------===//
1353 // LowerUnPackOp
1354 //===----------------------------------------------------------------------===//
1355 
1356 DiagnosedSilenceableFailure transform::LowerUnPackOp::applyToOne(
1357  transform::TransformRewriter &rewriter, linalg::UnPackOp target,
1358  transform::ApplyToEachResultList &transformResults,
1359  transform::TransformState &state) {
1360  rewriter.setInsertionPoint(target);
1361  bool lowerUnpadLikeWithExtractSlice = getLowerUnpadLikeWithExtractSlice();
1362  FailureOr<LowerUnPackOpResult> res =
1363  lowerUnPack(rewriter, target, lowerUnpadLikeWithExtractSlice);
1364  if (failed(res)) {
1366  emitSilenceableError()
1367  << "cannot lower to transpose + collapse + extract";
1368  diag.attachNote(target->getLoc()) << "target payload op";
1369  return diag;
1370  }
1371  transformResults.push_back(res->emptyOp);
1372  transformResults.push_back(res->transposeOp);
1373  transformResults.push_back(res->collapseShapeOp);
1374  transformResults.push_back(res->extractSliceOp);
1376 }
1377 
1378 //===---------------------------------------------------------------------===//
1379 // MatchOp
1380 //===---------------------------------------------------------------------===//
1381 
1382 void transform::MatchOp::build(OpBuilder &builder, OperationState &result,
1383  Value target, ArrayRef<StringRef> opNames) {
1384  result.addOperands(target);
1385  result.addAttribute(MatchOp::getOpsAttrName(result.name),
1386  builder.getStrArrayAttr(opNames));
1387  result.addTypes(transform::AnyOpType::get(builder.getContext()));
1388 }
1389 
1390 void transform::MatchOp::build(OpBuilder &builder, OperationState &result,
1391  TypeRange resultTypes, Value target,
1392  ArrayRef<StringRef> opNames) {
1393  result.addOperands(target);
1394  result.addAttribute(MatchOp::getOpsAttrName(result.name),
1395  builder.getStrArrayAttr(opNames));
1396  result.addTypes(resultTypes);
1397 }
1398 
1400 transform::MatchOp::apply(transform::TransformRewriter &rewriter,
1401  transform::TransformResults &results,
1402  transform::TransformState &state) {
1403  llvm::StringSet<> strs;
1404  if (getOps().has_value())
1405  strs.insert_range(getOps()->getAsValueRange<StringAttr>());
1406 
1407  auto payloadOps = state.getPayloadOps(getTarget());
1408  if (!llvm::hasSingleElement(payloadOps)) {
1409  return emitDefiniteFailure("requires exactly one target handle");
1410  }
1411 
1413  bool incorrectNumOperandTypes = false;
1414  auto matchFun = [&](Operation *op) {
1415  if (getOps().has_value() && !strs.contains(op->getName().getStringRef()))
1416  return;
1417 
1418  // Interfaces cannot be matched by name, just by ID.
1419  // So we specifically encode the interfaces we care about for this op.
1420  if (getInterface().has_value()) {
1421  auto iface = getInterface().value();
1422  if (iface == transform::MatchInterfaceEnum::LinalgOp &&
1423  !isa<LinalgOp>(op))
1424  return;
1425  if (iface == transform::MatchInterfaceEnum::TilingInterface &&
1426  !isa<TilingInterface>(op))
1427  return;
1428  if (iface == transform::MatchInterfaceEnum::LoopLikeInterface &&
1429  !isa<LoopLikeOpInterface>(op))
1430  return;
1431  }
1432 
1433  // Check if all specified attributes match.
1434  if (getOpAttrs().has_value()) {
1435  DictionaryAttr opAttrs = getOpAttrs().value();
1436  for (NamedAttribute attr : opAttrs) {
1437  if (attr.getName() == getInterfaceAttrName() ||
1438  attr.getName() == getOpsAttrName())
1439  continue;
1440  if (!op->hasAttr(attr.getName()))
1441  return;
1442  if (op->getAttr(attr.getName()) != attr.getValue())
1443  return;
1444  }
1445  }
1446 
1447  if (getFilterResultType().has_value()) {
1448  Type t = getFilterResultType().value();
1449  if (op->getNumResults() != 1 || op->getResultTypes().front() != t)
1450  return;
1451  }
1452 
1453  if (getFilterOperandTypes().has_value()) {
1454  mlir::ArrayAttr types = getFilterOperandTypes().value();
1455  auto operandTypes = op->getOperandTypes();
1456 
1457  if (types.size() == 1) {
1458  // All the operands must must be equal to the specified type
1459  auto typeattr =
1460  dyn_cast<mlir::TypeAttr>(getFilterOperandTypes().value()[0]);
1461  Type t = cast<::mlir::Type>(typeattr.getValue());
1462  if (!llvm::all_of(op->getOperandTypes(),
1463  [&](Type operandType) { return operandType == t; }))
1464  return;
1465  } else {
1466  // The operand types must match all the types in the list (in the same
1467  // order in with they are specified)
1468  if (types.size() != operandTypes.size()) {
1469  incorrectNumOperandTypes = true;
1470  return;
1471  }
1472 
1473  for (auto [attr, operandType] :
1474  llvm::zip_equal(getFilterOperandTypes().value(), operandTypes)) {
1475  auto typeattr = cast<mlir::TypeAttr>(attr);
1476  Type type = cast<::mlir::Type>(typeattr.getValue());
1477 
1478  if (type != operandType)
1479  return;
1480  }
1481  }
1482  }
1483 
1484  // All constraints are satisfied.
1485  res.push_back(op);
1486  return;
1487  };
1488 
1489  (*payloadOps.begin())->walk(matchFun);
1490  if (incorrectNumOperandTypes)
1491  return emitDefiniteFailure("If filter_operand_types contains more than a "
1492  "type, then it must contain as much types as "
1493  "the number of operands in the target ops");
1494  results.set(cast<OpResult>(getResult()), res);
1496 }
1497 
1498 //===---------------------------------------------------------------------===//
1499 // MultiTileSizesOp
1500 //===---------------------------------------------------------------------===//
1501 
1503  Type targetType, Type lowSizeType, Type,
1504  Type) {
1505  printer.printFunctionalType(TypeRange{targetType}, TypeRange{lowSizeType});
1506 }
1507 
1508 static ParseResult parseMultitileSizesTypes(OpAsmParser &parser,
1509  Type &targetType, Type &lowSizeType,
1510  Type &highSizeType,
1511  Type &splitPointType) {
1512  FunctionType funcType;
1513  llvm::SMLoc typeLoc = parser.getCurrentLocation();
1514  if (failed(parser.parseType<FunctionType>(funcType)))
1515  return failure();
1516 
1517  if (funcType.getNumInputs() != 1 || funcType.getNumResults() != 1) {
1518  parser.emitError(typeLoc) << "expects a trailing functional type with one "
1519  "argument and one result";
1520  }
1521  targetType = funcType.getInput(0);
1522  lowSizeType = highSizeType = splitPointType = funcType.getResult(0);
1523 
1524  return success();
1525 }
1526 
1527 DiagnosedSilenceableFailure transform::MultiTileSizesOp::applyToOne(
1528  transform::TransformRewriter &rewriter, LinalgOp target,
1530  if (isa<TransformParamTypeInterface>(getLowSize().getType())) {
1531  if (target.hasDynamicShape()) {
1532  auto diag = emitSilenceableError()
1533  << "cannot compute parametric tile sizes for dynamically "
1534  "shaped payload op";
1535  diag.attachNote(target->getLoc()) << "payload op";
1536  return diag;
1537  }
1538 
1539  FailureOr<StaticMultiSizeSpecification> spec = computeStaticMultiTileSizes(
1540  target, getDimension(), getTargetSize(), getDivisor());
1541  if (failed(spec)) {
1542  return emitSilenceableError()
1543  << "failed to compute multi-size tiling sizes";
1544  }
1545 
1546  Builder builder(target.getContext());
1547  results.assign(llvm::map_range(
1548  ArrayRef<int64_t>({spec->lowTileSize, spec->highTileSize,
1549  spec->lowTileSize * spec->lowTripCount}),
1550  [&builder, this](int64_t value) {
1551  return builder.getIntegerAttr(
1552  cast<ParamType>(getLowSize().getType()).getType(), value);
1553  }));
1555  }
1556 
1557  OpBuilder builder(target.getContext());
1558  builder.setInsertionPoint(target);
1559  OpFoldResult targetSize = builder.getIndexAttr(getTargetSize());
1560  OpFoldResult divisor = builder.getIndexAttr(getDivisor());
1561  FailureOr<MultiSizeSpecification> spec = computeMultiTileSizes(
1562  builder, target, getDimension(), targetSize, divisor);
1563  if (failed(spec)) {
1564  return emitSilenceableError() << "could not generate tile size computation";
1565  }
1566 
1567  AffineExpr s0 = builder.getAffineSymbolExpr(0);
1568  AffineExpr s1 = builder.getAffineSymbolExpr(1);
1569  Operation *splitPoint =
1570  affine::makeComposedAffineApply(builder, target.getLoc(), s0 * s1,
1571  {spec->lowTileSize, spec->lowTripCount});
1572  Operation *lowTileSize = spec->lowTileSize.getDefiningOp();
1573  Operation *highTileSize = spec->highTileSize.getDefiningOp();
1574  assert(lowTileSize && highTileSize && splitPoint &&
1575  "tile sizes are not produced by operations");
1576  results.reserve(results.size() + 3);
1577  results.push_back(lowTileSize);
1578  results.push_back(highTileSize);
1579  results.push_back(splitPoint);
1581 }
1582 
1583 void transform::MultiTileSizesOp::getEffects(
1585  onlyReadsHandle(getTargetMutable(), effects);
1586  producesHandle(getOperation()->getOpResults(), effects);
1587  if (isa<TransformParamTypeInterface>(getLowSize().getType()))
1588  onlyReadsPayload(effects);
1589  else
1590  modifiesPayload(effects);
1591 }
1592 
1593 LogicalResult transform::MultiTileSizesOp::verify() {
1594  if (getLowSize().getType() != getHighSize().getType() ||
1595  getLowSize().getType() != getSplitPoint().getType()) {
1596  return emitOpError() << "expects all results type to be the same";
1597  }
1598  return success();
1599 }
1600 
1601 //===---------------------------------------------------------------------===//
1602 // PackOp
1603 //===---------------------------------------------------------------------===//
1604 
1605 void transform::PackOp::build(OpBuilder &builder, OperationState &result,
1606  Value target,
1607  ArrayRef<OpFoldResult> mixedPackedSizes) {
1608  SmallVector<int64_t> staticPackedSizes;
1609  SmallVector<Value> dynamicPackedSizes;
1610  dispatchIndexOpFoldResults(mixedPackedSizes, dynamicPackedSizes,
1611  staticPackedSizes);
1612  // Call the default builder which sets up the proper operands segment sizes
1613  // attributes for multiple variadic operands. In the absence of this, horrible
1614  // bugs ensue.
1615  Type linalgOpHType = transform::OperationType::get(
1616  builder.getContext(), GenericOp::getOperationName());
1617  build(builder, result,
1618  /*resultType=*/linalgOpHType,
1619  /*target=*/target,
1620  /*dynamic_sizes=*/dynamicPackedSizes,
1621  /*static_sizes=*/builder.getDenseI64ArrayAttr(staticPackedSizes));
1622 }
1623 
1624 SmallVector<OpFoldResult> transform::PackOp::getMixedPackedSizes() {
1625  Builder b(getContext());
1626  return getMixedValues(getStaticPackedSizes(), getPackedSizes(), b);
1627 }
1628 
1630 transform::PackOp::apply(transform::TransformRewriter &rewriter,
1631  transform::TransformResults &transformResults,
1632  transform::TransformState &state) {
1633  auto targetOps = state.getPayloadOps(getTarget());
1634  // If nothing to pack, propagate success.
1635  if (std::empty(targetOps)) {
1636  transformResults.set(cast<OpResult>(getPackedOp()),
1637  ArrayRef<Operation *>({}));
1639  }
1640  // Fail on multi-op handles.
1641  auto linalgOp = dyn_cast<LinalgOp>(*targetOps.begin());
1642  if (!llvm::hasSingleElement(targetOps) || !linalgOp) {
1643  return emitSilenceableError()
1644  << "requires target to map to exactly 1 LinalgOp (got "
1645  << llvm::range_size(targetOps) << ")";
1646  }
1647  // Fail on mismatched number of pack sizes.
1648  if (getMixedPackedSizes().size() != linalgOp.getNumLoops()) {
1649  return emitSilenceableError()
1650  << "requires number of packed sizes match the number of loops ("
1651  << getMixedPackedSizes().size() << " vs " << linalgOp.getNumLoops()
1652  << ")";
1653  }
1654 
1655  // Unpack handles to constants or actual SSA index values.
1656  SmallVector<OpFoldResult> packedSizes;
1658  state, *this, packedSizes, getMixedPackedSizes());
1659 
1660  rewriter.setInsertionPoint(linalgOp);
1661  FailureOr<PackResult> maybeResult = pack(rewriter, linalgOp, packedSizes);
1662  if (failed(maybeResult))
1663  return emitDefiniteFailure("data tiling failed");
1664 
1665  transformResults.set(cast<OpResult>(getPackedOp()),
1666  {maybeResult->packedLinalgOp.getOperation()});
1668 }
1669 
1670 void transform::PackOp::getEffects(
1672  transform::consumesHandle(getTargetMutable(), effects);
1673  transform::onlyReadsHandle(getPackedSizesMutable(), effects);
1674  transform::producesHandle(getOperation()->getOpResults(), effects);
1675  transform::modifiesPayload(effects);
1676 }
1677 
1678 //===---------------------------------------------------------------------===//
1679 // PackGreedilyOp.
1680 //===---------------------------------------------------------------------===//
1681 
1682 LogicalResult transform::PackGreedilyOp::verify() {
1683  if (!isPermutationVector(getMatmulInnerDimsOrder())) {
1684  return emitOpError() << getMatmulInnerDimsOrderAttrName()
1685  << " is not a valid permutation";
1686  }
1687  // TODO: relax to allow empty once we have another strategy than just matmul.
1688  if (!getMatmulPaddedSizesNextMultipleOf().empty()) {
1689  for (auto [s, nmo] :
1690  llvm::zip_equal(getMixedMatmulPackedSizes(),
1691  getMatmulPaddedSizesNextMultipleOf())) {
1692  std::optional<int64_t> maybeStaticPackedSize = getConstantIntValue(s);
1693  if (nmo != 0 &&
1694  (!maybeStaticPackedSize.has_value() || *maybeStaticPackedSize != 0)) {
1695  return emitOpError() << "at most one of the packed_size and the "
1696  "padded_sizes_next_multiple_of can be nonzero "
1697  "for the matmul strategy";
1698  }
1699  }
1700  }
1701  return success();
1702 }
1703 
1705 PackGreedilyOp::apply(transform::TransformRewriter &rewriter,
1706  transform::TransformResults &transformResults,
1707  transform::TransformState &state) {
1708  SmallVector<Operation *> results;
1709  for (Operation *op : state.getPayloadOps(getTarget())) {
1710  auto linalgOp = dyn_cast<LinalgOp>(op);
1711  if (!linalgOp)
1712  continue;
1713  // linalgOp will be replaced and the insertion point may be invalidated if
1714  // we set it before -> set it after.
1715  rewriter.setInsertionPointAfter(linalgOp);
1716  // Failing to pack greedily is perfectly fine.
1717  // In the future we will want to order packings according to some metric.
1718  FailureOr<PackResult> packResult = packMatmulGreedily(
1719  /*rewriter=*/rewriter,
1720  /*linalgOp=*/linalgOp,
1721  /*mnkPackedSizes=*/getMixedMatmulPackedSizes(),
1722  /*mnkPaddedSizesNextMultipleOf=*/
1723  getMatmulPaddedSizesNextMultipleOf(),
1724  /*mnkOrder=*/getMatmulInnerDimsOrder());
1725  if (succeeded(packResult)) {
1726  results.push_back(packResult->packedLinalgOp);
1727  continue;
1728  }
1729  results.push_back(linalgOp);
1730  }
1731  transformResults.set(cast<OpResult>(getPackedOp()), results);
1733 }
1734 
1735 SmallVector<OpFoldResult> PackGreedilyOp::getMixedMatmulPackedSizes() {
1736  Builder b(getContext());
1737  return getMixedValues(getStaticMatmulPackedSizes(), getMatmulPackedSizes(),
1738  b);
1739 }
1740 
1741 void transform::PackGreedilyOp::getEffects(
1743  transform::consumesHandle(getTargetMutable(), effects);
1744  transform::onlyReadsHandle(getMatmulPackedSizesMutable(), effects);
1745  transform::producesHandle(getOperation()->getOpResults(), effects);
1746  transform::modifiesPayload(effects);
1747 }
1748 
1749 //===---------------------------------------------------------------------===//
1750 // PackTransposeOp
1751 //===---------------------------------------------------------------------===//
1752 
1753 LogicalResult transform::PackTransposeOp::verify() {
1754  if (!isPermutationVector(getInnerPerm())) {
1755  return emitOpError() << getInnerPermAttrName()
1756  << " is not a valid permutation";
1757  }
1758  if (!isPermutationVector(getOuterPerm())) {
1759  return emitOpError() << getOuterPermAttrName()
1760  << " is not a valid permutation";
1761  }
1762  if (getInnerPerm().empty() && getOuterPerm().empty()) {
1763  return emitOpError() << " at least one of " << getInnerPermAttrName()
1764  << " or " << getOuterPermAttrName()
1765  << " must be specified";
1766  }
1767  return success();
1768 }
1769 
namespace {
/// Selects which permutation of a relayout (pack/unpack) op is being
/// considered: the outer dims permutation (Outer) or the inner dims
/// permutation (Inner).
enum class OuterOrInnerPerm { Outer = 0, Inner = 1 };
} // namespace
1773 
1774 /// Return true if `permutation` is a valid permutation of the
1775 /// `outer_dims_perm` (case OuterOrInnerPerm::Outer) or `inner_dims_pos`
1776 /// (OuterOrInnerPerm::Inner) of the `tensor.pack` or `tensor.unpack` `op.
1777 /// This is the case when the `permutation` rank matches the rank expected by
1778 /// `op` and `permutation` is itself a permutation vector.
1779 /// Return true if either `op` or `permutation` are empty to allow a simpler
1780 /// polymorphic implementation.
1781 template <typename RelayoutOpTy>
1783  RelayoutOpTy op, ArrayRef<int64_t> permutation,
1784  OuterOrInnerPerm outerOrInnerPerm = OuterOrInnerPerm::Outer) {
1785  static_assert(
1786  llvm::is_one_of<RelayoutOpTy, linalg::PackOp, linalg::UnPackOp>::value,
1787  "applies to only pack or unpack operations");
1788  if (!op || permutation.empty())
1789  return true;
1790  size_t innerRank = op.getInnerDimsPos().size();
1791  if (outerOrInnerPerm == OuterOrInnerPerm::Inner)
1792  return permutation.size() == innerRank && isPermutationVector(permutation);
1793  // op.getOuterDimsPerm() may be empty, in which case it is identity.
1794  // Don't rely on it.
1795  if (std::is_same<RelayoutOpTy, linalg::PackOp>::value) {
1796  return permutation.size() == op.getSourceRank() &&
1797  isPermutationVector(permutation);
1798  }
1799  return permutation.size() == op.getDestRank() &&
1800  isPermutationVector(permutation);
1801 }
1802 
1804 transform::PackTransposeOp::apply(transform::TransformRewriter &rewriter,
1805  transform::TransformResults &transformResults,
1806  transform::TransformState &state) {
1807  auto packOrUnpackOps = state.getPayloadOps(getTargetPackOrUnPackOp());
1808  auto linalgOps = state.getPayloadOps(getTargetLinalgOp());
1809  // Step 1. If nothing to pack, propagate success.
1810  if (std::empty(packOrUnpackOps)) {
1811  transformResults.set(cast<OpResult>(getPackedOp()), {});
1812  transformResults.set(cast<OpResult>(getPackOp()), {});
1813  transformResults.set(cast<OpResult>(getUnPackOp()), {});
1815  }
1816 
1817  // Step 2. Bunch of runtime sanity check and error messages.
1818  // Step 2.1. Fail on multi-op handles.
1819  if (!llvm::hasSingleElement(packOrUnpackOps) ||
1820  !llvm::hasSingleElement(linalgOps)) {
1821  return emitSilenceableError()
1822  << "requires target to map to exactly 1 "
1823  "packing op and 1 packed op ("
1824  << "got " << llvm::range_size(packOrUnpackOps) << " and "
1825  << llvm::range_size(linalgOps) << ")";
1826  }
1827 
1828  // Step 2.2. Fail on wrong type.
1829  auto packOp = dyn_cast<linalg::PackOp>(*packOrUnpackOps.begin());
1830  auto unPackOp = dyn_cast<linalg::UnPackOp>(*packOrUnpackOps.begin());
1831  if ((!packOp && !unPackOp)) {
1832  return emitSilenceableError() << "requires target to map to a "
1833  "linalg.pack or linalg.unpack";
1834  }
1835  LinalgOp linalgOpTarget = dyn_cast<LinalgOp>(*linalgOps.begin());
1836  if (!linalgOpTarget)
1837  return emitSilenceableError() << "requires a LinalgOp target";
1838 
1839  // Step 2.3. Fail if we can't get the producer / consumer Linalg op.
1840  LinalgOp linalgOp;
1841  if (packOp && packOp.getResult().hasOneUse())
1842  linalgOp = dyn_cast<LinalgOp>(*(packOp.getResult().getUsers().begin()));
1843  else if (unPackOp)
1844  linalgOp = unPackOp.getSource().getDefiningOp<LinalgOp>();
1845  if (linalgOp != linalgOpTarget) {
1846  auto errorMsg =
1847  packOp ? StringLiteral{"not a single use by the LinalgOp target"}
1848  : StringLiteral{"not produced by the LinalgOp target"};
1849  return emitSilenceableError() << errorMsg;
1850  }
1851 
1852  // Step 2.4. If we have an UnPackOp, we need to fetch the symmetrical
1853  // PackOp.
1854  if (unPackOp) {
1855  assert(!packOp && "packOp must be null on entry when unPackOp is not null");
1856  OpOperand *packUse = linalgOp.getDpsInitOperand(
1857  cast<OpResult>(unPackOp.getSource()).getResultNumber());
1858  packOp = dyn_cast_or_null<linalg::PackOp>(packUse->get().getDefiningOp());
1859  if (!packOp || !packOp.getResult().hasOneUse())
1860  return emitSilenceableError() << "could not find matching pack op";
1861  }
1862 
1863  // Step 2.5. Fail if any permutation does not validate.
1864  for (auto permType : {OuterOrInnerPerm::Outer, OuterOrInnerPerm::Inner}) {
1865  ArrayRef<int64_t> perm =
1866  (permType == OuterOrInnerPerm::Outer) ? getOuterPerm() : getInnerPerm();
1867  auto errorMsg = (permType == OuterOrInnerPerm::Outer)
1868  ? StringLiteral{"invalid outer_perm"}
1869  : StringLiteral{"invalid inner_perm"};
1870  if (!isValidPackingPermutation(packOp, perm, permType) ||
1871  !isValidPackingPermutation(unPackOp, perm, permType)) {
1872  Operation *packOrUnpackOp =
1873  unPackOp ? unPackOp.getOperation() : packOp.getOperation();
1874  return emitSilenceableError() << errorMsg << ": " << *packOrUnpackOp;
1875  }
1876  }
1877 
1878  // From here on, packOp and linalgOp are always present, unPackOp may or may
1879  // not be present.
1880  assert(packOp && linalgOp && "unexpected null op");
1881 
1882  // Step 3. Actually transpose the ops.
1883  FailureOr<PackTransposeResult> res = packTranspose(
1884  rewriter, packOp, linalgOp, unPackOp, getOuterPerm(), getInnerPerm());
1885  // Preconditions have been checked, it is an error to fail here.
1886  assert(succeeded(res) && "unexpected packTranspose failure");
1887 
1888  // Step 4. Return results.
1889  transformResults.set(cast<OpResult>(getPackOp()), {res->transposedPackOp});
1890  transformResults.set(cast<OpResult>(getPackedOp()),
1891  {res->transposedLinalgOp});
1892  if (unPackOp) {
1893  transformResults.set(cast<OpResult>(getUnPackOp()),
1894  {res->transposedUnPackOp});
1895  } else {
1896  transformResults.set(cast<OpResult>(getUnPackOp()), {});
1897  }
1898 
1900 }
1901 
1902 //===---------------------------------------------------------------------===//
1903 // PadOp
1904 //===---------------------------------------------------------------------===//
1905 
1906 void transform::PadOp::build(OpBuilder &b, OperationState &result, Value target,
1907  ArrayRef<int64_t> paddingDimensions,
1908  ArrayRef<int64_t> padToMultipleOf,
1909  ArrayRef<int64_t> nofoldFlags,
1910  ArrayRef<Attribute> transposePaddings,
1911  StringRef copyBackOp,
1912  bool usePrescribedTensorShapes) {
1913  auto resultType = transform::AnyOpType::get(b.getContext());
1914  return build(/*builder=*/b,
1915  /*result=*/result,
1916  /*types=*/TypeRange{resultType, resultType},
1917  /*target=*/target,
1918  /*paddingValues=*/ArrayAttr(), // let inference handle this
1919  /*paddingDimensions=*/b.getI64ArrayAttr(paddingDimensions),
1920  /*padToMultipleOf=*/ValueRange{},
1921  /*padToMultipleOf=*/
1922  (padToMultipleOf.empty()
1923  ? DenseI64ArrayAttr()
1924  : b.getDenseI64ArrayAttr(padToMultipleOf)),
1925  /*nofoldFlags=*/b.getI64ArrayAttr(nofoldFlags),
1926  /*transposePaddings=*/b.getArrayAttr(transposePaddings),
1927  /*copyBackOp=*/b.getStringAttr(copyBackOp),
1928  /*usePrescribedTensorShapes=*/
1929  usePrescribedTensorShapes ? b.getUnitAttr() : nullptr);
1930 }
1931 
1932 void transform::PadOp::build(OpBuilder &b, OperationState &result, Value target,
1933  ArrayRef<int64_t> paddingDimensions,
1934  ArrayRef<OpFoldResult> mixedPadToMultipleOf,
1935  ArrayRef<int64_t> nofoldFlags,
1936  ArrayRef<Attribute> transposePaddings,
1937  StringRef copyBackOp,
1938  bool usePrescribedTensorShapes) {
1939  auto resultType = transform::AnyOpType::get(b.getContext());
1940  SmallVector<int64_t> staticPadToMultipleOf;
1941  SmallVector<Value> dynamicPadToMultipleOf;
1942  dispatchIndexOpFoldResults(mixedPadToMultipleOf, dynamicPadToMultipleOf,
1943  staticPadToMultipleOf);
1944  return build(/*builder=*/b,
1945  /*result=*/result,
1946  /*types=*/TypeRange{resultType, resultType},
1947  /*target=*/target,
1948  /*paddingValues=*/ArrayAttr(), // let inference handle this
1949  /*paddingDimensions=*/b.getI64ArrayAttr(paddingDimensions),
1950  /*padToMultipleOf=*/dynamicPadToMultipleOf,
1951  /*padToMultipleOf=*/staticPadToMultipleOf,
1952  /*nofoldFlags=*/b.getI64ArrayAttr(nofoldFlags),
1953  /*transposePaddings=*/b.getArrayAttr(transposePaddings),
1954  /*copyBackOp=*/copyBackOp,
1955  /*usePrescribedTensorShapes=*/usePrescribedTensorShapes);
1956 }
1957 
1958 void PadOp::getEffects(
1960  consumesHandle(getTargetMutable(), effects);
1961  onlyReadsHandle(getPadToMultipleOfMutable(), effects);
1962  producesHandle(getOperation()->getOpResults(), effects);
1963  modifiesPayload(effects);
1964 }
1965 
1966 SmallVector<OpFoldResult> PadOp::getMixedPadToMultipleOf() {
1967  Builder b(getContext());
1968  return getMixedValues(getStaticPadToMultipleOf(), getPadToMultipleOf(), b);
1969 }
1970 
1972 transform::PadOp::apply(transform::TransformRewriter &rewriter,
1973  transform::TransformResults &results,
1974  transform::TransformState &state) {
1975  auto transformOp = cast<TransformOpInterface>(getOperation());
1976  SmallVector<Operation *> paddedOps, padOps, copyBackOps;
1977 
1978  for (Operation *target : state.getPayloadOps(getTarget())) {
1979  auto linalgTarget = dyn_cast<LinalgOp>(target);
1980  if (!linalgTarget) {
1981  auto diag = emitSilenceableError() << "expected LinalgOp target";
1982  diag.attachNote(target->getLoc()) << "target op";
1983  return diag;
1984  }
1985 
1986  // Convert the integer packing flags to booleans.
1987  SmallVector<bool> nofoldFlags;
1988  for (int64_t packPadding :
1989  extractFromIntegerArrayAttr<int64_t>(getNofoldFlags()))
1990  nofoldFlags.push_back(static_cast<bool>(packPadding));
1991 
1992  // Convert the padding values to attributes.
1993  SmallVector<Attribute> paddingValues;
1994  for (auto const &it :
1995  llvm::zip(getPaddingValues(), linalgTarget->getOperandTypes())) {
1996  auto attr = dyn_cast<TypedAttr>(std::get<0>(it));
1997  if (!attr) {
1998  emitOpError("expects padding values to be typed attributes");
2000  }
2001  Type elementType = getElementTypeOrSelf(std::get<1>(it));
2002  // Try to parse string attributes to obtain an attribute of element type.
2003  if (auto stringAttr = dyn_cast<StringAttr>(attr)) {
2004  auto parsedAttr = dyn_cast_if_present<TypedAttr>(parseAttribute(
2005  stringAttr, getContext(), elementType,
2006  /*numRead=*/nullptr, /*isKnownNullTerminated=*/true));
2007  if (!parsedAttr || parsedAttr.getType() != elementType) {
2008  auto diag = this->emitOpError("expects a padding that parses to ")
2009  << elementType << ", got " << std::get<0>(it);
2010  diag.attachNote(linalgTarget.getLoc()) << "when applied to this op";
2012  }
2013  paddingValues.push_back(parsedAttr);
2014  continue;
2015  }
2016  // Otherwise, add the attribute directly.
2017  if (attr.getType() != elementType) {
2018  auto diag = this->emitOpError("expects a padding value of type ")
2019  << elementType << ", got " << attr;
2020  diag.attachNote(linalgTarget.getLoc()) << "when applied to this op";
2022  }
2023  paddingValues.push_back(attr);
2024  }
2025 
2026  // Extract the transpose vectors.
2027  SmallVector<SmallVector<int64_t>> transposePaddings;
2028  for (Attribute transposeVector : cast<ArrayAttr>(getTransposePaddings()))
2029  transposePaddings.push_back(extractFromIntegerArrayAttr<int64_t>(
2030  cast<ArrayAttr>(transposeVector)));
2031 
2032  LinalgOp paddedOp;
2034  options.paddingDimensions =
2035  extractFromIntegerArrayAttr<int64_t>(getPaddingDimensions());
2036 
2037  SmallVector<int64_t> padToMultipleOf;
2039  state, transformOp, getMixedPadToMultipleOf(), padToMultipleOf);
2040  if (!status.succeeded())
2041  return status;
2042  if (padToMultipleOf.empty())
2043  padToMultipleOf =
2044  SmallVector<int64_t>(options.paddingDimensions.size(), 1);
2045 
2046  options.padToMultipleOf = padToMultipleOf;
2047  options.paddingValues = paddingValues;
2048  options.nofoldFlags = nofoldFlags;
2049  if (getCopyBackOp() ==
2050  bufferization::MaterializeInDestinationOp::getOperationName()) {
2053  } else if (getCopyBackOp() == linalg::CopyOp::getOperationName()) {
2055  } else if (getCopyBackOp() == kCopyOpNone) {
2057  } else {
2058  llvm_unreachable("unsupported copy_back op");
2059  }
2060  // Populate `sizeToPadTo` with the dynamic tensor sizes for each operand.
2061  bool irChanged = false;
2062  if (getUsePrescribedTensorShapes() &&
2063  linalgTarget.hasPureTensorSemantics()) {
2064  OpBuilder::InsertionGuard g(rewriter);
2065  rewriter.setInsertionPoint(linalgTarget);
2066  for (OpOperand &operand : linalgTarget->getOpOperands()) {
2067  for (auto [i, dim] : llvm::enumerate(linalgTarget.getShape(&operand))) {
2068  if (!ShapedType::isDynamic(dim))
2069  continue;
2070  options.setSizeToPadTo(operand.getOperandNumber(), i,
2071  tensor::getMixedSize(rewriter,
2072  operand.get().getLoc(),
2073  operand.get(), i));
2074  irChanged = true;
2075  }
2076  }
2077  }
2078 
2079  SmallVector<Value> replacements;
2080  SmallVector<tensor::PadOp> newPadOps;
2081  if (failed(rewriteAsPaddedOp(rewriter, linalgTarget, options, paddedOp,
2082  replacements, newPadOps))) {
2083  if (irChanged) {
2084  auto diag = emitDefiniteFailure() << "failed to pad op";
2085  diag.attachNote(target->getLoc()) << "target op";
2086  return diag;
2087  }
2088  auto diag = emitSilenceableError() << "failed to pad op";
2089  diag.attachNote(target->getLoc()) << "target op";
2090  return diag;
2091  }
2092 
2093  // We need to perform our own replacement here because this API is still
2094  // used in patterns that "pad and hoist", for which the replacement values
2095  // need to be different.
2096  // TODO: clean this up and stop "pad and hoist" behavior more globally now
2097  // that we have more composable abstractions.
2098  rewriter.replaceOp(linalgTarget, replacements);
2099  paddedOps.push_back(paddedOp);
2100  padOps.append(newPadOps.begin(), newPadOps.end());
2101  if (options.copyBackOp != LinalgPaddingOptions::CopyBackOp::None) {
2102  for (Value v : replacements) {
2103  Operation *copyBackOp = v.getDefiningOp();
2104  if (!llvm::is_contained(copyBackOps, copyBackOp))
2105  copyBackOps.push_back(copyBackOp);
2106  }
2107  }
2108  }
2109 
2110  results.set(cast<OpResult>(getPadded()), paddedOps);
2111  results.set(cast<OpResult>(getPad()), padOps);
2112  results.set(cast<OpResult>(getCopy()), copyBackOps);
2114 }
2115 
2116 LogicalResult transform::PadOp::verify() {
2117  SmallVector<int64_t> nofoldFlags =
2118  extractFromIntegerArrayAttr<int64_t>(getNofoldFlags());
2119  if (any_of(nofoldFlags, [](int64_t packPadding) {
2120  return packPadding != 0 && packPadding != 1;
2121  })) {
2122  return emitOpError()
2123  << "expects nofold_flags to contain booleans (0/1), found "
2124  << getNofoldFlags();
2125  }
2126 
2127  SmallVector<int64_t> paddingDimensions =
2128  extractFromIntegerArrayAttr<int64_t>(getPaddingDimensions());
2129  if (any_of(paddingDimensions,
2130  [](int64_t paddingDimension) { return paddingDimension < 0; })) {
2131  return emitOpError() << "expects padding_dimensions to contain positive "
2132  "integers, found "
2133  << getPaddingDimensions();
2134  }
2135  if (!getMixedPadToMultipleOf().empty()) {
2136  if (getMixedPadToMultipleOf().size() != paddingDimensions.size()) {
2137  return emitOpError() << "expects as many multiples as padding_dimensions";
2138  }
2139  }
2140  ArrayAttr transposes = getTransposePaddings();
2141  for (Attribute attr : transposes) {
2142  SmallVector<int64_t> transpose = extractFromIntegerArrayAttr<int64_t>(attr);
2143  auto sequence = llvm::to_vector(llvm::seq<int64_t>(0, transpose.size()));
2144  if (!std::is_permutation(sequence.begin(), sequence.end(),
2145  transpose.begin(), transpose.end())) {
2146  return emitOpError()
2147  << "expects transpose_paddings to be a permutation, found "
2148  << attr;
2149  }
2150  }
2151  if (getCopyBackOp() !=
2152  bufferization::MaterializeInDestinationOp::getOperationName() &&
2153  getCopyBackOp() != linalg::CopyOp::getOperationName() &&
2154  getCopyBackOp() != kCopyOpNone)
2155  return emitOpError() << "invalid copy_back_op";
2156  return success();
2157 }
2158 
2159 //===---------------------------------------------------------------------===//
2160 // PadTilingInterfaceOp
2161 //===---------------------------------------------------------------------===//
2162 
2163 void transform::PadTilingInterfaceOp::build(OpBuilder &b,
2164  OperationState &result,
2165  Value target,
2166  ArrayRef<int64_t> paddingDimensions,
2167  ArrayRef<int64_t> paddingSizes,
2168  bool padToMultipleOf) {
2169  auto resultType = transform::AnyOpType::get(b.getContext());
2170  return build(/*builder=*/b,
2171  /*result=*/result,
2172  /*types=*/TypeRange{resultType, resultType},
2173  /*target=*/target,
2174  /*paddingValues=*/ArrayAttr(), // let inference handle this
2175  /*paddingDimensions=*/b.getI64ArrayAttr(paddingDimensions),
2176  /*paddingSizes=*/ValueRange{},
2177  /*paddingSizes=*/
2178  (paddingSizes.empty() ? DenseI64ArrayAttr()
2179  : b.getDenseI64ArrayAttr(paddingSizes)),
2180  /*padToMultipleOf=*/
2181  padToMultipleOf ? b.getUnitAttr() : nullptr);
2182 }
2183 
2184 void transform::PadTilingInterfaceOp::build(
2185  OpBuilder &b, OperationState &result, Value target,
2186  ArrayRef<int64_t> paddingDimensions,
2187  ArrayRef<OpFoldResult> mixedPaddingSizes, bool padToMultipleOf) {
2188  auto resultType = transform::AnyOpType::get(b.getContext());
2189  SmallVector<int64_t> staticPaddingSizes;
2190  SmallVector<Value> dynamicPaddingSizes;
2191  dispatchIndexOpFoldResults(mixedPaddingSizes, dynamicPaddingSizes,
2192  staticPaddingSizes);
2193  return build(/*builder=*/b,
2194  /*result=*/result,
2195  /*types=*/TypeRange{resultType, resultType},
2196  /*target=*/target,
2197  /*paddingValues=*/ArrayAttr(), // let inference handle this
2198  /*paddingDimensions=*/b.getI64ArrayAttr(paddingDimensions),
2199  /*paddingSizes=*/dynamicPaddingSizes,
2200  /*paddingSizes=*/staticPaddingSizes,
2201  /*usePrescribedTensorShapes=*/padToMultipleOf);
2202 }
2203 
2204 void transform::PadTilingInterfaceOp::getEffects(
2206  consumesHandle(getTargetMutable(), effects);
2207  onlyReadsHandle(getPaddingSizesMutable(), effects);
2208  producesHandle(getOperation()->getOpResults(), effects);
2209  modifiesPayload(effects);
2210 }
2211 
2213 transform::PadTilingInterfaceOp::getMixedPaddingSizes() {
2214  Builder b(getContext());
2215  return getMixedValues(getStaticPaddingSizes(), getPaddingSizes(), b);
2216 }
2217 
2219 transform::PadTilingInterfaceOp::apply(transform::TransformRewriter &rewriter,
2220  transform::TransformResults &results,
2221  transform::TransformState &state) {
2222  SmallVector<Operation *> paddedOps, padOps;
2223 
2224  for (Operation *target : state.getPayloadOps(getTarget())) {
2225  auto targetOp = dyn_cast<TilingInterface>(target);
2226  if (!targetOp) {
2227  auto diag = emitSilenceableError() << "expected TilingInterface target";
2228  diag.attachNote(target->getLoc()) << "target op";
2229  return diag;
2230  }
2231 
2232  // Only IndexingMapOpInterface ops for now, until TilingInterface exposes a
2233  // loopsToOperand map / C++ APIs to compute the effect of padding on
2234  // operands.
2235  if (!isa<IndexingMapOpInterface>(targetOp.getOperation())) {
2236  auto diag = emitSilenceableError() << "only IndexingMapOpInterface ops "
2237  "supported atm";
2238  diag.attachNote(target->getLoc()) << "target op";
2239  return diag;
2240  }
2241 
2242  // Convert the padding values to attributes.
2243  SmallVector<Attribute> paddingValues;
2244  for (auto const &[untypedAttr, elementOrTensorType] :
2245  llvm::zip(getPaddingValues(), targetOp->getOperandTypes())) {
2246  auto attr = dyn_cast<TypedAttr>(untypedAttr);
2247  Type elementType = getElementTypeOrSelf(elementOrTensorType);
2248  if (!attr) {
2249  emitOpError("expects padding values to be typed attributes");
2251  }
2252  // Try to parse string attributes to obtain an attribute of element type.
2253  if (auto stringAttr = dyn_cast<StringAttr>(attr)) {
2254  auto parsedAttr = dyn_cast_if_present<TypedAttr>(parseAttribute(
2255  stringAttr, getContext(), elementType,
2256  /*numRead=*/nullptr, /*isKnownNullTerminated=*/true));
2257  if (!parsedAttr || parsedAttr.getType() != elementType) {
2258  auto diag = this->emitOpError("expects a padding that parses to ")
2259  << elementType << ", got " << attr;
2260  diag.attachNote(targetOp.getLoc()) << "when applied to this op";
2262  }
2263  paddingValues.push_back(parsedAttr);
2264  continue;
2265  }
2266  // Otherwise, add the attribute directly.
2267  if (attr.getType() != elementType) {
2268  auto diag = this->emitOpError("expects a padding value of type ")
2269  << elementType << ", got " << attr;
2270  diag.attachNote(targetOp.getLoc()) << "when applied to this op";
2272  }
2273  paddingValues.push_back(attr);
2274  }
2275 
2276  // Set options.
2277  TilingInterface paddedOp;
2279  options.setPaddingValues(paddingValues)
2280  .setPaddingDimensions(
2281  extractFromIntegerArrayAttr<int64_t>(getPaddingDimensions()))
2282  .setPaddingSizes(getMixedPaddingSizes())
2283  .setPadToMultipleOf(getPadToMultipleOf());
2284 
2285  // Apply padding.
2286  SmallVector<tensor::PadOp> newPadOps;
2287  FailureOr<TilingInterface> maybePaddedOp = rewriteAsPaddedOp(
2288  rewriter, cast<TilingInterface>(targetOp.getOperation()), options,
2289  newPadOps);
2290  if (failed(maybePaddedOp)) {
2291  auto diag = emitSilenceableError() << "failed to pad op";
2292  diag.attachNote(target->getLoc()) << "target op";
2293  return diag;
2294  }
2295 
2296  // Set transform results.
2297  paddedOps.push_back(cast<TilingInterface>(maybePaddedOp->getOperation()));
2298  padOps.append(newPadOps.begin(), newPadOps.end());
2299  }
2300 
2301  results.set(cast<OpResult>(getPadded()), paddedOps);
2302  results.set(cast<OpResult>(getPad()), padOps);
2304 }
2305 
2307  SmallVector<int64_t> paddingDimensions =
2308  extractFromIntegerArrayAttr<int64_t>(getPaddingDimensions());
2309  if (any_of(paddingDimensions,
2310  [](int64_t paddingDimension) { return paddingDimension < 0; })) {
2311  return emitOpError() << "expects padding_dimensions to contain positive "
2312  "integers, found "
2313  << getPaddingDimensions();
2314  }
2315  if (getMixedPaddingSizes().size() != paddingDimensions.size()) {
2316  return emitOpError() << "expects as many multiples as padding_dimensions";
2317  }
2318  return success();
2319 }
2320 
2321 //===---------------------------------------------------------------------===//
2322 // HoistPadOp
2323 //===---------------------------------------------------------------------===//
2324 
2325 DiagnosedSilenceableFailure transform::HoistPadBuildPackingLoopNestOp::apply(
2326  transform::TransformRewriter &rewriter,
2327  transform::TransformResults &transformResults,
2328  transform::TransformState &state) {
2329  auto targetOps = state.getPayloadOps(getTarget());
2330  auto loopOps = state.getPayloadOps(getLoop());
2331  if (!llvm::hasSingleElement(targetOps) || !llvm::hasSingleElement(loopOps)) {
2332  return emitDefiniteFailure()
2333  << "requires exactly one target and one loop handle (got "
2334  << llvm::range_size(targetOps) << " and "
2335  << llvm::range_size(loopOps) << ")";
2336  }
2337 
2338  auto padOp = dyn_cast_or_null<tensor::PadOp>(*targetOps.begin());
2339  auto loopOp = dyn_cast_or_null<scf::ForOp>(*loopOps.begin());
2340  if (!padOp || !loopOp)
2341  return emitDefiniteFailure() << "requires exactly 2 non-null handles";
2342 
2343  FailureOr<linalg::detail::PackingResult> result =
2344  linalg::detail::buildPackingLoopNest(rewriter, padOp, loopOp,
2345  getTranspose());
2346  if (failed(result))
2347  return emitDefiniteFailure() << "could not build packing loop nest";
2348 
2349  if (result->clonedLoopIvs.empty()) {
2350  transformResults.set(cast<OpResult>(getPackingLoop()),
2351  {result->hoistedPadOp.getOperation()});
2353  }
2354  auto outerPackedLoop =
2355  scf::getForInductionVarOwner(result->clonedLoopIvs.front());
2356  transformResults.set(cast<OpResult>(getPackingLoop()),
2357  {outerPackedLoop.getOperation()});
2359 }
2360 
2362  ArrayRef<int64_t> transpose = getTranspose();
2363  auto sequence = llvm::to_vector(llvm::seq<int64_t>(0, transpose.size()));
2364  if (!std::is_permutation(sequence.begin(), sequence.end(), transpose.begin(),
2365  transpose.end())) {
2366  return emitOpError() << "expects transpose to be a permutation, found "
2367  << getTranspose();
2368  }
2369  return success();
2370 }
2371 
2372 void transform::HoistPadBuildPackingLoopNestOp::getEffects(
2374  transform::onlyReadsHandle(getTargetMutable(), effects);
2375  transform::onlyReadsHandle(getLoopMutable(), effects);
2376  transform::producesHandle(getOperation()->getOpResults(), effects);
2377  transform::modifiesPayload(effects);
2378 }
2379 
2381 transform::HoistPadOp::applyToOne(transform::TransformRewriter &rewriter,
2382  tensor::PadOp target,
2384  transform::TransformState &state) {
2385  tensor::PadOp hoistedPadOp;
2386  SmallVector<TransposeOp> transposeOps;
2387  FailureOr<Value> result =
2388  hoistPaddingOnTensors(rewriter, target, getNumLoops(), getTranspose(),
2389  hoistedPadOp, transposeOps);
2390  if (succeeded(result)) {
2391  // We need to perform our own replacement here because this API is still
2392  // used in patterns that "pad and hoist", for which the replacement values
2393  // need to be different.
2394  // TODO: clean this up and stop "pad and hoist" behavior more globally now
2395  // that we have more composable abstractions.
2396  rewriter.replaceOp(target, *result);
2397  results.push_back(hoistedPadOp);
2399  }
2400  return emitDefaultSilenceableFailure(target);
2401 }
2402 
2403 LogicalResult transform::HoistPadOp::verify() {
2404  ArrayRef<int64_t> transpose = getTranspose();
2405  auto sequence = llvm::to_vector(llvm::seq<int64_t>(0, transpose.size()));
2406  if (!std::is_permutation(sequence.begin(), sequence.end(), transpose.begin(),
2407  transpose.end())) {
2408  return emitOpError() << "expects transpose to be a permutation, found "
2409  << getTranspose();
2410  }
2411  return success();
2412 }
2413 
2414 //===----------------------------------------------------------------------===//
2415 // PromoteOp
2416 //===----------------------------------------------------------------------===//
2417 
2419 transform::PromoteOp::applyToOne(transform::TransformRewriter &rewriter,
2420  LinalgOp target,
2422  transform::TransformState &state) {
2423  LinalgPromotionOptions promotionOptions;
2424  if (!getOperandsToPromote().empty())
2425  promotionOptions = promotionOptions.setOperandsToPromote(
2426  extractFromIntegerArrayAttr<int64_t>(getOperandsToPromote()));
2427  if (getUseFullTilesByDefault())
2428  promotionOptions = promotionOptions.setUseFullTileBuffersByDefault(
2429  getUseFullTilesByDefault());
2430  if (getUseAlloca())
2431  promotionOptions = promotionOptions.setUseAlloca(getUseAlloca());
2432  if (!getUseFullTileBuffers().empty())
2433  promotionOptions = promotionOptions.setUseFullTileBuffers(
2434  llvm::to_vector(getUseFullTileBuffers().getAsValueRange<BoolAttr>()));
2435  if (getAlignment().has_value())
2436  promotionOptions = promotionOptions.setAlignment(*getAlignment());
2437  if (getMemorySpace().has_value())
2438  promotionOptions = promotionOptions.setMemorySpace(*getMemorySpace());
2439 
2440  if (getMapping().has_value()) {
2441  // The mapping should only contain an element
2442  auto mapping = *getMapping();
2443  if (mapping.size() > 1)
2444  return emitDefaultDefiniteFailure(target);
2445 
2446  auto addressSpace = cast<mlir::gpu::GPUMemorySpaceMappingAttr>(mapping[0]);
2447 
2448  if (addressSpace.getAddressSpace() ==
2449  mlir::gpu::GPUDialect::getWorkgroupAddressSpace()) {
2450  promotionOptions =
2451  promotionOptions
2455  .setUseFullTileBuffers({false, false});
2456  } else if (addressSpace.getAddressSpace() ==
2457  mlir::gpu::GPUDialect::getPrivateAddressSpace()) {
2458  promotionOptions =
2459  promotionOptions
2463  .setUseFullTileBuffers({false, false});
2464  } else {
2465  return emitDefaultDefiniteFailure(target);
2466  }
2467  }
2468 
2469  if (failed(promoteSubviewsPrecondition(target, promotionOptions)))
2470  return emitDefaultDefiniteFailure(target);
2471 
2472  rewriter.setInsertionPoint(target);
2473  FailureOr<LinalgOp> res = promoteSubViews(rewriter, target, promotionOptions);
2474  if (failed(res))
2475  return emitDefaultDefiniteFailure(target);
2476  results.push_back(target);
2478 }
2479 
2480 //===----------------------------------------------------------------------===//
2481 // ReplaceOp
2482 //===----------------------------------------------------------------------===//
2483 
2485 transform::ReplaceOp::apply(transform::TransformRewriter &rewriter,
2486  TransformResults &transformResults,
2487  TransformState &state) {
2488  auto payload = state.getPayloadOps(getTarget());
2489 
2490  // Check for invalid targets.
2491  for (Operation *target : payload) {
2492  if (target->getNumOperands() > 0)
2493  return emitDefiniteFailure() << "expected target without operands";
2494  if (!target->hasTrait<OpTrait::IsIsolatedFromAbove>() &&
2495  target->getNumRegions() > 0)
2496  return emitDefiniteFailure()
2497  << "expected target that is isolated from above";
2498  }
2499 
2500  // Clone and replace.
2501  Operation *pattern = &getBodyRegion().front().front();
2502  SmallVector<Operation *> replacements;
2503  for (Operation *target : payload) {
2504  if (getOperation()->isAncestor(target))
2505  continue;
2506  rewriter.setInsertionPoint(target);
2507  Operation *replacement = rewriter.clone(*pattern);
2508  rewriter.replaceOp(target, replacement->getResults());
2509  replacements.push_back(replacement);
2510  }
2511  transformResults.set(cast<OpResult>(getReplacement()), replacements);
2513 }
2514 
2515 void transform::ReplaceOp::getEffects(
2517  consumesHandle(getTargetMutable(), effects);
2518  producesHandle(getOperation()->getOpResults(), effects);
2519  modifiesPayload(effects);
2520 }
2521 
2522 LogicalResult transform::ReplaceOp::verify() {
2523  if (!getBodyRegion().hasOneBlock())
2524  return emitOpError() << "expected one block";
2525  if (std::distance(getBodyRegion().front().begin(),
2526  getBodyRegion().front().end()) != 1)
2527  return emitOpError() << "expected one operation in block";
2528  Operation *replacement = &getBodyRegion().front().front();
2529  if (replacement->getNumOperands() > 0)
2530  return replacement->emitOpError()
2531  << "expected replacement without operands";
2532  if (!replacement->hasTrait<OpTrait::IsIsolatedFromAbove>() &&
2533  replacement->getNumRegions() > 0)
2534  return replacement->emitOpError()
2535  << "expect op that is isolated from above";
2536  return success();
2537 }
2538 
2539 //===----------------------------------------------------------------------===//
2540 // ScalarizeOp
2541 //===----------------------------------------------------------------------===//
2542 
2544 transform::ScalarizeOp::applyToOne(transform::TransformRewriter &rewriter,
2545  LinalgOp target,
2547  transform::TransformState &state) {
2548  scf::SCFTilingOptions tilingOptions;
2549  tilingOptions.setTileSizeComputationFunction([&](OpBuilder &b, Operation *) {
2550  SmallVector<OpFoldResult> tileSizes;
2551  Location loc = target.getLoc();
2552  SmallVector<OpFoldResult> allShapeSizes =
2553  target.createFlatListOfOperandDims(b, loc);
2554  AffineMap map = target.getShapesToLoopsMap();
2555  if (!map)
2556  return tileSizes;
2557  SmallVector<OpFoldResult> shapeSizes =
2559  allShapeSizes);
2560  // If the shape size is dynamic, tile by 1.
2561  // Otherwise, do not tile (i.e. tile size 0).
2562  for (OpFoldResult shapeSize : shapeSizes) {
2563  tileSizes.push_back(getConstantIntValue(shapeSize) ? b.getIndexAttr(0)
2564  : b.getIndexAttr(1));
2565  }
2566  return tileSizes;
2567  });
2568  rewriter.setInsertionPoint(target);
2569  FailureOr<scf::SCFTilingResult> maybeTilingResult = tileUsingSCF(
2570  rewriter, cast<TilingInterface>(target.getOperation()), tilingOptions);
2571  if (failed(maybeTilingResult))
2572  return emitDefaultDefiniteFailure(target);
2573 
2574  if (target->getNumResults())
2575  rewriter.replaceOp(target, maybeTilingResult->replacements);
2576  else
2577  rewriter.eraseOp(target);
2578 
2579  results.reserve(maybeTilingResult->tiledOps.size());
2580  for (Operation *tiled : maybeTilingResult->tiledOps)
2581  results.push_back(tiled);
2583 }
2584 
2585 //===----------------------------------------------------------------------===//
2586 // ConvertToLoopsOp
2587 //===----------------------------------------------------------------------===//
2588 
2590 transform::ConvertToLoopsOp::apply(transform::TransformRewriter &rewriter,
2591  transform::TransformResults &results,
2592  transform::TransformState &state) {
2594  for (Operation *target : state.getPayloadOps(getTarget())) {
2595  auto tilingOp = dyn_cast<TilingInterface>(*target);
2596  if (!tilingOp) {
2598  emitSilenceableError()
2599  << "expected the payload to implement TilingInterface";
2600  diag.attachNote(target->getLoc()) << "payload op";
2601  return diag;
2602  }
2603  rewriter.setInsertionPoint(target);
2604  FailureOr<SmallVector<scf::ForOp>> generatedLoops =
2605  scf::lowerToLoopsUsingSCFForOp(rewriter, tilingOp);
2606  if (failed(generatedLoops))
2607  return emitDefaultDefiniteFailure(target);
2608  for (scf::ForOp &loop : *generatedLoops) {
2609  loops.push_back(loop.getOperation());
2610  }
2611  rewriter.eraseOp(target);
2612  }
2613  results.set(cast<OpResult>(getResult()), loops);
2615 }
2616 
2617 //===----------------------------------------------------------------------===//
2618 // RewriteInDestinationPassingStyleOp
2619 //===----------------------------------------------------------------------===//
2620 
2622 transform::RewriteInDestinationPassingStyleOp::applyToOne(
2623  transform::TransformRewriter &rewriter, Operation *target,
2625  transform::TransformState &state) {
2626  rewriter.setInsertionPoint(target);
2627  FailureOr<Operation *> maybeResult =
2629  .Case<tensor::FromElementsOp, tensor::GenerateOp, tensor::PadOp>(
2630  [&rewriter](auto op) {
2631  return rewriteInDestinationPassingStyle(rewriter, op);
2632  });
2633  if (failed(maybeResult))
2634  return emitDefaultSilenceableFailure(target);
2635  results.push_back(*maybeResult);
2637 }
2638 
2639 //===----------------------------------------------------------------------===//
2640 // SplitOp
2641 //===----------------------------------------------------------------------===//
2642 
2644 SplitOp::apply(transform::TransformRewriter &rewriter,
2645  TransformResults &results, TransformState &state) {
2646  // Collect the dynamic split points if provided.
2647  SmallVector<Operation *> payload =
2648  llvm::to_vector(state.getPayloadOps(getTarget()));
2649 
2650  bool isMultiwaySplit = getMultiway();
2651 
2652  if (isMultiwaySplit && !llvm::hasSingleElement(payload)) {
2653  return mlir::emitSilenceableFailure(getLoc())
2654  << "requires exactly one target when "
2655  "multiway split is enabled (got "
2656  << llvm::range_size(payload) << ")";
2657  }
2658 
2659  SmallVector<OpFoldResult> chunkSizes;
2660 
2661  if (!isMultiwaySplit)
2662  chunkSizes.reserve(payload.size());
2663 
2664  if (getDynamicChunkSizes()) {
2666  if (isa<TransformHandleTypeInterface>(getDynamicChunkSizes().getType())) {
2667  chunkSizes = llvm::to_vector(llvm::map_range(
2668  state.getPayloadOps(getDynamicChunkSizes()), [&](Operation *op) {
2669  if (op->getNumResults() != 1 ||
2670  !op->getResult(0).getType().isIndex()) {
2671  diag = emitSilenceableError()
2672  << "expected dynamic split point handle to point to a "
2673  "single-result index-typed op";
2674  diag.attachNote(op->getLoc()) << "dynamic split point";
2675  }
2676  return OpFoldResult(op->getResult(0));
2677  }));
2678  } else {
2679  chunkSizes = llvm::to_vector(
2680  llvm::map_range(state.getParams(getDynamicChunkSizes()),
2681  [](Attribute attr) { return OpFoldResult(attr); }));
2682  }
2683  if (diag.isSilenceableFailure())
2684  return diag;
2685 
2686  // For multiway split, a single payload is expected to have multiple
2687  // split points.
2688  if (!isMultiwaySplit && chunkSizes.size() != payload.size()) {
2689  return emitDefiniteFailure()
2690  << "expected the dynamic split point handle to point to as "
2691  "many operations ("
2692  << chunkSizes.size() << ") as the target handle ("
2693  << payload.size() << ")";
2694  }
2695  } else {
2696  chunkSizes.resize(payload.size(),
2697  rewriter.getIndexAttr(getStaticChunkSizes()));
2698  }
2699 
2700  auto checkStructuredOpAndDimensions =
2701  [&](LinalgOp linalgOp, Location loc) -> DiagnosedSilenceableFailure {
2702  if (!linalgOp) {
2703  auto diag = emitSilenceableError() << "only applies to structured ops";
2704  diag.attachNote(loc) << "target op";
2705  return diag;
2706  }
2707 
2708  if (getDimension() >= linalgOp.getNumLoops()) {
2709  auto diag = emitSilenceableError() << "dimension " << getDimension()
2710  << " does not exist in target op";
2711  diag.attachNote(loc) << "target op";
2712  return diag;
2713  }
2715  };
2716 
2717  auto checkFailureInSplitting =
2718  [&](bool hasFailed, Location loc) -> DiagnosedSilenceableFailure {
2719  if (hasFailed) {
2720  auto diag = emitDefiniteFailure() << "internal failure in splitting";
2721  diag.attachNote(loc) << "target op";
2722  return diag;
2723  }
2725  };
2726 
2727  SmallVector<Operation *> opList;
2728  if (isMultiwaySplit) {
2729 
2730  // Split a single target operation at multiple points.
2731  TilingInterface head, tail;
2732  Operation *target = payload.front();
2733 
2734  LinalgOp linalgOp = dyn_cast<LinalgOp>(target);
2735 
2736  // Check that the target is a valid LinalgOp with correct dimensions.
2738  checkStructuredOpAndDimensions(linalgOp, target->getLoc());
2739  if (diag.isSilenceableFailure())
2740  return diag;
2741 
2742  for (auto &&[idx, chunkSize] : llvm::enumerate(chunkSizes)) {
2743 
2744  if (idx > 0)
2745  target = tail.getOperation();
2746 
2747  if (!target)
2748  break;
2749 
2750  linalgOp = cast<LinalgOp>(target);
2751  Location loc = target->getLoc();
2752 
2753  rewriter.setInsertionPoint(linalgOp);
2754  std::tie(head, tail) = linalg::splitOp(
2755  rewriter, cast<TilingInterface>(linalgOp.getOperation()),
2756  getDimension(), chunkSize);
2757 
2758  // Propagate errors.
2760  checkFailureInSplitting(!head && !tail, loc);
2761  if (diag.isDefiniteFailure())
2762  return diag;
2763 
2764  opList.push_back(head.getOperation());
2765  }
2766 
2767  // Append any leftover parts to the end of the result list.
2768  if (tail)
2769  opList.push_back(tail.getOperation());
2770 
2771  } else {
2772  // Split each target operation.
2773  SmallVector<Operation *> first, second;
2774  Operation *noSecondPart = nullptr;
2775  for (const auto &pair : llvm::zip(payload, chunkSizes)) {
2776  Operation *target = std::get<0>(pair);
2777  Location loc = target->getLoc();
2778  LinalgOp linalgOp = dyn_cast<LinalgOp>(target);
2780  checkStructuredOpAndDimensions(linalgOp, target->getLoc());
2781 
2782  if (diag.isSilenceableFailure())
2783  return diag;
2784 
2785  rewriter.setInsertionPoint(linalgOp);
2786  std::tie(first.emplace_back(), second.emplace_back()) = linalg::splitOp(
2787  rewriter, cast<TilingInterface>(linalgOp.getOperation()),
2788  getDimension(), std::get<1>(pair));
2789 
2790  // Propagate errors.
2791  DiagnosedSilenceableFailure diagSplit =
2792  checkFailureInSplitting(!first.back() && !second.back(), loc);
2793  if (diagSplit.isDefiniteFailure())
2794  return diag;
2795 
2796  // Do not add null second parts.
2797  if (!second.back()) {
2798  noSecondPart = target;
2799  second.pop_back();
2800  }
2801  }
2802 
2803  if (second.size() != first.size() && !second.empty()) {
2804  auto diag = emitSilenceableError()
2805  << "splitting does not produce the second part for a subset "
2806  "of targets";
2807  diag.attachNote()
2808  << "expected splitting to produce the second part of all "
2809  "or none of the targets";
2810  diag.attachNote(noSecondPart->getLoc())
2811  << "first target with no second part";
2812  return diag;
2813  }
2814 
2815  opList.append(first);
2816  if (second.size())
2817  opList.append(second);
2818  }
2819  results.set(cast<OpResult>(getSplitList()), opList);
2821 }
2822 
2823 void SplitOp::getEffects(
2825  consumesHandle(getTargetMutable(), effects);
2826  if (getDynamicChunkSizes())
2827  onlyReadsHandle(getDynamicChunkSizesMutable(), effects);
2828  producesHandle(getOperation()->getOpResults(), effects);
2829  modifiesPayload(effects);
2830 }
2831 
2832 ParseResult SplitOp::parse(OpAsmParser &parser, OperationState &result) {
2833  OpAsmParser::UnresolvedOperand target, dynamicChunkSizes;
2834  IntegerAttr staticChunkSizes;
2835  if (parser.parseOperand(target) || parser.parseKeyword("after"))
2836  return failure();
2837 
2838  OptionalParseResult dynamicPointParseResult =
2839  parser.parseOptionalOperand(dynamicChunkSizes);
2840  if (!dynamicPointParseResult.has_value()) {
2841  int64_t staticChunkSizesValue;
2842  if (failed(parser.parseInteger(staticChunkSizesValue)))
2843  return failure();
2844 
2845  staticChunkSizes =
2846  parser.getBuilder().getI64IntegerAttr(staticChunkSizesValue);
2847  }
2848 
2849  Type targetType;
2850  if (parser.parseOptionalAttrDict(result.attributes) ||
2851  parser.parseColonType(targetType) ||
2852  parser.resolveOperand(target, targetType, result.operands)) {
2853  return failure();
2854  }
2855  if (dynamicPointParseResult.has_value()) {
2856  Type ChunkSizesType;
2857  if (failed(*dynamicPointParseResult) || parser.parseComma() ||
2858  parser.parseType(ChunkSizesType) ||
2859  parser.resolveOperand(dynamicChunkSizes, ChunkSizesType,
2860  result.operands)) {
2861  return failure();
2862  }
2863 
2864  staticChunkSizes =
2865  parser.getBuilder().getI64IntegerAttr(ShapedType::kDynamic);
2866  }
2867 
2868  result.addAttribute(
2869  SplitOp::getStaticChunkSizesAttrName(result.name).getValue(),
2870  staticChunkSizes);
2871  result.addTypes(targetType);
2872  return success();
2873 }
2874 
2875 void SplitOp::print(OpAsmPrinter &printer) {
2876  printer << " " << getTarget() << " after ";
2877  int64_t staticChunkSize = static_cast<int64_t>(getStaticChunkSizes());
2878  if (staticChunkSize != ShapedType::kDynamic)
2879  printer << staticChunkSize;
2880  else
2881  printer << getDynamicChunkSizes();
2882  printer << " ";
2883  printer.printOptionalAttrDict(getOperation()->getAttrs(),
2884  {getStaticChunkSizesAttrName()});
2885  printer << " : " << getTarget().getType();
2886  if (staticChunkSize == ShapedType::kDynamic)
2887  printer << ", " << getDynamicChunkSizes().getType();
2888 }
2889 
2890 LogicalResult SplitOp::verify() {
2891  if ((static_cast<int64_t>(getStaticChunkSizes()) != ShapedType::kDynamic) ^
2892  (getDynamicChunkSizes() == nullptr)) {
2893  return emitOpError() << "expects either a dynamic or a static split "
2894  "point to be provided";
2895  }
2896  return success();
2897 }
2898 
2899 //===----------------------------------------------------------------------===//
2900 // SplitReductionOp
2901 //===----------------------------------------------------------------------===//
2902 
2903 void transform::SplitReductionOp::build(
2904  OpBuilder &builder, OperationState &result, Value target,
2905  int64_t splitFactor, int64_t insertSplitDimension, bool innerParallel,
2906  bool useScalingAlgorithm, bool useAlloc) {
2907  MLIRContext *ctx = builder.getContext();
2908  result.addOperands(target);
2909  result.addAttribute(SplitReductionOp::getSplitFactorAttrName(result.name),
2910  builder.getI64IntegerAttr(splitFactor));
2911  result.addAttribute(
2912  SplitReductionOp::getInsertSplitDimensionAttrName(result.name),
2913  builder.getI64IntegerAttr(insertSplitDimension));
2914  if (innerParallel) {
2915  result.addAttribute(SplitReductionOp::getInnerParallelAttrName(result.name),
2916  builder.getUnitAttr());
2917  }
2918  if (useScalingAlgorithm) {
2919  result.addAttribute(
2920  SplitReductionOp::getUseScalingAlgorithmAttrName(result.name),
2921  builder.getUnitAttr());
2922  }
2923  if (useAlloc) {
2924  result.addAttribute(SplitReductionOp::getUseAllocAttrName(result.name),
2925  builder.getUnitAttr());
2926  }
2927  auto resultType = transform::AnyOpType::get(ctx);
2928  result.addTypes({resultType, resultType, resultType, resultType});
2929 }
2930 
2931 DiagnosedSilenceableFailure transform::SplitReductionOp::applyToOne(
2932  transform::TransformRewriter &rewriter, LinalgOp target,
2934  transform::TransformState &state) {
2935  ControlSplitReductionFn splitFn = [&](LinalgOp) {
2936  return linalg::SplitReductionOptions{int64_t(getSplitFactor()),
2937  unsigned(getInsertSplitDimension()),
2938  bool(getInnerParallel())};
2939  };
2940  rewriter.setInsertionPoint(target);
2941  FailureOr<SplitReductionResult> splitResult =
2942  (getUseScalingAlgorithm())
2943  ? splitReductionByScaling(rewriter, target, splitFn, getUseAlloc())
2944  : splitReduction(rewriter, target, splitFn, getUseAlloc());
2945  if (failed(splitResult))
2946  return emitDefaultDefiniteFailure(target);
2947 
2948  results.push_back(splitResult->initOrAlloc);
2949  results.push_back(splitResult->fillOp);
2950  results.push_back(splitResult->splitLinalgOp);
2951  results.push_back(splitResult->resultCombiningLinalgOp);
2953 }
2954 
2955 //===----------------------------------------------------------------------===//
2956 // TileReductionUsingForOp
2957 //===----------------------------------------------------------------------===//
2958 
2959 void transform::TileReductionUsingForOp::build(
2960  OpBuilder &builder, OperationState &result, Value target,
2961  ArrayRef<int64_t> staticTileSizes) {
2962  // Call the default builder.
2963  // This is future-proof re mixed static-dynamic and setting up the proper
2964  // operands segment sizes attributes for multiple variadic operands.
2965  // In the absence of this, horrible bugs ensue.
2966  // TODO: support mixed static-dynamic (see TileUsingForallOp).
2967  MLIRContext *ctx = builder.getContext();
2968  auto opTy = transform::AnyOpType::get(ctx);
2969  auto staticTileSizesAttr = builder.getDenseI64ArrayAttr(staticTileSizes);
2970  build(builder, result,
2971  /*resultTypes=*/TypeRange{opTy, opTy, opTy, opTy},
2972  /*target=*/target,
2973  /*tile_sizes=*/staticTileSizesAttr);
2974 }
2975 
2976 DiagnosedSilenceableFailure transform::TileReductionUsingForOp::applyToOne(
2977  transform::TransformRewriter &rewriter, Operation *target,
2979  transform::TransformState &state) {
2980  rewriter.setInsertionPoint(target);
2981 
2982  auto partialReductionOp = dyn_cast<PartialReductionOpInterface>(target);
2983  if (!partialReductionOp) {
2984  return emitSilenceableFailure(
2985  target->getLoc(),
2986  "Operation should implement PartialReductionOpInterface");
2987  }
2988  FailureOr<scf::SCFTilingResult> result = scf::tileReductionUsingScf(
2989  rewriter, partialReductionOp,
2991 
2992  if (failed(result))
2993  return emitDefaultSilenceableFailure(target);
2994  rewriter.replaceOp(target, result->replacements);
2995  for (Value initValue : result->initialValues)
2996  results.push_back(initValue.getDefiningOp());
2997  for (auto parallelTiledOp : result->tiledOps)
2998  results.push_back(parallelTiledOp);
2999  for (auto mergeOp : result->mergeOps)
3000  results.push_back(mergeOp);
3001  results.push_back(result->loops.front());
3003 }
3004 
3005 //===----------------------------------------------------------------------===//
3006 // TileReductionUsingForallOp
3007 //===----------------------------------------------------------------------===//
3008 
3009 void transform::TileReductionUsingForallOp::build(
3010  OpBuilder &builder, OperationState &result, Value target,
3011  ArrayRef<int64_t> staticNumThreads, ArrayRef<int64_t> staticTileSizes,
3012  ArrayAttr mapping) {
3013  // Call the default builder.
3014  // This is future-proof re mixed static-dynamic and setting up the proper
3015  // operands segment sizes attributes for multiple variadic operands.
3016  // In the absence of this, horrible bugs ensue.
3017  // TODO: support mixed static-dynamic (see TileUsingForallOp).
3018  MLIRContext *ctx = builder.getContext();
3019  auto opTy = transform::AnyOpType::get(ctx);
3020  auto staticNumThreadsAttr = builder.getDenseI64ArrayAttr(staticNumThreads);
3021  auto staticTileSizesAttr = builder.getDenseI64ArrayAttr(staticTileSizes);
3022  build(builder, result,
3023  /*resultTypes=*/TypeRange{opTy, opTy, opTy, opTy},
3024  /*target=*/target,
3025  /*num_threads=*/staticNumThreadsAttr,
3026  /*tile_sizes=*/staticTileSizesAttr,
3027  /*mapping=*/mapping);
3028 }
3029 
3030 DiagnosedSilenceableFailure transform::TileReductionUsingForallOp::applyToOne(
3031  transform::TransformRewriter &rewriter, LinalgOp target,
3033  transform::TransformState &state) {
3034  rewriter.setInsertionPoint(target);
3035  SmallVector<OpFoldResult> numThreads =
3036  getAsOpFoldResult(rewriter.getI64ArrayAttr(getNumThreads()));
3037  SmallVector<OpFoldResult> tileSizes =
3039  FailureOr<linalg::ForallReductionTilingResult> result =
3041  rewriter, cast<PartialReductionOpInterface>(target.getOperation()),
3042  numThreads, tileSizes, getMapping());
3043 
3044  if (failed(result)) {
3045  auto diag = emitSilenceableError() << "could not tile reduction";
3046  diag.attachNote(target.getLoc()) << "target operation";
3047  return diag;
3048  }
3049  for (Value initValue : result->initialValues)
3050  results.push_back(initValue.getDefiningOp());
3051  for (auto parallelTiledOp : result->parallelTiledOps)
3052  results.push_back(parallelTiledOp);
3053  for (auto mergeOp : result->mergeOps)
3054  results.push_back(mergeOp);
3055  results.push_back(result->loops);
3057 }
3058 
3059 //===----------------------------------------------------------------------===//
3060 // ContinuousTileSizesOp
3061 //===----------------------------------------------------------------------===//
3062 
3064 transform::ContinuousTileSizesOp::apply(transform::TransformRewriter &rewriter,
3065  TransformResults &transformResults,
3066  TransformState &state) {
3067 
3068  SmallVector<Operation *> targetOps =
3069  llvm::to_vector(state.getPayloadOps(getTarget()));
3070 
3071  if (!llvm::hasSingleElement(targetOps)) {
3072  return mlir::emitSilenceableFailure(getLoc())
3073  << "requires exactly one target (got " << llvm::range_size(targetOps)
3074  << ")";
3075  }
3076 
3077  Operation *target = *targetOps.begin();
3078  auto linalgOp = dyn_cast<LinalgOp>(target);
3079  auto tileableOp = dyn_cast<TilingInterface>(target);
3080 
3081  if (!linalgOp)
3082  return emitDefiniteFailure() << "expected Linalg Op";
3083 
3084  OpBuilder builder(linalgOp.getContext());
3085 
3086  if (isa<TransformParamTypeInterface>(getChunkSizes().getType())) {
3087  if (linalgOp.hasDynamicShape()) {
3088  auto diag = emitSilenceableError()
3089  << "cannot compute parametric tile sizes for dynamically "
3090  "shaped payload op";
3091  diag.attachNote(linalgOp->getLoc()) << "payload op";
3092  return diag;
3093  }
3094 
3095  FailureOr<StaticContinuousTileSizeSpecification> spec =
3096  computeStaticContinuousTileSizes(linalgOp, getDimension(),
3097  getTargetSize());
3098  if (failed(spec)) {
3099  return emitSilenceableError()
3100  << "failed to compute multi-size tiling sizes";
3101  }
3102 
3103  SmallVector<int64_t> chunkSizes;
3104 
3105  for (auto &&[tileSize, tripCount] :
3106  llvm::zip_equal(spec->tileSizes, spec->tripCounts))
3107  chunkSizes.push_back(tileSize * tripCount);
3108 
3109  auto getI64AttrsFromI64 = [&](ArrayRef<int64_t> values) {
3110  return llvm::map_to_vector(values, [&](int64_t value) -> Attribute {
3111  return builder.getI64IntegerAttr(value);
3112  });
3113  };
3114  transformResults.setParams(cast<OpResult>(getTileSizes()),
3115  getI64AttrsFromI64(spec->tileSizes));
3116  transformResults.setParams(cast<OpResult>(getChunkSizes()),
3117  getI64AttrsFromI64(chunkSizes));
3118 
3120  }
3121 
3122  builder.setInsertionPoint(linalgOp);
3123 
3124  OpFoldResult targetSize = builder.getIndexAttr(getTargetSize());
3125  unsigned dimension = getDimension();
3126 
3127  FailureOr<ContinuousTileSizeSpecification> spec = computeContinuousTileSizes(
3128  builder, tileableOp, dimension, targetSize, true);
3129  if (failed(spec)) {
3130  return emitSilenceableError() << "could not generate tile size computation";
3131  }
3132 
3133  AffineExpr s0 = builder.getAffineSymbolExpr(0);
3134  AffineExpr s1 = builder.getAffineSymbolExpr(1);
3135  auto apply = [&](AffineExpr expr, ArrayRef<OpFoldResult> ofrs) -> Value {
3136  return affine::makeComposedAffineApply(builder, linalgOp->getLoc(), expr,
3137  ofrs);
3138  };
3139 
3140  SmallVector<Value> chunkSizes;
3141  Value splitPoint;
3142  for (auto &&[tileSize, tripCount] :
3143  llvm::zip_equal(spec->tileSizes, spec->tripCounts)) {
3144  splitPoint = apply(s0 * s1, {tileSize, tripCount});
3145  chunkSizes.push_back(splitPoint);
3146  }
3147 
3148  auto getDefiningOps = [&](ArrayRef<Value> values) {
3149  return llvm::map_to_vector(values, [&](Value value) -> Operation * {
3150  return value.getDefiningOp();
3151  });
3152  };
3153 
3154  transformResults.set(cast<OpResult>(getTileSizes()),
3155  getDefiningOps(spec->tileSizes));
3156  transformResults.set(cast<OpResult>(getChunkSizes()),
3157  getDefiningOps(chunkSizes));
3158 
3160 }
3161 
3163 
3164  if (getTileSizes().getType() != getChunkSizes().getType()) {
3165  return emitOpError() << "expects all results type to be the same";
3166  }
3167 
3168  return success();
3169 }
3170 
3171 void transform::ContinuousTileSizesOp::getEffects(
3173  if (isa<TransformParamTypeInterface>(getTileSizes().getType()))
3174  onlyReadsPayload(effects);
3175  else
3176  modifiesPayload(effects);
3177  onlyReadsHandle(getTargetMutable(), effects);
3178  producesHandle(getOperation()->getOpResults(), effects);
3179 }
3180 
3182  Type targetType, Type tile_sizes,
3183  Type) {
3184  printer.printFunctionalType(TypeRange{targetType}, TypeRange{tile_sizes});
3185 }
3186 
3187 static ParseResult parseContinuousTileSizeTypes(OpAsmParser &parser,
3188  Type &targetType,
3189  Type &tileSizesType,
3190  Type &chunkSizesType) {
3191  FunctionType funcType;
3192  llvm::SMLoc typeLoc = parser.getCurrentLocation();
3193  if (failed(parser.parseType<FunctionType>(funcType)))
3194  return failure();
3195 
3196  if (funcType.getNumInputs() != 1 || funcType.getNumResults() != 1) {
3197  parser.emitError(typeLoc) << "expects a trailing functional type with one "
3198  "argument and one result";
3199  }
3200  targetType = funcType.getInput(0);
3201  tileSizesType = chunkSizesType = funcType.getResult(0);
3202 
3203  return success();
3204 }
3205 
3206 //===----------------------------------------------------------------------===//
3207 // TileUsingForOp
3208 //===----------------------------------------------------------------------===//
3209 
3210 void transform::TileUsingForOp::build(
3211  OpBuilder &builder, OperationState &result, TypeRange loopTypes,
3212  Value target, ArrayRef<int64_t> staticTileSizes,
3213  ArrayRef<int64_t> interchange,
3214  std::optional<ArrayRef<bool>> scalableSizes) {
3215  return build(builder, result, loopTypes,
3216  /*target=*/target,
3217  /*mixedTileSizes=*/
3218  getAsOpFoldResult(builder.getI64ArrayAttr(staticTileSizes)),
3219  interchange, scalableSizes);
3220 }
3221 
3222 void transform::TileUsingForOp::build(
3223  OpBuilder &builder, OperationState &result, Value target,
3224  ArrayRef<int64_t> staticTileSizes, ArrayRef<int64_t> interchange,
3225  std::optional<ArrayRef<bool>> scalableSizes) {
3226  build(builder, result, target,
3227  getAsOpFoldResult(builder.getI64ArrayAttr(staticTileSizes)),
3228  interchange, scalableSizes);
3229 }
3230 
3231 void transform::TileUsingForOp::build(
3232  OpBuilder &builder, OperationState &result, Value target,
3233  ArrayRef<OpFoldResult> mixedTileSizes, ArrayRef<int64_t> interchange,
3234  std::optional<ArrayRef<bool>> scalableSizes) {
3235  // Loop types are automaticaly splat by the callee, setting up one is
3236  // enough.
3237  SmallVector<Type> loopTypes(1, builder.getType<transform::AnyOpType>());
3238  build(builder, result, loopTypes, target, mixedTileSizes, interchange,
3239  scalableSizes);
3240 }
3241 
3242 void transform::TileUsingForOp::build(
3243  OpBuilder &builder, OperationState &result, TypeRange loopTypes,
3244  Value target, ArrayRef<OpFoldResult> mixedTileSizes,
3245  ArrayRef<int64_t> interchange,
3246  std::optional<ArrayRef<bool>> scalableSizes) {
3247  SmallVector<int64_t> staticTileSizes;
3248  SmallVector<Value> dynamicTileSizes;
3249  dispatchIndexOpFoldResults(mixedTileSizes, dynamicTileSizes, staticTileSizes);
3250  // Call the default builder which sets up the proper operands segment sizes
3251  // attributes for multiple variadic operands. In the absence of this,
3252  // horrible bugs ensue.
3253  auto staticTileSizesAttr = builder.getDenseI64ArrayAttr(staticTileSizes);
3254  unsigned numExpectedLoops =
3255  staticTileSizes.size() - llvm::count(staticTileSizes, 0);
3256  SmallVector<Type> resultTypes;
3257  resultTypes.reserve(numExpectedLoops);
3258  assert((loopTypes.size() == 1 || loopTypes.size() == numExpectedLoops) &&
3259  "expected one loop type or as many as loops");
3260  if (loopTypes.size() == 1)
3261  resultTypes.append(numExpectedLoops, loopTypes[0]);
3262  else
3263  llvm::append_range(resultTypes, loopTypes);
3264  SmallVector<bool> expandedScalableSizes(mixedTileSizes.size(), false);
3265  if (scalableSizes.has_value())
3266  expandedScalableSizes.assign(scalableSizes->begin(), scalableSizes->end());
3267  build(builder, result, /*tiled_linalg_op=*/target.getType(),
3268  /*loops=*/resultTypes,
3269  /*target=*/target,
3270  /*dynamic_sizes=*/dynamicTileSizes,
3271  /*static_sizes=*/staticTileSizesAttr,
3272  /*interchange=*/builder.getDenseI64ArrayAttr(interchange),
3273  /*scalable_sizes=*/expandedScalableSizes);
3274 }
3275 
3276 LogicalResult transform::TileUsingForOp::verify() {
3277  if (getMixedSizes().size() != getScalableSizes().size())
3278  return emitOpError("expected same number of sizes (")
3279  << getMixedSizes().size() << ") and scalable sizes ("
3280  << getScalableSizes().size() << ")";
3281  ArrayRef<int64_t> staticSizes = getStaticSizes();
3282  unsigned numExpectedLoops = staticSizes.size() - llvm::count(staticSizes, 0);
3283  if (getLoops().size() != numExpectedLoops)
3284  return emitOpError("expected number of loops to tile (")
3285  << numExpectedLoops << ") to match number of `loops` results ("
3286  << getLoops().size() << ")";
3287  return success();
3288 }
3289 
3291 transform::TileUsingForOp::apply(transform::TransformRewriter &rewriter,
3292  TransformResults &transformResults,
3293  TransformState &state) {
3294  ArrayRef<int64_t> tileSizes = getStaticSizes();
3295 
3296  SmallVector<Operation *> targets =
3297  llvm::to_vector(state.getPayloadOps(getTarget()));
3298  SmallVector<SmallVector<Operation *>> dynamicSizeProducers;
3300  dynamicSizeProducers.reserve(getDynamicSizes().size());
3301  paramSizes.reserve(getDynamicSizes().size());
3302  for (Value transformValue : getDynamicSizes()) {
3303  if (isa<ParamType>(transformValue.getType())) {
3304  dynamicSizeProducers.push_back({});
3305  ArrayRef<Attribute> params = state.getParams(transformValue);
3306  paramSizes.push_back(
3307  llvm::to_vector(llvm::map_range(params, [](Attribute attr) {
3308  return cast<IntegerAttr>(attr).getValue().getSExtValue();
3309  })));
3310 
3311  if (paramSizes.back().size() != targets.size()) {
3313  emitSilenceableError()
3314  << "expected as many parameter values ("
3315  << dynamicSizeProducers.back().size() << ") as target ops ("
3316  << targets.size() << ")";
3317  diag.attachNote(transformValue.getLoc()) << "for this parameter";
3318  return diag;
3319  }
3320 
3321  continue;
3322  }
3323  paramSizes.push_back({});
3324  dynamicSizeProducers.push_back(
3325  llvm::to_vector(state.getPayloadOps(transformValue)));
3326 
3327  if (dynamicSizeProducers.back().size() != targets.size()) {
3329  emitSilenceableError()
3330  << "expected as many dynamic size-producing operations ("
3331  << dynamicSizeProducers.back().size() << ") as target ops ("
3332  << targets.size() << ")";
3333  diag.attachNote(transformValue.getLoc()) << "for this handle";
3334  return diag;
3335  }
3336 
3337  for (Operation *op : dynamicSizeProducers.back()) {
3338  if (op->getNumResults() == 1 &&
3339  isa<IndexType>(op->getResult(0).getType())) {
3340  continue;
3341  }
3342 
3344  emitSilenceableError() << "expected sizes to be produced by ops "
3345  "with a single index-type result";
3346  diag.attachNote(op->getLoc()) << "size producer op";
3347  diag.attachNote(transformValue.getLoc()) << "for this handle";
3348  return diag;
3349  }
3350  }
3351 
3354  loops.resize(getLoops().size());
3355  auto scalableSizes = getScalableSizes();
3356  for (auto [i, op] : llvm::enumerate(targets)) {
3357  auto tilingInterface = dyn_cast<TilingInterface>(op);
3358  if (!tilingInterface) {
3360  emitSilenceableError()
3361  << "only ops implementing TilingInterface are supported";
3362  diag.attachNote(op->getLoc()) << "target op";
3363  return diag;
3364  }
3365  if (tileSizes.size() > tilingInterface.getLoopIteratorTypes().size()) {
3367  emitSilenceableError()
3368  << "too many tiles provided, expected at most "
3369  << tilingInterface.getLoopIteratorTypes().size() << " found "
3370  << tileSizes.size();
3371  diag.attachNote(op->getLoc()) << "target op";
3372  return diag;
3373  }
3374 
3375  scf::SCFTilingOptions tilingOptions;
3376  if (tileSizes.empty()) {
3377  tilingOptions.setTileSizeComputationFunction(
3379  return {};
3380  });
3381  } else {
3382  tilingOptions.setTileSizeComputationFunction([&, index = i](OpBuilder &b,
3383  Operation *) {
3385  sizes.reserve(tileSizes.size());
3386  unsigned dynamicIdx = 0;
3387 
3388  for (auto [ofrIdx, ofr] : llvm::enumerate(getMixedSizes())) {
3389  if (auto attr = llvm::dyn_cast_if_present<Attribute>(ofr)) {
3390  if (scalableSizes[ofrIdx]) {
3391  auto val = b.create<arith::ConstantIndexOp>(
3392  getLoc(), cast<IntegerAttr>(attr).getInt());
3393  Value vscale =
3394  b.create<vector::VectorScaleOp>(getLoc(), b.getIndexType());
3395  sizes.push_back(
3396  b.create<arith::MulIOp>(getLoc(), val, vscale).getResult());
3397  } else {
3398  sizes.push_back(attr);
3399  }
3400  continue;
3401  }
3402  ArrayRef<Operation *> dynamicSizes = dynamicSizeProducers[dynamicIdx];
3403  ArrayRef<int64_t> params = paramSizes[dynamicIdx];
3404  ++dynamicIdx;
3405  assert((dynamicSizes.empty() ^ params.empty()) &&
3406  "expected either dynamic sizes or parameters");
3407  if (!params.empty()) {
3408  sizes.push_back(b.getIndexAttr(params[index]));
3409  } else {
3410  sizes.push_back(dynamicSizes[index]->getResult(0));
3411  }
3412  }
3413  return sizes;
3414  });
3415  }
3416 
3417  tilingOptions.setInterchange(getInterchange());
3418  FailureOr<scf::SCFTilingResult> maybeTilingResult =
3419  tileUsingSCF(rewriter, tilingInterface, tilingOptions);
3420  if (failed(maybeTilingResult))
3422 
3423  rewriter.replaceOp(op, maybeTilingResult->replacements);
3424 
3425  tiled.append(maybeTilingResult->tiledOps);
3426  for (const auto &en2 : llvm::enumerate(maybeTilingResult->loops))
3427  loops[en2.index()].push_back(en2.value());
3428  }
3429 
3430  transformResults.set(cast<OpResult>(getTiledLinalgOp()), tiled);
3431  for (const auto &en : llvm::enumerate(loops))
3432  transformResults.set(cast<OpResult>(getLoops()[en.index()]), en.value());
3433 
3435 }
3436 
3438  ValueRange dynamic = getDynamicSizes();
3439  ArrayRef<int64_t> tileSizes = getStaticSizes();
3440  SmallVector<OpFoldResult> results;
3441  results.reserve(tileSizes.size());
3442  unsigned dynamicPos = 0;
3443  Builder builder(getContext());
3444  for (int64_t size : tileSizes) {
3445  if (size == ShapedType::kDynamic) {
3446  results.push_back(dynamic[dynamicPos++]);
3447  } else {
3448  results.push_back(builder.getIndexAttr(size));
3449  }
3450  }
3451  return results;
3452 }
3453 
3454 void transform::TileUsingForOp::getEffects(
3456  consumesHandle(getTargetMutable(), effects);
3457  onlyReadsHandle(getDynamicSizesMutable(), effects);
3458  producesHandle(getOperation()->getOpResults(), effects);
3459  modifiesPayload(effects);
3460 }
3461 
3462 //===----------------------------------------------------------------------===//
3463 // TileUsingForallOp
3464 //===----------------------------------------------------------------------===//
3465 
3466 void transform::TileUsingForallOp::build(OpBuilder &builder,
3467  OperationState &result, Value target,
3468  ArrayRef<int64_t> staticTileSizes,
3470  ArrayAttr mapping) {
3471  return build(builder, result,
3472  /*target=*/target,
3473  /*mixedTileSizes=*/
3474  getAsOpFoldResult(builder.getI64ArrayAttr(staticTileSizes)),
3475  /*_=*/TileSizesSpec(),
3476  /*mapping=*/mapping);
3477 }
3478 
3479 void transform::TileUsingForallOp::build(OpBuilder &builder,
3480  OperationState &result, Value target,
3481  ArrayRef<OpFoldResult> mixedTileSizes,
3483  ArrayAttr mapping) {
3484  SmallVector<int64_t> staticTileSizes;
3485  SmallVector<Value> dynamicTileSizes;
3486  dispatchIndexOpFoldResults(mixedTileSizes, dynamicTileSizes, staticTileSizes);
3487  // Call the default builder which sets up the proper operands segment sizes
3488  // attributes for multiple variadic operands. In the absence of this,
3489  // horrible bugs ensue.
3490  MLIRContext *ctx = builder.getContext();
3491  auto operationType = transform::AnyOpType::get(ctx);
3492  auto staticTileSizesAttr = builder.getDenseI64ArrayAttr(staticTileSizes);
3493  build(builder, result,
3494  /*resultTypes=*/TypeRange{operationType, operationType},
3495  /*target=*/target,
3496  /*num_threads=*/ValueRange{},
3497  /*tile_sizes=*/dynamicTileSizes,
3498  /*packed_num_threads=*/Value(),
3499  /*packed_tile_sizes=*/Value(),
3500  /*static_num_threads=*/builder.getDenseI64ArrayAttr({}),
3501  /*static_tile_sizes=*/staticTileSizesAttr,
3502  /*mapping=*/mapping);
3503 }
3504 
3505 void transform::TileUsingForallOp::build(OpBuilder &builder,
3506  OperationState &result, Value target,
3507  ArrayRef<int64_t> staticNumThreads,
3509  ArrayAttr mapping) {
3510  return build(builder, result, target,
3511  getAsOpFoldResult(builder.getI64ArrayAttr(staticNumThreads)),
3512  NumThreadsSpec(), mapping);
3513 }
3514 
3515 void transform::TileUsingForallOp::build(OpBuilder &builder,
3516  OperationState &result, Value target,
3517  ArrayRef<OpFoldResult> mixedNumThreads,
3519  ArrayAttr mapping) {
3520  SmallVector<int64_t> staticNumThreads;
3521  SmallVector<Value> dynamicNumThreads;
3522  dispatchIndexOpFoldResults(mixedNumThreads, dynamicNumThreads,
3523  staticNumThreads);
3524  // Call the default builder which sets up the proper operands segment sizes
3525  // attributes for multiple variadic operands. In the absence of this,
3526  // horrible bugs ensue.
3527  MLIRContext *ctx = builder.getContext();
3528  auto operationType = transform::AnyOpType::get(ctx);
3529  auto staticNumThreadsAttr = builder.getDenseI64ArrayAttr(staticNumThreads);
3530  build(builder, result,
3531  /*resultTypes=*/TypeRange{operationType, operationType},
3532  /*target=*/target,
3533  /*num_threads=*/dynamicNumThreads,
3534  /*tile_sizes=*/ValueRange{},
3535  /*packed_num_threads=*/Value(),
3536  /*packed_tile_sizes=*/Value(),
3537  /*static_num_threads=*/staticNumThreadsAttr,
3538  /*static_tile_sizes=*/builder.getDenseI64ArrayAttr({}),
3539  /*mapping=*/mapping);
3540 }
3541 
3542 /// Given `lbs`, `ubs` and `steps` of loops, return (for each loop), the
3543 /// normalized upper bound.
3547  ArrayRef<OpFoldResult> steps) {
3548  AffineExpr s0, s1, s2;
3549  bindSymbols(rewriter.getContext(), s0, s1, s2);
3550  AffineExpr normalizedUbExpr = (s1 - s0).ceilDiv(s2);
3551  SmallVector<OpFoldResult> normalizedUbs;
3552  for (auto [lb, ub, step] : llvm::zip_equal(lbs, ubs, steps)) {
3554  rewriter, loc, normalizedUbExpr, {lb, ub, step});
3555  normalizedUbs.push_back(normalizedUb);
3556  }
3557  return normalizedUbs;
3558 }
3559 
3560 /// When a loop is normalized, the uses of the induction variable within the
3561 /// loop need to replaced with `original_lb + old_iv * original_step`.
3563  Location loc, ValueRange ivs,
3565  ArrayRef<OpFoldResult> steps) {
3566  AffineExpr s0, s1;
3567  AffineExpr d0;
3568  bindSymbols(rewriter.getContext(), s0, s1);
3569  bindDims(rewriter.getContext(), d0);
3570  AffineExpr denormExpr = s0 + d0 * s1;
3571  SmallVector<Value> denormalizedIvs;
3572 
3573  for (auto [iv, lb, step] : llvm::zip_equal(ivs, lbs, steps)) {
3575  rewriter, loc, denormExpr, ArrayRef<OpFoldResult>{iv, lb, step});
3576  denormalizedIvs.push_back(
3577  getValueOrCreateConstantIndexOp(rewriter, loc, denormValue));
3578  }
3579  return denormalizedIvs;
3580 }
3581 
3582 /// Given a `scf.forall` loop return a loop op with the loop bounds
3583 /// normalized.
3584 /// TODO: Replace this with a general utility to normalize `scf.forall`.
3585 /// At the time of writing, this wasnt done since adding this to `scf`
3586 /// dialect would disallow using of `affine.apply` operations due
3587 /// to cyclic dependencies. To avoid churn in lit tests
3588 /// with the change this was added with, defer that to a follow up.
3589 static scf::ForallOp normalizeForallLoopOp(RewriterBase &rewriter,
3590  scf::ForallOp loop) {
3591  SmallVector<OpFoldResult> lbs = loop.getMixedLowerBound();
3592  SmallVector<OpFoldResult> ubs = loop.getMixedUpperBound();
3593  SmallVector<OpFoldResult> steps = loop.getMixedStep();
3594 
3595  if (llvm::all_of(lbs, isZeroInteger) && llvm::all_of(steps, isOneInteger)) {
3596  return loop;
3597  }
3598 
3599  Location loc = loop.getLoc();
3600  SmallVector<OpFoldResult> normalizedUbs =
3601  normalizeUpperBounds(rewriter, loc, lbs, ubs, steps);
3602  SmallVector<OpFoldResult> normalizedLbs(normalizedUbs.size(),
3603  rewriter.getIndexAttr(0));
3604  SmallVector<OpFoldResult> normalizedSteps(normalizedUbs.size(),
3605  rewriter.getIndexAttr(1));
3606 
3607  auto normalizedForallOp = rewriter.create<scf::ForallOp>(
3608  loc, normalizedLbs, normalizedUbs, normalizedSteps, loop.getOutputs(),
3609  loop.getMapping(), [](OpBuilder &, Location, ValueRange) {});
3610 
3611  auto normalizedLoopIvs = normalizedForallOp.getInductionVars();
3612  OpBuilder::InsertionGuard g(rewriter);
3613  Block *normalizedLoopBlock = normalizedForallOp.getBody();
3614  rewriter.setInsertionPointToStart(normalizedLoopBlock);
3615 
3616  SmallVector<Value> argValues =
3617  denormalizeIndVar(rewriter, loc, normalizedLoopIvs, lbs, steps);
3618  argValues.append(normalizedForallOp.getRegionIterArgs().begin(),
3619  normalizedForallOp.getRegionIterArgs().end());
3620  Block *origLoopBlock = loop.getBody();
3621  rewriter.mergeBlocks(origLoopBlock, normalizedLoopBlock, argValues);
3622 
3623  rewriter.replaceOp(loop, normalizedForallOp);
3624  return normalizedForallOp;
3625 }
3626 
3628  RewriterBase &rewriter, transform::TransformState &state,
3629  TransformOpInterface transformOp, Operation *target,
3630  ArrayRef<OpFoldResult> mixedNumThreads,
3631  ArrayRef<OpFoldResult> mixedTileSizes, std::optional<ArrayAttr> mapping,
3632  scf::SCFTilingResult &tilingResult) {
3633  // Transform all targets one by one.
3634  auto tileableOp = dyn_cast<TilingInterface>(target);
3635  if (!tileableOp) {
3637  transformOp.emitSilenceableError()
3638  << "only TilingInterface ops are supported";
3639  diag.attachNote(target->getLoc()) << "target op";
3640  return diag;
3641  }
3642  rewriter.setInsertionPoint(tileableOp);
3645  if (!mixedNumThreads.empty()) {
3646  options.setNumThreads(mixedNumThreads);
3647  } else {
3648  options.setTileSizes(mixedTileSizes);
3649  }
3650  if (mapping) {
3651  options.setMapping(mapping.value().getValue());
3652  }
3653  FailureOr<scf::SCFTilingResult> maybeTilingResult =
3654  scf::tileUsingSCF(rewriter, tileableOp, options);
3655 
3656  if (failed(maybeTilingResult))
3657  return transformOp.emitDefaultSilenceableFailure(tileableOp);
3658 
3659  rewriter.replaceOp(tileableOp, maybeTilingResult->replacements);
3660 
3661  tilingResult = *maybeTilingResult;
3662 
3663  if (mixedNumThreads.empty()) {
3664  auto generatedForallOp = cast<scf::ForallOp>(tilingResult.loops.front());
3665  OpBuilder::InsertionGuard g(rewriter);
3666  rewriter.setInsertionPoint(generatedForallOp);
3667  scf::ForallOp normalizedForallOp =
3668  normalizeForallLoopOp(rewriter, generatedForallOp);
3669  tilingResult.loops.front() = normalizedForallOp;
3670  }
3671 
3673 }
3674 
3675 DiagnosedSilenceableFailure transform::TileUsingForallOp::apply(
3676  transform::TransformRewriter &rewriter,
3677  transform::TransformResults &transformResults,
3678  transform::TransformState &state) {
3679  auto transformOp = cast<TransformOpInterface>(getOperation());
3680 
3681  // Result payload ops.
3682  SmallVector<Operation *> tileOps;
3683  SmallVector<Operation *> tiledOps;
3684 
3685  // Unpack handles.
3686  SmallVector<OpFoldResult> mixedNumThreads;
3688  getPackedNumThreads()
3690  state, transformOp, mixedNumThreads, getPackedNumThreads())
3692  state, transformOp, mixedNumThreads, getMixedNumThreads());
3693  if (!status.succeeded())
3694  return status;
3695  SmallVector<OpFoldResult> mixedTileSizes;
3696  status = getPackedTileSizes()
3698  state, transformOp, mixedTileSizes, getPackedTileSizes())
3700  state, transformOp, mixedTileSizes, getMixedTileSizes());
3701  if (!status.succeeded())
3702  return status;
3703 
3704  for (Operation *target : state.getPayloadOps(getTarget())) {
3705  scf::SCFTilingResult tilingResult;
3707  rewriter, state, transformOp, target, mixedNumThreads, mixedTileSizes,
3708  getMapping(), tilingResult);
3709  if (!diag.succeeded())
3710  return diag;
3711  tileOps.push_back(tilingResult.loops.front());
3712  tiledOps.append(tilingResult.tiledOps);
3713  }
3714 
3715  transformResults.set(cast<OpResult>(getForallOp()), tileOps);
3716  transformResults.set(cast<OpResult>(getTiledOp()), tiledOps);
3717 
3719 }
3720 
3721 void transform::TileUsingForallOp::getEffects(
3723  consumesHandle(getTargetMutable(), effects);
3724  onlyReadsHandle(getTileSizesMutable(), effects);
3725  onlyReadsHandle(getNumThreadsMutable(), effects);
3726  onlyReadsHandle(getPackedNumThreadsMutable(), effects);
3727  onlyReadsHandle(getPackedTileSizesMutable(), effects);
3728  producesHandle(getOperation()->getOpResults(), effects);
3729  modifiesPayload(effects);
3730 }
3731 
3732 SmallVector<OpFoldResult> TileUsingForallOp::getMixedNumThreads() {
3733  Builder b(getContext());
3734  return getMixedValues(getStaticNumThreads(), getNumThreads(), b);
3735 }
3736 
3737 SmallVector<OpFoldResult> TileUsingForallOp::getMixedTileSizes() {
3738  Builder b(getContext());
3739  return getMixedValues(getStaticTileSizes(), getTileSizes(), b);
3740 }
3741 
3742 LogicalResult TileUsingForallOp::verify() {
3743  int numThreadsSpec = static_cast<int>(!getMixedNumThreads().empty()) +
3744  static_cast<int>(getPackedNumThreads() != Value());
3745  if (numThreadsSpec > 1)
3746  return emitOpError(
3747  "num_threads and packed_num_threads are mutually exclusive");
3748  int tileSizesSpec = static_cast<int>(!getMixedTileSizes().empty()) +
3749  static_cast<int>(getPackedTileSizes() != Value());
3750  if (tileSizesSpec > 1)
3751  return emitOpError(
3752  "tile_sizes and packed_tile_sizes are mutually exclusive");
3753  if (numThreadsSpec == 0 && tileSizesSpec == 0)
3754  return emitOpError("either (packed_)num_threads or (packed_)tile_sizes "
3755  "must be specified");
3756  return success();
3757 }
3758 
3759 //===----------------------------------------------------------------------===//
3760 // VectorizeChildrenAndApplyPatternsOp
3761 //===----------------------------------------------------------------------===//
3762 
3763 void transform::VectorizeChildrenAndApplyPatternsOp::build(
3764  OpBuilder &builder, OperationState &result, Value target,
3765  bool vectorizePadding, bool vectorizeExtract, bool flatten1DDepthwiseConv) {
3766  result.addOperands(target);
3767  if (vectorizePadding) {
3768  result.addAttribute(
3769  VectorizeChildrenAndApplyPatternsOp::getVectorizePaddingAttrName(
3770  result.name),
3771  builder.getUnitAttr());
3772  }
3773  if (vectorizeExtract) {
3774  result.addAttribute(
3775  VectorizeChildrenAndApplyPatternsOp::getVectorizeNdExtractAttrName(
3776  result.name),
3777  builder.getUnitAttr());
3778  }
3779  if (flatten1DDepthwiseConv) {
3780  result.addAttribute(
3781  VectorizeChildrenAndApplyPatternsOp::getFlatten_1dDepthwiseConvAttrName(
3782  result.name),
3783  builder.getUnitAttr());
3784  }
3785  result.addTypes(transform::AnyOpType::get(builder.getContext()));
3786 }
3787 
3788 namespace {
3789 /// This is an helper only to call vectorize via a pattern inside of
3790 /// VectorizeChildrenAndApplyPatternsOp::applyToOne.
3791 struct VectorizationPattern : public RewritePattern {
3792  explicit VectorizationPattern(MLIRContext *context,
3793  bool vectorizeExtract = false,
3794  bool flattenConv = false)
3795  : RewritePattern(MatchAnyOpTypeTag(), /*benefit=*/1, context),
3796  vectorizeNDExtract(vectorizeExtract),
3797  flatten1DDepthwiseConv(flattenConv) {}
3798  LogicalResult matchAndRewrite(Operation *op,
3799  PatternRewriter &rewriter) const override {
3801  return rewriter.notifyMatchFailure(op,
3802  "Unsupported Op, cannot vectorize");
3803  return vectorize(rewriter, op, /*inputVectorSizes=*/{},
3804  /*inputScalableVecDims=*/{}, vectorizeNDExtract,
3805  flatten1DDepthwiseConv);
3806  }
3807 
3808 private:
3809  /// Controls whether to vectorize `tensor.extract` when the input tensor is
3810  /// rank >= 2.
3811  bool vectorizeNDExtract = false;
3812  /// Controls whether to "flatten" the channel dimension when vectorising 1D
3813  /// depthwise convolutions. This should lead to bette vectorization for
3814  /// tensors with a low number of channel dimensions.
3815  bool flatten1DDepthwiseConv = false;
3816 };
3817 } // namespace
3818 
3820 transform::VectorizeChildrenAndApplyPatternsOp::applyToOne(
3821  transform::TransformRewriter &rewriter, Operation *target,
3823  transform::TransformState &state) {
3824  if (!target->hasTrait<OpTrait::IsIsolatedFromAbove>()) {
3825  auto diag = this->emitOpError("requires isolated-from-above targets");
3826  diag.attachNote(target->getLoc()) << "non-isolated target";
3828  }
3829 
3830  MLIRContext *ctx = getContext();
3832  patterns.add<VectorizationPattern>(ctx, getVectorizeNdExtract(),
3833  getFlatten_1dDepthwiseConv());
3834 
3835  if (!getDisableTransferPermutationMapLoweringPatterns())
3837 
3838  if (!getDisableMultiReductionToContractPatterns())
3840 
3842 
3845  /*benefit=*/2);
3846  vector::TransferReadOp::getCanonicalizationPatterns(patterns, ctx);
3847  vector::TransferWriteOp::getCanonicalizationPatterns(patterns, ctx);
3849 
3851 
3852  if (getVectorizePadding()) {
3854  // This creates an alternative path for lowering tensor.pad - by
3855  // decomposing it into e.g. linalg.fill.
3857  }
3859 
3860  TrackingListener listener(state, *this);
3861  if (failed(
3862  applyPatternsGreedily(target, std::move(patterns),
3863  GreedyRewriteConfig().setListener(&listener))))
3864  return emitDefaultDefiniteFailure(target);
3865 
3866  results.push_back(target);
3868 }
3869 
3870 //===----------------------------------------------------------------------===//
3871 // VectorizeOp
3872 //===----------------------------------------------------------------------===//
3873 
3874 DiagnosedSilenceableFailure transform::VectorizeOp::apply(
3875  transform::TransformRewriter &rewriter,
3876  mlir::transform::TransformResults &transformResults,
3878  auto targets = state.getPayloadOps(getTarget());
3879  if (std::empty(targets))
3881  auto transformOp = cast<TransformOpInterface>(getOperation());
3882  SmallVector<int64_t> vectorSizes;
3884  state, transformOp, getMixedVectorSizes(), vectorSizes);
3885  if (!status.succeeded())
3886  return status;
3887 
3888  // TODO: Check that the correct number of vectorSizes was provided.
3889  for (Operation *target : targets) {
3890  if (!linalg::hasVectorizationImpl(target)) {
3891  return mlir::emitSilenceableFailure(target->getLoc())
3892  << "Unsupported Op, cannot vectorize";
3893  }
3894 
3895  if (failed(linalg::vectorize(rewriter, target, vectorSizes,
3896  getScalableSizes(),
3897  getVectorizeNdExtract().value_or(false)))) {
3898  return mlir::emitSilenceableFailure(target->getLoc())
3899  << "Attempted to vectorize, but failed";
3900  }
3901  }
3902 
3904 }
3905 
3906 void transform::VectorizeOp::getEffects(
3908  consumesHandle(getTargetMutable(), effects);
3909  onlyReadsHandle(getVectorSizesMutable(), effects);
3910  modifiesPayload(effects);
3911 }
3912 
3913 SmallVector<OpFoldResult> VectorizeOp::getMixedVectorSizes() {
3914  OpBuilder b(getContext());
3915  return getMixedValues(getStaticVectorSizes(), getVectorSizes(), b);
3916 }
3917 
3918 LogicalResult transform::VectorizeOp::verify() {
3919  if (getStaticVectorSizes().size() != getScalableSizes().size())
3920  return emitOpError("expected same number of vector sizes (")
3921  << getStaticVectorSizes().size() << ") and scalable sizes ("
3922  << getScalableSizes().size() << ")";
3923  return success();
3924 }
3925 
3926 //===----------------------------------------------------------------------===//
3927 // HoistRedundantVectorTransfersOp
3928 //===----------------------------------------------------------------------===//
3929 
3931 transform::HoistRedundantVectorTransfersOp::applyToOne(
3932  transform::TransformRewriter &rewriter, func::FuncOp target,
3934  transform::TransformState &state) {
3935  // WARNING: This hoisting does not model parallelism and is generally
3936  // incorrect when used on distributed loops with memref semantics!
3937  // TODO: obsolete and should be retired.
3938  linalg::hoistRedundantVectorTransfers(target, getVerifyNonZeroTrip());
3939  results.push_back(target);
3941 }
3942 
3943 //===----------------------------------------------------------------------===//
3944 // HoistRedundantVectorBroadcastsOp
3945 //===----------------------------------------------------------------------===//
3946 
3948 transform::HoistRedundantVectorBroadcastsOp::applyToOne(
3949  transform::TransformRewriter &rewriter, mlir::Operation *target,
3951  transform::TransformState &state) {
3952  rewriter.setInsertionPoint(target);
3953  linalg::hoistRedundantVectorBroadcasts(rewriter, target);
3954  results.push_back(target);
3956 }
3957 
3958 //===----------------------------------------------------------------------===//
3959 // ConvertConv2DToImg2ColOp.
3960 //===----------------------------------------------------------------------===//
3961 
3962 DiagnosedSilenceableFailure transform::ConvertConv2DToImg2ColOp::applyToOne(
3963  transform::TransformRewriter &rewriter, linalg::LinalgOp target,
3965  transform::TransformState &state) {
3966  rewriter.setInsertionPoint(target);
3967  auto maybeTransformed =
3969  target)
3970  .Case([&](linalg::Conv2DNhwcHwcfOp op) {
3971  return rewriteInIm2Col(rewriter, op);
3972  })
3973  .Case([&](linalg::Conv2DNhwcFhwcOp op) {
3974  return rewriteInIm2Col(rewriter, op);
3975  })
3976  .Case([&](linalg::DepthwiseConv2DNhwcHwcOp op) {
3977  return rewriteInIm2Col(rewriter, op);
3978  })
3979  .Case([&](linalg::Conv2DNchwFchwOp op) {
3980  return rewriteInIm2Col(rewriter, op);
3981  })
3982  .Default([&](Operation *op) {
3983  return rewriter.notifyMatchFailure(op, "not supported");
3984  });
3985  if (failed(maybeTransformed))
3986  return emitDefaultSilenceableFailure(target);
3987  // Handle to the operation producing the img2col tensor.
3988  results.push_back(maybeTransformed->first);
3989  // Handle to the operation that replaces the original convolution.
3990  results.push_back(maybeTransformed->second);
3992 }
3993 
3994 //===----------------------------------------------------------------------===//
3995 // FlattenElementwiseLinalgOp.
3996 //===----------------------------------------------------------------------===//
3997 
3998 DiagnosedSilenceableFailure transform::FlattenElementwiseLinalgOp::applyToOne(
3999  transform::TransformRewriter &rewriter, linalg::LinalgOp target,
4001  transform::TransformState &state) {
4002  rewriter.setInsertionPoint(target);
4003  if (!isElementwise(target))
4004  return mlir::emitSilenceableFailure(target->getLoc())
4005  << "only elementwise flattening is supported";
4006 
4007  // If rank <= 1, do nothing
4008  if (target.getNumLoops() <= 1) {
4009  results.push_back(target);
4011  }
4012 
4013  // Attempt to flatten all dims to one.
4014  ReassociationIndices reassociation(target.getNumLoops());
4015  std::iota(reassociation.begin(), reassociation.end(), 0);
4016  auto maybeFlattened =
4017  collapseOpIterationDims(target, reassociation, rewriter);
4018  if (failed(maybeFlattened))
4019  return mlir::emitSilenceableFailure(target->getLoc())
4020  << "attempted to flatten, but failed";
4021  results.push_back(maybeFlattened->collapsedOp);
4022  rewriter.replaceOp(target, maybeFlattened->results);
4024 }
4025 
4026 //===----------------------------------------------------------------------===//
4027 // TransposeConv2DOp
4028 //===----------------------------------------------------------------------===//
4029 
4030 DiagnosedSilenceableFailure transform::TransposeConv2DOp::applyToOne(
4031  transform::TransformRewriter &rewriter, linalg::LinalgOp target,
4033  transform::TransformState &state) {
4034  rewriter.setInsertionPoint(target);
4035  auto maybeTransformed =
4037  .Case([&](linalg::Conv2DNhwcFhwcOp op) {
4038  return transposeConv2D(rewriter, op);
4039  })
4040  .Case([&](linalg::Conv2DNhwcFhwcQOp op) {
4041  return transposeConv2D(rewriter, op);
4042  })
4043  .Default([&](Operation *op) {
4044  return rewriter.notifyMatchFailure(op, "not supported");
4045  });
4046  if (failed(maybeTransformed))
4047  return emitDefaultSilenceableFailure(target);
4048  // Handle to the new Conv2D operation with transposed filters
4049  results.push_back(*maybeTransformed);
4051 }
4052 
4053 //===----------------------------------------------------------------------===//
4054 // TransposeMatmulOp
4055 //===----------------------------------------------------------------------===//
4056 
4057 DiagnosedSilenceableFailure transform::TransposeMatmulOp::applyToOne(
4058  transform::TransformRewriter &rewriter, linalg::LinalgOp target,
4060  transform::TransformState &state) {
4061  rewriter.setInsertionPoint(target);
4062  bool transposeLHS = getInputToTranspose() == TransposeMatmulInput::lhs;
4063  auto maybeTransformed =
4065  .Case([&](linalg::MatmulOp op) {
4066  return transposeMatmul(rewriter, op, transposeLHS);
4067  })
4068  .Case([&](linalg::BatchMatmulOp op) {
4069  return transposeBatchMatmul(rewriter, op, transposeLHS);
4070  })
4071  .Default([&](Operation *op) { return failure(); });
4072  if (failed(maybeTransformed))
4073  return emitSilenceableFailure(target->getLoc()) << "not supported";
4074  // Handle to the new Matmul operation with transposed filters
4075  results.push_back(*maybeTransformed);
4077 }
4078 
4079 //===----------------------------------------------------------------------===//
4080 // InsertSliceToCopyOp
4081 //===----------------------------------------------------------------------===//
4082 template <typename OpTy>
4085  transform::TransformState &state) {
4086  static_assert(llvm::is_one_of<OpTy, tensor::InsertSliceOp,
4087  tensor::ParallelInsertSliceOp>() &&
4088  "wrong op type");
4089 
4090  if (auto copySource =
4091  target.getSource().template getDefiningOp<linalg::CopyOp>()) {
4092  results.push_back(copySource);
4094  }
4095 
4096  // If we are inside an InParallel region, temporarily set the insertion point
4097  // outside: only tensor.parallel_insert_slice ops are allowed in there.
4098  if constexpr (std::is_same_v<OpTy, tensor::ParallelInsertSliceOp>) {
4099  rewriter.setInsertionPoint(
4100  target->template getParentOfType<scf::InParallelOp>());
4101  }
4102 
4103  Value extracted = rewriter.create<tensor::ExtractSliceOp>(
4104  target.getLoc(), target.getDest(), target.getMixedOffsets(),
4105  target.getMixedSizes(), target.getMixedStrides());
4106  Value copied = rewriter
4107  .create<linalg::CopyOp>(target.getLoc(),
4108  target.getSource(), extracted)
4109  .getResult(0);
4110  // Reset the insertion point.
4111  rewriter.setInsertionPoint(target);
4112  rewriter.replaceOpWithNewOp<OpTy>(
4113  target, copied, target.getDest(), target.getMixedOffsets(),
4114  target.getMixedSizes(), target.getMixedStrides());
4115 
4116  results.push_back(copied.getDefiningOp());
4118 }
4119 
4120 DiagnosedSilenceableFailure transform::InsertSliceToCopyOp::applyToOne(
4121  transform::TransformRewriter &rewriter, Operation *targetOp,
4123  transform::TransformState &state) {
4124 
4125  rewriter.setInsertionPoint(targetOp);
4126  if (auto target = dyn_cast<tensor::InsertSliceOp>(targetOp))
4127  return doit(rewriter, target, results, state);
4128  if (auto target = dyn_cast<tensor::ParallelInsertSliceOp>(targetOp))
4129  return doit(rewriter, target, results, state);
4130 
4132  emitSilenceableError()
4133  << "only InsertSliceOp and ParallelInsertSliceOp ops are supported";
4134  diag.attachNote(targetOp->getLoc()) << "target op";
4135  return diag;
4136 }
4137 
4138 //===----------------------------------------------------------------------===//
4139 // MapCopyToThreadsOp
4140 //===----------------------------------------------------------------------===//
4141 
4142 DiagnosedSilenceableFailure transform::MapCopyToThreadsOp::applyToOne(
4143  transform::TransformRewriter &rewriter, Operation *target,
4145  transform::TransformState &state) {
4146  // Check if the op is supported.
4147  if (!isa<linalg::CopyOp, tensor::PadOp>(target)) {
4149  emitSilenceableError()
4150  << "only linalg.copy and tensor.pad target ops are supported";
4151  diag.attachNote(target->getLoc()) << "target op";
4152  return diag;
4153  }
4154  assert(target->getNumResults() == 1 && "expected single result");
4155  auto resultShapedType = cast<ShapedType>(target->getResult(0).getType());
4156  if (!resultShapedType.hasStaticShape()) {
4158  emitSilenceableError()
4159  << "only statically sized ops of rank <= 3 are supported";
4160  diag.attachNote(target->getLoc()) << "target op";
4161  return diag;
4162  }
4163 
4164  // Conservatively set the minimum viable desired bitwidth alignment.
4165  int64_t desiredBitAlignment = getDesiredBitAlignment();
4166  int64_t eltBitwidth =
4167  resultShapedType.getElementType().getIntOrFloatBitWidth();
4168  if (desiredBitAlignment % eltBitwidth != 0) {
4169  desiredBitAlignment = eltBitwidth;
4170  }
4171 
4172  gpu::CopyMappingInfo mapping(
4173  /*ctx=*/getContext(),
4174  /*totalNumThreads=*/getTotalNumThreads(),
4175  /*alignment=*/desiredBitAlignment,
4176  /*sizes=*/resultShapedType.getShape(),
4177  /*favorPredication=*/false,
4178  /*elementalBitwidth=*/
4179  resultShapedType.getElementType().getIntOrFloatBitWidth());
4180  if (mapping.status == gpu::CopyMappingInfo::Status::Invalid) {
4182  emitSilenceableError()
4183  << "too few threads to map copy op to threads on the most minor "
4184  "dimension, given alignment and vector size constraints, try "
4185  "smaller tile size of mapping to more threads";
4186  diag.attachNote(target->getLoc()) << "target op";
4187  return diag;
4188  }
4189 
4190  // OpBuilder only used to compute attributes.
4191  OpBuilder b(getContext());
4192  scf::SCFTilingResult tilingResult;
4194  /*rewriter=*/rewriter,
4195  /*state=*/state,
4196  /*transformOp=*/*this,
4197  /*target=*/target,
4198  /*mixedNumThreads=*/getMixedValues(mapping.numThreads, {}, b),
4199  /*mixedTileSizes=*/ArrayRef<OpFoldResult>{},
4200  /*mapping=*/b.getArrayAttr(mapping.threadMapping),
4201  /*tilingResult=*/tilingResult);
4202  if (!diag.succeeded())
4203  return diag;
4204 
4205  results.push_back(tilingResult.loops.front());
4206  for (auto op : tilingResult.tiledOps)
4207  results.push_back(op);
4209 }
4210 
4211 //===----------------------------------------------------------------------===//
4212 // WinogradConv2DOp
4213 //===----------------------------------------------------------------------===//
4214 
4215 DiagnosedSilenceableFailure transform::WinogradConv2DOp::applyToOne(
4216  transform::TransformRewriter &rewriter, linalg::LinalgOp target,
4218  transform::TransformState &state) {
4219  rewriter.setInsertionPoint(target);
4220  FailureOr<Operation *> maybeTransformed = failure();
4221  bool supported = TypeSwitch<Operation *, bool>(target)
4222  .Case([&](linalg::Conv2DNhwcFhwcOp op) {
4223  maybeTransformed =
4224  winogradConv2D(rewriter, op, getM(), getR());
4225  return true;
4226  })
4227  .Default([&](Operation *op) { return false; });
4228 
4229  if (!supported) {
4230  return emitSilenceableError()
4231  << "this operation is not supported to convert to Winograd Conv2D";
4232  }
4233 
4234  if (failed(maybeTransformed)) {
4235  return emitSilenceableError() << "apply Winograd Conv2D failed";
4236  }
4237 
4238  results.push_back(*maybeTransformed);
4240 }
4241 
4242 DiagnosedSilenceableFailure transform::DecomposeWinogradOp::applyToOne(
4243  transform::TransformRewriter &rewriter, Operation *target,
4245  transform::TransformState &state) {
4246  rewriter.setInsertionPoint(target);
4247  FailureOr<Operation *> maybeTransformed = failure();
4248  bool supported =
4250  .Case([&](linalg::WinogradFilterTransformOp op) {
4251  maybeTransformed = decomposeWinogradFilterTransformOp(rewriter, op);
4252  return true;
4253  })
4254  .Case([&](linalg::WinogradInputTransformOp op) {
4255  maybeTransformed = decomposeWinogradInputTransformOp(rewriter, op);
4256  return true;
4257  })
4258  .Case([&](linalg::WinogradOutputTransformOp op) {
4259  maybeTransformed = decomposeWinogradOutputTransformOp(rewriter, op);
4260  return true;
4261  })
4262  .Default([&](Operation *op) { return false; });
4263 
4264  if (!supported) {
4266  emitSilenceableError()
4267  << "this operation is not supported to decompose into other operations";
4268  diag.attachNote(target->getLoc()) << "target op";
4269  return diag;
4270  }
4271 
4272  if (failed(maybeTransformed)) {
4274  emitSilenceableError() << "decompose Winograd operations failed";
4275  diag.attachNote(target->getLoc()) << "target op";
4276  return diag;
4277  }
4278 
4279  results.push_back(*maybeTransformed);
4281 }
4282 
4283 #include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOpsEnums.cpp.inc"
4284 
4285 #define GET_OP_CLASSES
4286 #include "mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp.inc"
static SmallVector< Value > getTileSizes(Location loc, amx::TileType tType, RewriterBase &rewriter)
Maps the 2-dim vector shape to the two 16-bit tile sizes.
Definition: AMXDialect.cpp:70
static MLIRContext * getContext(OpFoldResult val)
DiagnosedSilenceableFailure doit(RewriterBase &rewriter, OpTy target, transform::ApplyToEachResultList &results, transform::TransformState &state)
static Operation * cloneAndFuseFirstUse(RewriterBase &rewriter, Diagnostic &diag, Operation *producerOp, Operation *containingOp)
#define DOWNSCALE(trans)
bool isValidPackingPermutation(RelayoutOpTy op, ArrayRef< int64_t > permutation, OuterOrInnerPerm outerOrInnerPerm=OuterOrInnerPerm::Outer)
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
static DiagnosedSilenceableFailure reifyMixedParamAndHandleResults(TransformState &state, TransformOpInterface &transformOp, ArrayRef< OpFoldResult > mixedResults, SmallVectorImpl< int64_t > &reified)
When possible, converts each OpFoldResult in mixedResult to an integer if the value can be statically...
static DiagnosedSilenceableFailure unpackSingleIndexResultPayloadOperations(transform::TransformState &state, TransformOpInterface transformOp, SmallVector< OpFoldResult > &result, ArrayRef< OpFoldResult > ofrs)
Assuming that ofr is an index attr or a param of index type or a transform dialect handle mapped to e...
static void printContinuousTileSizeTypes(OpAsmPrinter &printer, Operation *op, Type targetType, Type tile_sizes, Type)
static scf::ForallOp normalizeForallLoopOp(RewriterBase &rewriter, scf::ForallOp loop)
Given a scf.forall loop return a loop op with the loop bounds normalized.
static SmallVector< Value > denormalizeIndVar(RewriterBase &rewriter, Location loc, ValueRange ivs, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > steps)
When a loop is normalized, the uses of the induction variable within the loop need to be replaced with o...
#define DOWNSCALE_NORMAL(a, b)
static FailureOr< LinalgOp > tryApply(Operation *operation, Args &&...args)
Attempts to apply the pattern specified as template argument to the given operation.
static void printMultitileSizesTypes(OpAsmPrinter &printer, Operation *op, Type targetType, Type lowSizeType, Type, Type)
static bool sameOrEquivalentIterArg(Value src, Value dst)
Given two operands coming from a loop iter arg, 'src' and 'dst', return true if the operand 'src' is ...
static Operation * replaceForAllWithNewSignature(RewriterBase &rewriter, Diagnostic &diag, Operation *producerOp, Operation *containingOp, TilingResult &tileAndFuseResult, int64_t resultNumber, SmallVector< OpFoldResult > &offsets, SmallVector< OpFoldResult > &sizes)
Add new operands to the forall op for users of the producerOp that are dominated by the containing sc...
static ParseResult parseContinuousTileSizeTypes(OpAsmParser &parser, Type &targetType, Type &tileSizesType, Type &chunkSizesType)
static SmallVector< Operation * > tileAndFuseFirstExtractUseThroughContainingOpBlockArgument(RewriterBase &rewriter, Diagnostic &diag, Operation *producerOp, Operation *containingOp)
First, find the first "scf::ForallOp" user of producerOp and ensure it is exactly the containingOp,...
static ParseResult parseMultitileSizesTypes(OpAsmParser &parser, Type &targetType, Type &lowSizeType, Type &highSizeType, Type &splitPointType)
static SmallVector< OpFoldResult > normalizeUpperBounds(RewriterBase &rewriter, Location loc, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > steps)
Given lbs, ubs and steps of loops, return (for each loop), the normalized upper bound.
#define DBGS()
static LogicalResult applyTilingToAll(RewriterBase &rewriter, Operation *transformOp, Range &&payloadOps, unsigned numLoops, transform::TransformResults &transformResults, function_ref< FailureOr< scf::SCFTileAndFuseResult >(TilingInterface)> applyFn)
Apply a tiling transformation to all payload ops and store both the tiled operation as well as the cr...
static std::tuple< SmallVector< Operation * >, Operation * > tileAndFuseFirstExtractUse(RewriterBase &rewriter, Diagnostic &diag, Operation *producerOp, Operation *containingOp)
Find the first "extract" user of producerOp and tile it right before its use.
static std::string diag(const llvm::Value &value)
static llvm::ManagedStatic< PassManagerOptions > options
static void print(spirv::VerCapExtAttr triple, DialectAsmPrinter &printer)
static void getDynamicSizes(RankedTensorType tp, ValueRange sizes, SmallVectorImpl< Value > &dynSizes)
Collects the dynamic dimension sizes for tp with the assumption that sizes are the dimension sizes fo...
Base type for affine expression.
Definition: AffineExpr.h:68
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition: AffineMap.h:46
virtual Builder & getBuilder() const =0
Return a builder which provides useful access to MLIRContext, global objects like types and attribute...
virtual ParseResult parseOptionalAttrDict(NamedAttrList &result)=0
Parse a named dictionary into 'result' if it is present.
virtual InFlightDiagnostic emitError(SMLoc loc, const Twine &message={})=0
Emit a diagnostic at the specified location and return failure.
ParseResult parseInteger(IntT &result)
Parse an integer value from the stream.
virtual ParseResult parseColonType(Type &result)=0
Parse a colon followed by a type.
virtual SMLoc getCurrentLocation()=0
Get the location of the next token and store it into the argument.
virtual ParseResult parseType(Type &result)=0
Parse a type.
virtual ParseResult parseComma()=0
Parse a , token.
ParseResult parseKeyword(StringRef keyword)
Parse a given keyword.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
This class represents an argument of a Block.
Definition: Value.h:309
Block represents an ordered list of Operations.
Definition: Block.h:33
Operation * getParentOp()
Returns the closest surrounding operation that contains this block.
Definition: Block.cpp:33
This class is a general helper class for creating context-global objects like types,...
Definition: Builders.h:50
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:106
UnitAttr getUnitAttr()
Definition: Builders.cpp:96
IntegerAttr getIntegerAttr(Type type, int64_t value)
Definition: Builders.cpp:226
DenseI64ArrayAttr getDenseI64ArrayAttr(ArrayRef< int64_t > values)
Definition: Builders.cpp:165
AffineExpr getAffineSymbolExpr(unsigned position)
Definition: Builders.cpp:366
IntegerAttr getI64IntegerAttr(int64_t value)
Definition: Builders.cpp:110
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
Definition: Builders.h:89
MLIRContext * getContext() const
Definition: Builders.h:55
ArrayAttr getArrayAttr(ArrayRef< Attribute > value)
Definition: Builders.cpp:264
ArrayAttr getI64ArrayAttr(ArrayRef< int64_t > values)
Definition: Builders.cpp:279
IndexType getIndexType()
Definition: Builders.cpp:53
ArrayAttr getStrArrayAttr(ArrayRef< StringRef > values)
Definition: Builders.cpp:304
The result of a transform IR operation application.
static DiagnosedSilenceableFailure success()
Constructs a DiagnosedSilenceableFailure in the success state.
bool isDefiniteFailure() const
Returns true if this is a definite failure.
static DiagnosedSilenceableFailure silenceableFailure(Diagnostic &&diag)
Constructs a DiagnosedSilenceableFailure in the silenceable failure state, ready to emit the given di...
bool succeeded() const
Returns true if this is a success.
static DiagnosedSilenceableFailure definiteFailure()
Constructs a DiagnosedSilenceableFailure in the failure state.
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
Definition: Diagnostics.h:155
A class for computing basic dominance information.
Definition: Dominance.h:140
bool dominates(Operation *a, Operation *b) const
Return true if operation A dominates operation B, i.e.
Definition: Dominance.h:158
This class allows control over how the GreedyPatternRewriteDriver works.
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
Definition: IRMapping.h:30
IRValueT get() const
Return the current value being used by this operand.
Definition: UseDefLists.h:160
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
NamedAttribute represents a combination of a name and an Attribute value.
Definition: Attributes.h:164
The OpAsmParser has methods for interacting with the asm parser: parsing things from it,...
virtual ParseResult resolveOperand(const UnresolvedOperand &operand, Type type, SmallVectorImpl< Value > &result)=0
Resolve an operand to an SSA value, emitting an error on failure.
virtual OptionalParseResult parseOptionalOperand(UnresolvedOperand &result, bool allowResultNumber=true)=0
Parse a single operand if present.
virtual ParseResult parseOperand(UnresolvedOperand &result, bool allowResultNumber=true)=0
Parse a single SSA value operand name along with a result number if allowResultNumber is true.
This is a pure-virtual base class that exposes the asmprinter hooks necessary to implement a custom p...
virtual void printOptionalAttrDict(ArrayRef< NamedAttribute > attrs, ArrayRef< StringRef > elidedAttrs={})=0
If the specified operation has attributes, print out an attribute dictionary with their values.
void printFunctionalType(Operation *op)
Print the complete type of an operation in functional form.
Definition: AsmPrinter.cpp:95
This class represents a saved insertion point.
Definition: Builders.h:325
bool isSet() const
Returns true if this insert point is set.
Definition: Builders.h:335
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:346
This class helps build Operations.
Definition: Builders.h:205
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:551
void setListener(Listener *newListener)
Sets the listener of this builder to the one provided.
Definition: Builders.h:314
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:429
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396
Listener * getListener() const
Returns the current listener of this builder, or nullptr if this builder doesn't have a listener.
Definition: Builders.h:318
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:455
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
Definition: Builders.h:410
This class represents a single result from folding an operation.
Definition: OpDefinition.h:271
This class represents an operand of an operation.
Definition: Value.h:257
unsigned getOperandNumber()
Return which operand this is in the OpOperand list of the Operation.
Definition: Value.cpp:228
This is a value defined by a result of an operation.
Definition: Value.h:447
This class provides the API for ops that are known to be isolated from above.
StringRef getStringRef() const
Return the name of this operation. This always succeeds.
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Value getOperand(unsigned idx)
Definition: Operation.h:350
OpResult getOpResult(unsigned idx)
Definition: Operation.h:421
bool hasTrait()
Returns true if the operation was registered with a particular trait, e.g.
Definition: Operation.h:749
Attribute getAttr(StringAttr name)
Return the specified attribute if present, null otherwise.
Definition: Operation.h:534
void setOperand(unsigned idx, Value value)
Definition: Operation.h:351
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Definition: Operation.h:560
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:797
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
unsigned getNumRegions()
Returns the number of regions held by this operation.
Definition: Operation.h:674
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
unsigned getNumOperands()
Definition: Operation.h:346
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
Definition: Operation.cpp:268
OperationName getName()
The name of an operation is the key identifier for it.
Definition: Operation.h:119
operand_type_range getOperandTypes()
Definition: Operation.h:397
result_type_range getResultTypes()
Definition: Operation.h:428
bool isAncestor(Operation *other)
Return true if this operation is an ancestor of the other operation.
Definition: Operation.h:263
user_range getUsers()
Returns a range of all users.
Definition: Operation.h:873
result_range getOpResults()
Definition: Operation.h:420
result_range getResults()
Definition: Operation.h:415
bool isProperAncestor(Operation *other)
Return true if this operation is a proper ancestor of the other operation.
Definition: Operation.cpp:219
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
Definition: Operation.cpp:673
unsigned getNumResults()
Return the number of results held by this operation.
Definition: Operation.h:404
This class implements Optional functionality for ParseResult.
Definition: OpDefinition.h:39
bool has_value() const
Returns true if we contain a valid ParseResult value.
Definition: OpDefinition.h:49
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition: PatternMatch.h:748
RewritePattern is the common base class for all DAG to DAG replacements.
Definition: PatternMatch.h:238
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:358
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:681
virtual void eraseBlock(Block *block)
This method erases all operations in a block.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
void replaceAllUsesWith(Value from, Value to)
Find uses of from and replace them with to.
Definition: PatternMatch.h:601
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void replaceUsesWithIf(Value from, Value to, function_ref< bool(OpOperand &)> functor, bool *allUsesReplaced=nullptr)
Find uses of from and replace them with to if the functor returns true.
void mergeBlocks(Block *source, Block *dest, ValueRange argValues={})
Inline the operations of block 'source' into the end of block 'dest'.
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:593
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:500
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:37
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
bool isIndex() const
Definition: Types.cpp:54
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
Type front()
Return first type in the range.
Definition: TypeRange.h:152
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Type getType() const
Return the type of this value.
Definition: Value.h:105
user_range getUsers() const
Definition: Value.h:218
Location getLoc() const
Return the location of this value.
Definition: Value.cpp:26
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
State for analysis-enabled bufferization.
Operation * getOwner() const
Return the owner of this operand.
Definition: UseDefLists.h:38
A list of results of applying a transform op with ApplyEachOpTrait to a single payload operation,...
void assign(unsigned size, std::nullptr_t)
Sets the list of results to size null pointers.
void reserve(unsigned size)
Reserves space for size elements in the list.
size_t size() const
Returns the number of elements in the list.
void push_back(Operation *op)
Appends an element to the list.
A listener that updates a TransformState based on IR modifications.
Local mapping between values defined by a specific op implementing the TransformOpInterface and the p...
void setValues(OpResult handle, Range &&values)
Indicates that the result of the transform IR op at the given position corresponds to the given range...
void setParams(OpResult value, ArrayRef< TransformState::Param > params)
Indicates that the result of the transform IR op at the given position corresponds to the given list ...
void set(OpResult value, Range &&ops)
Indicates that the result of the transform IR op at the given position corresponds to the given list ...
This is a special rewriter to be used in transform op implementations, providing additional helper fu...
LogicalResult notifyPayloadOperationReplaced(Operation *op, Operation *replacement)
Notify the transform dialect interpreter that the given op has been replaced with another op and that...
The state maintained across applications of various ops implementing the TransformOpInterface.
SmallVector< OpFoldResult > makeComposedFoldedMultiResultAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Variant of makeComposedFoldedAffineApply suitable for multi-result maps.
Definition: AffineOps.cpp:1271
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
Definition: AffineOps.cpp:1175
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
Definition: AffineOps.cpp:1225
LogicalResult analyzeOp(Operation *op, OneShotAnalysisState &state, BufferizationStatistics *statistics=nullptr)
Analyze op and its nested ops.
void walk(Operation *op, function_ref< void(Region *)> callback, WalkOrder order)
Walk all of the regions, blocks, or operations nested under (and including) the given operation.
Definition: Visitors.h:136
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
FailureOr< PackingResult > buildPackingLoopNest(RewriterBase &rewriter, tensor::PadOp opToHoist, scf::ForOp outermostEnclosingForOp, ArrayRef< int64_t > transposeVector)
Build the packing loop nest required to hoist opToHoist above outermostEnclosingForOp.
LogicalResult rewriteAsPaddedOp(RewriterBase &rewriter, LinalgOp opToPad, const LinalgPaddingOptions &options, LinalgOp &paddedOp, SmallVector< Value > &replacements, SmallVector< tensor::PadOp > &padOps)
Pad the iterator dimensions options.paddingDimensions of all opToPad operands to a static bounding bo...
Definition: Padding.cpp:244
FailureOr< std::pair< Operation *, Operation * > > rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp)
Convert linalg.conv_2d_nhwc_hwcf into linalg.generic (for img2col packing) and linalg....
bool hasVectorizationImpl(Operation *)
Return true if there's dedicated logic in the Linalg Vectorizer to vectorize this Op,...
FailureOr< Operation * > decomposeWinogradFilterTransformOp(RewriterBase &rewriter, linalg::WinogradFilterTransformOp op)
Rewrite linalg.winograd_filter_transform.
std::optional< Value > allocateWorkgroupMemory(OpBuilder &builder, memref::SubViewOp subview, ArrayRef< Value > sizeBounds, DataLayout &)
Allocate the subview in the GPU workgroup memory.
Definition: Promotion.cpp:469
FailureOr< PackTransposeResult > packTranspose(RewriterBase &rewriter, linalg::PackOp packOp, linalg::LinalgOp linalgOp, linalg::UnPackOp maybeUnPackOp, ArrayRef< int64_t > outerPerm, ArrayRef< int64_t > innerPerm)
Transpose a single PackOp -> LinalgOp -> UnPackOp chain and return the transposed PackOp -> LinalgOp ...
Definition: Transforms.cpp:678
Value bufferizeToAllocation(RewriterBase &rewriter, const BufferizeToAllocationOptions &options, tensor::PadOp padOp, Attribute memorySpace={}, Operation *insertionPoint=nullptr)
Materialize a buffer allocation for the given tensor.pad op and lower the op to linalg....
FailureOr< Value > hoistPaddingOnTensors(RewriterBase &rewriter, tensor::PadOp opToHoist, int64_t numLoops, ArrayRef< int64_t > transposeVector, tensor::PadOp &hoistedOp, SmallVectorImpl< TransposeOp > &transposeOps)
Mechanically hoist padding operations on tensors by numLoops into a new, generally larger tensor.
FailureOr< LinalgOp > specializeGenericOp(RewriterBase &rewriter, GenericOp genericOp)
Create a namedOp from the given GenericOp and replace the GenericOp.
Definition: Specialize.cpp:260
FailureOr< LowerUnPackOpResult > lowerUnPack(RewriterBase &rewriter, linalg::UnPackOp unPackOp, bool lowerUnpadLikeWithExtractSlice=true)
Rewrite pack as empty + transpose + reshape + extract_slice.
Definition: Transforms.cpp:359
void populatePadOpVectorizationPatterns(RewritePatternSet &patterns, PatternBenefit baseBenefit=1)
Populates patterns with patterns that vectorize tensor.pad.
void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns)
Definition: Tiling.cpp:861
LogicalResult deallocateGPUPrivateMemory(OpBuilder &, Value)
In case of GPU private memory there is no need to deallocate since the memory is freed when going out...
Definition: Promotion.cpp:510
FailureOr< Operation * > decomposeWinogradOutputTransformOp(RewriterBase &rewriter, linalg::WinogradOutputTransformOp op)
Rewrite linalg.winograd_output_transform.
std::optional< Value > allocateGPUPrivateMemory(OpBuilder &builder, memref::SubViewOp subview, ArrayRef< Value > sizeBounds, DataLayout &)
Allocate the subview in the GPU private memory.
Definition: Promotion.cpp:494
FailureOr< Operation * > rewriteInDestinationPassingStyle(RewriterBase &rewriter, tensor::FromElementsOp fromElementsOp)
Rewrite tensor.from_elements to linalg.generic.
FailureOr< Operation * > winogradConv2D(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp op, int64_t m, int64_t r)
Convert linalg.conv_2d_nhwc_fhwc to Winograd Conv2D algorithm F(m x m, r x r).
FailureOr< Operation * > transposeConv2D(RewriterBase &rewriter, linalg::Conv2DNhwcFhwcOp op)
Convert linalg.conv_2d_nhwc_fhwc(_q) to linalg.conv_2d_nhwc_hwcf(_q) by materializing transpose.
void populateFoldUnitExtentDimsPatterns(RewritePatternSet &patterns, ControlDropUnitDims &options)
Patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors via reassociativ...
LogicalResult copyToWorkgroupMemory(OpBuilder &b, Value src, Value dst)
Create Memref copy operations and add gpu barrier guards before and after the copy operation to ensur...
Definition: Promotion.cpp:485
LogicalResult linalgOpAnchoredEmptyTensorEliminationStep(RewriterBase &rewriter, Operation *op, bufferization::OneShotAnalysisState &state)
Try to eliminate tensor::EmptyOps inside op that are anchored on a LinalgOp.
LogicalResult vectorize(RewriterBase &rewriter, Operation *op, ArrayRef< int64_t > inputVectorSizes={}, ArrayRef< bool > inputScalableVecDims={}, bool vectorizeNDExtract=false, bool flatten1DDepthwiseConv=false)
Emit a suitable vector form for an operation.
FailureOr< GenericOp > generalizeNamedOp(RewriterBase &rewriter, LinalgOp linalgOp)
Create a GenericOp from the given named operation linalgOp and replace the given linalgOp.
FailureOr< Operation * > transposeBatchMatmul(RewriterBase &rewriter, linalg::BatchMatmulOp op, bool transposeLHS=true)
Pattern to replace.
LogicalResult promoteSubviewsPrecondition(Operation *op, LinalgPromotionOptions options)
Promote memref.subviews feeding linalg-on-buffers operations.
Definition: Promotion.cpp:398
LogicalResult copyToGPUPrivateMemory(OpBuilder &b, Value src, Value dst)
Normal copy to between src and dst.
Definition: Promotion.cpp:502
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
Definition: Utils.cpp:223
FailureOr< GenericOp > interchangeGenericOp(RewriterBase &rewriter, GenericOp genericOp, ArrayRef< unsigned > interchangeVector)
Interchange the iterator_types and iterator_maps dimensions and adapts the index accesses of op.
Definition: Interchange.cpp:50
FailureOr< StaticMultiSizeSpecification > computeStaticMultiTileSizes(LinalgOp op, unsigned dimension, int64_t targetSize, int64_t divisor)
Definition: Tiling.cpp:243
void populateDecomposePackUnpackPatterns(RewritePatternSet &patterns)
Populates patterns to decompose linalg.pack and linalg.unpack Ops into e.g.
FailureOr< ContinuousTileSizeSpecification > computeContinuousTileSizes(OpBuilder &builder, TilingInterface op, unsigned dimension, OpFoldResult targetSize, bool emitAssertions)
Definition: Tiling.cpp:163
FailureOr< StaticContinuousTileSizeSpecification > computeStaticContinuousTileSizes(LinalgOp op, unsigned dimension, unsigned targetSize)
Definition: Tiling.cpp:112
FailureOr< SplitReductionResult > splitReduction(RewriterBase &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
void populateFoldPackUnpackIntoTensorEmptyPatterns(RewritePatternSet &patterns)
Populates patterns with patterns that fold operations like linalg.pack and linalg....
void hoistRedundantVectorBroadcasts(RewriterBase &rewriter, Operation *root)
Hoist vector.extract/vector.broadcast pairs out of immediately enclosing scf::ForOp iteratively,...
Definition: Hoisting.cpp:97
FailureOr< ForallReductionTilingResult > tileReductionUsingForall(RewriterBase &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > tileSizes={}, std::optional< ArrayAttr > mapping=std::nullopt)
Method to tile a reduction to parallel iterations computing partial reductions.
Definition: Tiling.cpp:595
FailureOr< PackResult > packMatmulGreedily(RewriterBase &rewriter, LinalgOp linalgOp, ArrayRef< OpFoldResult > mnkPackedSizes, ArrayRef< int64_t > mnkPaddedSizesNextMultipleOf, ArrayRef< int64_t > mnkOrder)
Pack a LinalgOp by greedily inferring matmul dimensions (m, n, k) where m and n are proper parallel d...
Definition: Transforms.cpp:769
FailureOr< PackResult > pack(RewriterBase &rewriter, linalg::LinalgOp linalgOp, ArrayRef< OpFoldResult > packedSizes)
Implement packing of a single LinalgOp by packedSizes.
Definition: Transforms.cpp:481
void populateEraseUnnecessaryInputsPatterns(RewritePatternSet &patterns)
Patterns to promote inputs to outputs and remove unused inputs of linalg.generic ops.
FailureOr< LinalgOp > promoteSubViews(OpBuilder &b, LinalgOp op, const LinalgPromotionOptions &options)
Promote the subViews into a new buffer allocated at the insertion point b.
Definition: Promotion.cpp:420
std::function< SplitReductionOptions(LinalgOp op)> ControlSplitReductionFn
Function signature to control reduction splitting.
Definition: Transforms.h:489
LogicalResult deallocateWorkgroupMemory(OpBuilder &, Value)
In case of GPU group memory there is no need to deallocate.
Definition: Promotion.cpp:478
FailureOr< Operation * > transposeMatmul(RewriterBase &rewriter, linalg::MatmulOp op, bool transposeLHS=true)
Convert Linalg matmul ops to transposed variants.
FailureOr< CollapseResult > collapseOpIterationDims(LinalgOp op, ArrayRef< ReassociationIndices > foldedIterationDims, RewriterBase &rewriter)
Collapses dimensions of linalg.generic/linalg.copy operation.
void hoistRedundantVectorTransfers(Operation *root, bool verifyNonZeroTrip=false)
Hoist vector.transfer_read/vector.transfer_write on buffers pairs out of immediately enclosing scf::F...
Definition: Hoisting.cpp:202
FailureOr< Operation * > decomposeWinogradInputTransformOp(RewriterBase &rewriter, linalg::WinogradInputTransformOp op)
Rewrite linalg.winograd_input_transform.
void populateDecomposePadPatterns(RewritePatternSet &patterns)
Populates patterns to decompose tensor.pad into e.g.
void populateFoldAddIntoDestPatterns(RewritePatternSet &patterns)
Pattern to replace linalg.add when destination passing on a contraction op suffices for achieving the...
std::pair< TilingInterface, TilingInterface > splitOp(RewriterBase &rewriter, TilingInterface op, unsigned dimension, OpFoldResult splitPoint)
Split the given op into two parts along the given iteration space dimension at the specified splitPoi...
Definition: Split.cpp:67
void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns)
Populates patterns with patterns that fold operations like tensor.pad and tensor.extract_slice into t...
FailureOr< SplitReductionResult > splitReductionByScaling(RewriterBase &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc=false)
Scaling-based implementation of the split reduction transformation.
FailureOr< MultiSizeSpecification > computeMultiTileSizes(OpBuilder &builder, LinalgOp op, unsigned dimension, OpFoldResult targetSize, OpFoldResult divisor, bool emitAssertions=true)
Emits the IR computing the multi-sized tiling specification with two tile sizes not exceeding targetS...
Definition: Tiling.cpp:269
FailureOr< LowerPackResult > lowerPack(RewriterBase &rewriter, linalg::PackOp packOp, bool lowerPadLikeWithInsertSlice=true)
Rewrite pack as pad + reshape + transpose.
Definition: Transforms.cpp:224
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given memref value.
Definition: MemRefOps.cpp:77
QueryRef parse(llvm::StringRef line, const QuerySession &qs)
Definition: Query.cpp:22
FailureOr< scf::SCFTilingResult > tileReductionUsingScf(RewriterBase &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > tileSizes)
Method to tile a reduction and generate a parallel op within a serial loop.
FailureOr< SCFTilingResult > tileUsingSCF(RewriterBase &rewriter, TilingInterface op, const SCFTilingOptions &options)
Method to tile an op that implements the TilingInterface using scf.for for iterating over the tiles.
ForOp getForInductionVarOwner(Value val)
Returns the loop parent of an induction variable.
Definition: SCF.cpp:604
FailureOr< SmallVector< scf::ForOp > > lowerToLoopsUsingSCFForOp(RewriterBase &rewriter, TilingInterface op)
Method to lower an op that implements the TilingInterface to loops/scalars.
uint64_t getM(LevelType lt)
Definition: Enums.h:443
void populateMergeConsecutiveInsertExtractSlicePatterns(RewritePatternSet &patterns)
Collects patterns to merge consecutive tensor.insert_slice/extract_slice into one.
void populateBubbleUpExtractSliceOpPatterns(RewritePatternSet &patterns)
Appends patterns that are used to bubble up tensor.extract slice op above its producer.
OpFoldResult getMixedSize(OpBuilder &builder, Location loc, Value value, int64_t dim)
Return the dimension of the given tensor value.
Definition: TensorOps.cpp:64
LogicalResult getOrCreateDestinations(OpBuilder &b, Location loc, Operation *op, SmallVector< Value > &result)
This is a helper function for DestinationStyleOpInterface.
Definition: TensorOps.cpp:117
void populateFoldTensorSubsetIntoVectorTransferPatterns(RewritePatternSet &patterns)
Appends patterns for folding tensor subset ops into vector transfer ops.
void onlyReadsPayload(SmallVectorImpl< MemoryEffects::EffectInstance > &effects)
DiagnosedSilenceableFailure tileToForallOpImpl(RewriterBase &rewriter, transform::TransformState &state, TransformOpInterface transformOp, Operation *target, ArrayRef< OpFoldResult > mixedNumThreads, ArrayRef< OpFoldResult > mixedTileSizes, std::optional< ArrayAttr > mapping, scf::SCFTilingResult &tilingResult)
Implementation of tiling operations using scf.forall.
void producesHandle(ResultRange handles, SmallVectorImpl< MemoryEffects::EffectInstance > &effects)
void consumesHandle(MutableArrayRef< OpOperand > handles, SmallVectorImpl< MemoryEffects::EffectInstance > &effects)
Populates effects with the memory effects indicating the operation on the given handle value:
void onlyReadsHandle(MutableArrayRef< OpOperand > handles, SmallVectorImpl< MemoryEffects::EffectInstance > &effects)
void modifiesPayload(SmallVectorImpl< MemoryEffects::EffectInstance > &effects)
Populates effects with the memory effects indicating the access to payload IR resource.
void populateVectorTransferPermutationMapLoweringPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Collect a set of transfer read/write lowering patterns that simplify the permutation map (e....
void populateVectorStepLoweringPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Populate the pattern set with the following patterns:
void populateSinkVectorOpsPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Patterns that remove redundant Vector Ops by re-ordering them with e.g.
void populateVectorReductionToContractPatterns(RewritePatternSet &patterns, PatternBenefit benefit=1)
Collect patterns to convert reduction op to vector.contract and fold transpose/broadcast ops into the...
static void transpose(llvm::ArrayRef< int64_t > trans, SmallVector< int64_t > &shape)
Definition: XeGPUOps.cpp:23
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:490
OpFoldResult getAsIndexOpFoldResult(MLIRContext *ctx, int64_t val)
Convert int64_t to integer attributes of index type and return them as OpFoldResult.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:305
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition: AffineExpr.h:311
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
DiagnosedSilenceableFailure emitSilenceableFailure(Location loc, const Twine &message={})
Emits a silenceable failure with the given message.
detail::DenseArrayAttrImpl< int64_t > DenseI64ArrayAttr
Attribute parseAttribute(llvm::StringRef attrStr, MLIRContext *context, Type type={}, size_t *numRead=nullptr, bool isKnownNullTerminated=false)
This parses a single MLIR attribute to an MLIR context if it was valid.
DiagnosedDefiniteFailure emitDefiniteFailure(Location loc, const Twine &message={})
Emits a definite failure with the given message.
Type getElementTypeOrSelf(Type type)
Return the element type or return the type itself.
const FrozenRewritePatternSet & patterns
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
Definition: AffineExpr.h:325
void dispatchIndexOpFoldResults(ArrayRef< OpFoldResult > ofrs, SmallVectorImpl< Value > &dynamicVec, SmallVectorImpl< int64_t > &staticVec)
Helper function to dispatch multiple OpFoldResults according to the behavior of dispatchIndexOpFoldRe...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:112
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
OpFoldResult getAsOpFoldResult(Value val)
Given a value, try to extract a constant Attribute.
SmallVector< OpFoldResult > getMixedValues(ArrayRef< int64_t > staticValues, ValueRange dynamicValues, MLIRContext *context)
Return a vector of OpFoldResults with the same size a staticValues, but all elements for which Shaped...
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:369
LogicalResult verify(Operation *op, bool verifyRecursively=true)
Perform (potentially expensive) checks of invariants, used to detect compiler bugs,...
Definition: Verifier.cpp:423
bool isPermutationVector(ArrayRef< int64_t > interchange)
Method to check if an interchange vector is a permutation.
bool isOneInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 1.
This is the representation of an operand reference.
This class represents a listener that may be used to hook into various actions within an OpBuilder.
Definition: Builders.h:283
This represents an operation in an abstracted form, suitable for use with the builder APIs.
SmallVector< Value, 4 > operands
void addOperands(ValueRange newOperands)
void addAttribute(StringRef name, Attribute attr)
Add an attribute with the specified name.
void addTypes(ArrayRef< Type > newTypes)
NamedAttrList attributes
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
A listener that forwards all notifications to another listener.
Definition: PatternMatch.h:421
ForwardingListener(OpBuilder::Listener *listener)
Definition: PatternMatch.h:422
Container for result values of tiling.
SmallVector< Value > tiledValues
Options for analysis-enabled bufferization.
Transformation to drop unit-extent dimensions from linalg.generic operations.
Definition: Transforms.h:520
Vectorization pattern for memref::CopyOp.
Definition: Transforms.h:1595
Rewrites 2-D depthwise convolution ops with size-1 (w, kw) or (h, kh) dimensions into 1-D depthwise c...
Definition: Transforms.h:1527
Match and rewrite for the pattern:
Definition: Transforms.h:1717
Match and rewrite for the pattern:
Definition: Transforms.h:1745
LinalgPromotionOptions & setUseFullTileBuffersByDefault(bool use)
Definition: Transforms.h:426
LinalgPromotionOptions & setAlignment(unsigned align)
Definition: Transforms.h:432
LinalgPromotionOptions & setUseAlloca(bool use)
Definition: Transforms.h:445
LinalgPromotionOptions & setCopyInOutFns(CopyCallbackFn const &copyIn, CopyCallbackFn const &copyOut)
Definition: Transforms.h:465
LinalgPromotionOptions & setUseFullTileBuffers(ArrayRef< bool > useFullTiles)
Definition: Transforms.h:415
LinalgPromotionOptions & setMemorySpace(Attribute memorySpc)
Definition: Transforms.h:439
LinalgPromotionOptions & setAllocationDeallocationFns(AllocBufferCallbackFn const &allocFn, DeallocBufferCallbackFn const &deallocFn)
Definition: Transforms.h:455
LinalgPromotionOptions & setOperandsToPromote(ArrayRef< int64_t > operands)
Definition: Transforms.h:404
Split Reduction options.
Definition: Transforms.h:474
Options used to control tile + fuse.
SCFTilingOptions tilingOptions
The tiling options used to control the tiling of the consumer.
std::optional< FrozenRewritePatternSet > cleanupPatterns
An optional set of rewrite patterns to apply to the results of tiling before fusion.
Options to use to control tiling.
SCFTilingOptions & setTileSizeComputationFunction(SCFTileSizeComputationFunction fun)
SCFTilingOptions & setInterchange(ArrayRef< int64_t > interchange)
SCFTilingOptions & setTileSizes(ArrayRef< OpFoldResult > tileSizes)
Convenience function to set the tileSizeComputationFunction to a function that computes tile sizes at...
SmallVector< int64_t > interchangeVector
The interchange vector to reorder the tiled loops.
Transformation information returned after tiling.
SmallVector< Operation * > tiledOps
Tiled operations that are generated during tiling.
SmallVector< LoopLikeOpInterface > loops
The scf.for operations that iterate over the tiles.