26 #include "llvm/ADT/TypeSwitch.h"
27 #include "llvm/Support/Debug.h"
30 #define DEBUG_TYPE "tile-using-interface"
37 auto tileSizes = llvm::to_vector(ts);
48 size_t iterationDomainSize) {
50 if (filledVector.size() < iterationDomainSize) {
51 auto range = llvm::seq<int64_t>(filledVector.size(), iterationDomainSize);
52 filledVector.append(range.begin(), range.end());
54 if (filledVector.size() > iterationDomainSize)
55 filledVector.resize(iterationDomainSize);
74 return ((sizeAsInt.value() - offsetAsInt.value()) % strideAsInt.value() == 0);
83 if (ts && ts.value() == 1)
129 if (newDestArgs.empty())
131 if (
auto destinationStyleOp = dyn_cast<DestinationStyleOpInterface>(clonedOp))
132 destinationStyleOp.getDpsInitsMutable().assign(newDestArgs);
150 assert(!loopRanges.empty() &&
"unexpected empty loop ranges");
151 assert(loopRanges.size() == tileSizes.size() &&
152 "expected as many tile sizes as loop ranges");
156 for (
auto [loopRange, tileSize] : llvm::zip_equal(loopRanges, tileSizes)) {
166 rewriter.
create<scf::ForOp>(loc, lb, ub, step, destinationTensors,
169 loops.push_back(loop);
170 ivs.push_back(loop.getInductionVar());
172 destinationTensors = loop.getRegionIterArgs();
177 if (
failed(yieldTiledValuesFn(rewriter, loc, ivs, destinationTensors,
178 tiledResults, resultOffsets, resultSizes))) {
180 loc,
"failed to generate inner tile loop body");
187 for (
auto [tiledValue, destinationTensor, resultOffset, resultSize] :
188 llvm::zip_equal(tiledResults, destinationTensors, resultOffsets,
192 auto insertSlice = rewriter.
create<tensor::InsertSliceOp>(
193 loc, tiledValue, destinationTensor, resultOffset, resultSize,
195 yieldedValues.push_back(insertSlice);
197 rewriter.
create<scf::YieldOp>(loc, yieldedValues);
200 for (
auto [outerLoop, innerLoop] :
204 cast<scf::ForOp>(outerLoop.getOperation()).getBody());
205 rewriter.
create<scf::YieldOp>(outerLoop.getLoc(), innerLoop->getResults());
227 assert(!loopRanges.empty() &&
"unexpected empty loop ranges");
228 assert(loopRanges.size() == tileSizes.size() &&
229 "expected as many tile sizes as loop ranges");
232 sizes(loopRanges.size());
234 for (
auto [tileSize, loopRange] : llvm::zip_equal(tileSizes, loopRanges)) {
237 lbs.push_back(loopRange.offset);
238 ubs.push_back(loopRange.size);
239 steps.push_back(tileSize);
241 assert(!lbs.empty() &&
"Expected at least one loop range");
243 std::optional<ArrayAttr> mappingAttr;
244 if (!mappingVector.empty())
247 auto forallOp = rewriter.
create<scf::ForallOp>(
248 loc, lbs, ubs, steps, destinationTensors, mappingAttr);
249 loops.push_back(forallOp);
252 destinationTensors = forallOp.getRegionOutArgs();
256 if (
failed(tiledBodyFn(rewriter, loc, forallOp.getInductionVars(),
257 destinationTensors, tiledResults, resultOffsets,
262 for (
auto [tiledValue, destinationTensor, resultOffset, resultSize] :
263 llvm::zip_equal(tiledResults, destinationTensors, resultOffsets,
268 rewriter.
create<tensor::ParallelInsertSliceOp>(
269 loc, tiledValue, destinationTensor, resultOffset, resultSize,
297 return tiledBodyFn(rewriter, loc,
ValueRange{}, destinationTensors,
298 tiledResults, resultOffsets, resultSizes);
302 destinationTensors, tiledBodyFn, loops);
306 rewriter, loc, loopRanges, tileSizes,
options.mappingVector,
307 destinationTensors, tiledBodyFn, loops);
319 template <
typename LoopType>
336 auto inits = llvm::to_vector(loopOp.getInitArgs());
337 inits.append(newInitOperands.begin(), newInitOperands.end());
338 auto newLoop = rewriter.
create<scf::ForOp>(
339 loc, loopOp.getLowerBound(), loopOp.getUpperBound(), loopOp.getStep(),
343 Block *loopBody = loopOp.getBody();
344 Block *newLoopBody = newLoop.getBody();
346 loopBody, newLoopBody,
347 newLoopBody->
getArguments().take_front(loopBody->getNumArguments()));
349 auto yieldOp = cast<scf::YieldOp>(newLoopBody->
getTerminator());
355 newLoop.getRegionIterArgs().take_back(newInitOperands.size());
356 if (
failed(yieldTiledValuesFn(rewriter, loc, newLoop.getInductionVar(),
357 newRegionIterArgs, tiledValues, resultOffsets,
364 for (
auto [tiledValue, regionIterArg, resultOffset, resultSize] :
365 llvm::zip_equal(tiledValues, newRegionIterArgs, resultOffsets,
369 Value insert = rewriter.
create<tensor::InsertSliceOp>(
370 yieldOp->getLoc(), tiledValue, regionIterArg, resultOffset, resultSize,
372 newYieldValues.push_back(insert);
377 newLoop->getResults().take_front(loopOp.getNumResults()));
378 return cast<LoopLikeOpInterface>(newLoop.getOperation());
389 auto inits = llvm::to_vector(loopOp.getOutputs());
390 inits.append(newInitOperands.begin(), newInitOperands.end());
391 auto newLoop = rewriter.
create<scf::ForallOp>(
392 loc, loopOp.getMixedLowerBound(), loopOp.getMixedUpperBound(),
393 loopOp.getMixedStep(), inits, loopOp.getMapping(),
397 Block *loopBody = loopOp.getBody();
398 Block *newLoopBody = newLoop.getBody();
400 loopBody, newLoopBody,
401 newLoopBody->
getArguments().take_front(loopBody->getNumArguments()));
403 auto terminator = cast<scf::InParallelOp>(newLoopBody->
getTerminator());
408 newLoop.getRegionIterArgs().take_back(newInitOperands.size());
409 if (
failed(yieldTiledValuesFn(rewriter, loc, newLoop.getInductionVars(),
410 regionIterArgs, tiledValues, resultOffsets,
414 "failed to get yielded tiled values");
420 for (
auto [tiledValue, iterArg, resultOffset, resultSize] : llvm::zip_equal(
421 tiledValues, regionIterArgs, resultOffsets, resultSizes)) {
424 rewriter.
create<tensor::ParallelInsertSliceOp>(
425 terminator.getLoc(), tiledValue, iterArg, resultOffset, resultSize,
430 newLoop->getResults().take_front(loopOp.getNumResults()));
431 return cast<LoopLikeOpInterface>(newLoop.getOperation());
441 loopLikeOp.getOperation())
442 .Case<scf::ForOp, scf::ForallOp>(
445 loopOp, rewriter, newInitOperands, yieldTiledValuesFn);
467 for (
auto &loop : loops.drop_back()) {
471 auto forLoop = cast<scf::ForOp>(loop.getOperation());
475 newInits.append(newInitValues.begin(), newInitValues.end());
476 auto newLoop = rewriter.
create<scf::ForOp>(
477 forLoop.getLoc(), forLoop.getLowerBound(), forLoop.getUpperBound(),
478 forLoop.getStep(), newInits,
483 sourceBlockArgs.push_back(newLoop.getInductionVar());
484 auto newRegionIterArgs = newLoop.getRegionIterArgs();
485 sourceBlockArgs.append(
486 newRegionIterArgs.begin(),
487 std::next(newRegionIterArgs.begin(), forLoop.getNumResults()));
488 rewriter.
mergeBlocks(forLoop.getBody(), newLoop.getBody(), sourceBlockArgs);
490 forLoop, newLoop.getResults().take_front(forLoop.getNumResults()));
492 ivs.push_back(newLoop.getInductionVar());
493 newInitValues = newLoop.getRegionIterArgs().take_back(newInitValues.size());
497 LoopLikeOpInterface innerMostLoop = loops.back();
500 getNewTiledYieldsFn);
502 if (
failed(newInnerMostLoop))
503 return innerMostLoop.emitOpError(
"failed to return additional yields");
504 loops.back() = newInnerMostLoop.value();
508 for (
auto [outerLoop, innerLoop] :
509 llvm::zip_equal(loops.drop_back(), loops.drop_front())) {
511 auto outerForLoop = cast<scf::ForOp>(outerLoop);
512 auto outerLoopYield =
513 cast<scf::YieldOp>(outerForLoop.getBody()->getTerminator());
515 llvm::to_vector(outerLoopYield.getOperands());
517 innerLoop->getResults().take_back(newInitValues.size());
518 newYields.append(additionalYields.begin(), additionalYields.end());
533 if (!
options.tileSizeComputationFunction) {
535 op,
"missing tile size computation function");
540 size_t numLoops = iterationDomain.size();
547 options.tileSizeComputationFunction(rewriter, op);
548 if (tileSizes.size() < iterationDomain.size()) {
550 tileSizes.append(numLoops - tileSizes.size(), zero);
556 if (!
options.interchangeVector.empty()) {
558 iterationDomain.size());
560 if (!interchangeVector.empty()) {
563 op,
"invalid intechange vector, not a permutation of the entire "
583 int materializedLoopNum = 0;
584 for (
auto [tileSize, loopRange] :
585 llvm::zip_equal(tileSizes, iterationDomain)) {
587 offsets.push_back(loopRange.offset);
588 sizes.push_back(loopRange.size);
591 Value iv = ivs[materializedLoopNum++];
592 offsets.push_back(iv);
600 if (!interchangeVector.empty()) {
609 auto clonedOp = cast<TilingInterface>(
616 tiledResults.append(clonedOp->result_begin(), clonedOp->result_end());
623 tilingResult = clonedOp.getTiledImplementation(rewriter, offsets, sizes);
624 if (
failed(tilingResult)) {
634 for (
auto [index, tiledValue] :
636 tiledResults.push_back(tiledValue);
638 if (
failed(op.getResultTilePosition(rewriter, index, offsets, sizes,
639 resultOffset, resultSize))) {
640 for (
auto op : tilingResult->tiledOps) {
644 op,
"failed to get slice of result produced");
646 resultOffsets.emplace_back(std::move(resultOffset));
647 resultSizes.emplace_back(std::move(resultSize));
656 destinationTensors))) {
658 "unable to create destination tensors");
664 tileSizes, destinationTensors,
665 innerYieldTiledValuesFn, loops)))
666 return op.
emitOpError(
"failed to generate tiling loops");
668 "expected tiling result to be computed after loop generation");
674 tilingResult->tiledValues};
678 loops.front()->getResults(), [](
OpResult r) ->
Value { return r; });
684 PartialReductionOpInterface op,
689 auto tilingInterfaceOp = cast<TilingInterface>(op.getOperation());
691 auto tileSizesVector = llvm::to_vector(tileSizes);
692 if (tileSizesVector.size() < iterationDomain.size()) {
694 tileSizesVector.append(iterationDomain.size() - tileSizesVector.size(),
699 op,
"don't support ops with multiple results for now");
701 tilingInterfaceOp.getLoopIteratorTypes();
704 for (
auto [idx, iteratorType] :
706 if (iteratorType == utils::IteratorType::reduction)
707 reductionDims.push_back(idx);
712 op.generateInitialTensorForPartialReduction(b, loc, tileSizesVector,
714 if (
failed(identityTensor))
716 "cannot create a tensor of identity value.");
720 auto innerYieldTiledValuesFn =
728 int materializedLoopNum = 0;
729 for (
auto [tileSize, loopRange] :
730 llvm::zip_equal(tileSizesVector, iterationDomain)) {
732 offsets.push_back(loopRange.offset);
733 sizes.push_back(loopRange.size);
736 Value iv = ivs[materializedLoopNum++];
737 offsets.push_back(iv);
744 auto clonedOp = cast<PartialReductionOpInterface>(
748 parallelOp = clonedOp.tileToPartialReduction(b, loc, regionIterArgs,
749 offsets, sizes, reductionDims);
757 resultOffsets.emplace_back(std::move(outOffsets));
760 for (
size_t i = 0; i < offsets.size(); i++) {
764 resultSizes.emplace_back(std::move(outSizes));
770 llvm::map_to_vector(identityTensor.value()->getResults(),
777 destinationTensors, innerYieldTiledValuesFn,
782 loops.front()->getResults(), [](
OpResult r) ->
Value { return r; });
786 Operation *mergeOp = op.mergeReductions(b, loc, replacements, reductionDims);
791 results.
loops = loops;
806 static std::tuple<OpResult, std::optional<OpOperand *>>
809 std::optional<OpOperand *> destinationIterArg;
810 auto loopIt = loops.rbegin();
811 while (
auto iterArg = dyn_cast<BlockArgument>(source->
get())) {
813 if (iterArg.getOwner()->getParentOp() != loop)
815 source = loop.getTiedLoopInit(iterArg);
818 if (loopIt == loops.rend())
819 destinationIterArg = source;
820 return {dyn_cast<OpResult>(source->
get()), destinationIterArg};
825 std::optional<scf::SCFFuseProducerOfSliceResult>
827 RewriterBase &rewriter, tensor::ExtractSliceOp candidateSliceOp,
831 auto [fusableProducer, destinationInitArg] =
834 if (!fusableProducer)
836 unsigned resultNumber = fusableProducer.getResultNumber();
844 Operation *fusableProducerOp = fusableProducer.getOwner();
845 if (isa<DestinationStyleOpInterface>(fusableProducerOp) &&
847 rewriter, fusableProducerOp->
getLoc(), fusableProducerOp,
848 origDestinationTensors)))
851 clonedOpDestinationTensors = origDestinationTensors;
852 if (destinationInitArg &&
853 isa<DestinationStyleOpInterface>(fusableProducerOp)) {
857 clonedOpDestinationTensors[resultNumber] = candidateSliceOp.getSource();
861 rewriter, fusableProducerOp, clonedOpDestinationTensors);
866 llvm::to_vector(candidateSliceOp->getOperands());
867 candidateSliceOpOperands[0] = clonedProducerOp->
getResult(resultNumber);
868 tensor::ExtractSliceOp clonedCandidateSliceOp =
870 candidateSliceOp->getResultTypes(), candidateSliceOpOperands);
875 rewriter, clonedCandidateSliceOp,
876 clonedProducerOp->
getResult(resultNumber));
877 if (
failed(tileAndFuseResult))
882 tileAndFuseResult->tiledValues[0]);
883 rewriter.
eraseOp(clonedCandidateSliceOp);
884 rewriter.
eraseOp(clonedProducerOp);
929 if (destinationInitArg &&
930 isa<DestinationStyleOpInterface>(fusableProducerOp) && !loops.empty()) {
932 ->getOpOperands()[destinationInitArg.value()->getOperandNumber()]
933 .set(origDestinationTensors[resultNumber]);
936 tileAndFuseResult->tiledValues[0],
937 tileAndFuseResult->tiledOps};
961 if (
auto tiledDestStyleOp =
962 tiledAndFusedProducer
963 .getDefiningOp<DestinationStyleOpInterface>()) {
965 Value newRegionArg = newRegionIterArgs.back();
966 auto destSlice = rewriter.
create<tensor::ExtractSliceOp>(
967 sliceOp.getLoc(), newRegionArg, sliceOp.getMixedOffsets(),
968 sliceOp.getMixedSizes(), sliceOp.getMixedStrides());
971 tiledDestStyleOp.getDpsInitsMutable()[resultNumber].set(destSlice);
977 tiledOffset.emplace_back(sliceOp.getMixedOffsets());
978 tiledSizes.emplace_back(sliceOp.getMixedSizes());
996 if (!consumer->getNumResults()) {
998 consumer,
"invalid pattern for op with no results");
1003 llvm::SmallDenseMap<Value, size_t> origProducerToLoopResultNum;
1008 if (
failed(tilingResult))
1010 for (
auto *tiledOp : tilingResult->tiledOps)
1011 tiledAndFusedOps.insert(tiledOp);
1014 auto &loops = tilingResult->loops;
1015 if (loops.empty()) {
1017 for (
auto [origVal, replacement] :
1018 llvm::zip_equal(consumer->getResults(), tilingResult->replacements)) {
1019 replacements[origVal] = replacement;
1029 for (
auto [index, result] :
llvm::enumerate(consumer->getResults())) {
1030 origValToResultNumber[result] = index;
1040 auto addCandidateSlices = [](
Operation *fusedOp,
1041 std::deque<tensor::ExtractSliceOp> &candidates) {
1043 if (
auto sliceOp = operand.getDefiningOp<tensor::ExtractSliceOp>())
1044 candidates.push_back(sliceOp);
1047 std::deque<tensor::ExtractSliceOp> candidates;
1048 addCandidateSlices(tiledAndFusedOps.back(), candidates);
1050 while (!candidates.empty()) {
1052 tensor::ExtractSliceOp candidateSliceOp = candidates.front();
1053 candidates.pop_front();
1056 auto [fusableProducer, destinationInitArg] =
1059 if (!fusableProducer)
1062 auto [fuseSlice, yieldReplacement] =
options.fusionControlFn(
1063 candidateSliceOp, fusableProducer, destinationInitArg.has_value());
1070 std::optional<scf::SCFFuseProducerOfSliceResult> fusedResult =
1075 if (yieldReplacement) {
1077 rewriter, candidateSliceOp, fusedResult.value(), loops))) {
1079 fusableProducer.getOwner(),
"failed to replacement value for this "
1080 "oepration from within the tiled loop");
1082 origValToResultNumber[fusableProducer] =
1083 loops.front()->getNumResults() - 1;
1087 fusedResult->tiledAndFusedProducer.getDefiningOp()) {
1088 fusedProducers.insert(fusedResult->origProducer.getDefiningOp());
1089 tiledAndFusedOps.insert(tiledAndFusedOp);
1090 addCandidateSlices(tiledAndFusedOp, candidates);
1095 for (
auto [origVal, resultNumber] : origValToResultNumber) {
1096 replacements[origVal] = loops.front()->getResult(resultNumber);
1109 TilingInterface op) {
1113 op,
"unable to lower to loops operations with return values");
1120 for (
auto loopRange : domain) {
1127 auto loop = rewriter.
create<scf::ForOp>(op.
getLoc(), offsetVal, sizeVal,
1129 loops.push_back(loop);
1130 ivs.push_back(loop.getInductionVar());
1133 if (
failed(op.generateScalarImplementation(rewriter, op.
getLoc(), ivs))) {
static llvm::ManagedStatic< PassManagerOptions > options
static SmallVector< int64_t > fillInterchangeVector(ArrayRef< int64_t > interchangeVector, size_t iterationDomainSize)
Helper method to adjust the interchange vector to match the iteration domain.
std::function< LogicalResult(RewriterBase &rewriter, Location loc, ValueRange ivs, ValueRange newBbArgs, SmallVector< Value > &tiledValues, SmallVector< SmallVector< OpFoldResult > > &resultOffsets, SmallVector< SmallVector< OpFoldResult > > &resultSizes)> YieldTiledValuesFn
A function that allows returning additional yielded values during yieldTiledValuesAndReplace.
static LogicalResult generateLoopNest(RewriterBase &rewriter, Location loc, const scf::SCFTilingOptions &options, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes, ValueRange destinationTensors, YieldTiledValuesFn tiledBodyFn, SmallVector< LoopLikeOpInterface > &loops)
Generate the tile-loop nest using the loop construct specified in options.
FailureOr< LoopLikeOpInterface > yieldTiledValuesAndReplaceLoop(LoopType loopOp, RewriterBase &rewriter, ValueRange newInitOperands, YieldTiledValuesFn yieldTiledValuesFn)
Append the specified additional newInitOperands operands to the loop's existing init operands (or simi...
static LogicalResult generateLoopNestUsingForOp(RewriterBase &rewriter, Location loc, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes, ValueRange destinationTensors, YieldTiledValuesFn yieldTiledValuesFn, SmallVector< LoopLikeOpInterface > &loops)
Generate the tile-loop nest using scf.for operation.
static LogicalResult generateLoopNestUsingForallOp(RewriterBase &rewriter, Location loc, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes, ArrayRef< Attribute > mappingVector, ValueRange destinationTensors, YieldTiledValuesFn tiledBodyFn, SmallVector< LoopLikeOpInterface > &loops)
Generate the tile-loop nest using scf.forall operation.
static std::tuple< OpResult, std::optional< OpOperand * > > getUntiledProducerFromSliceSource(OpOperand *source, ArrayRef< LoopLikeOpInterface > loops)
Return the untiled producer whose slice is used in a tiled consumer.
static bool tileDividesIterationDomain(Range loopRange)
static Operation * cloneOpAndUpdateDestinationArgs(RewriterBase &rewriter, Operation *op, ValueRange newDestArgs)
Clones the operation and updates the destination if the operation implements the DestinationStyleOpIn...
static LogicalResult addInitOperandsToLoopNest(RewriterBase &rewriter, MutableArrayRef< LoopLikeOpInterface > loops, ValueRange newInitValues, YieldTiledValuesFn getNewTiledYieldsFn)
Method to add new init values to a loop nest.
static OpFoldResult getBoundedTileSize(OpBuilder &b, Location loc, Range loopRange, Value iv, OpFoldResult tileSize)
Returns the bounded tile size given the current iv, loopRange and tileSize, i.e., min(tileSize,...
Base type for affine expression.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
Block represents an ordered list of Operations.
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
ArrayAttr getArrayAttr(ArrayRef< Attribute > value)
This class provides support for representing a failure result, or a valid value of type T.
IRValueT get() const
Return the current value being used by this operand.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Block::iterator getInsertionPoint() const
Returns the current insertion point of the builder.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation * getOwner() const
Returns the operation that owns this result.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
result_iterator result_begin()
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Location getLoc()
The source location the operation was defined or derived from.
result_iterator result_end()
operand_range getOperands()
Returns an iterator on the underlying Value's.
result_range getResults()
InFlightDiagnostic emitOpError(const Twine &message={})
Emit an error with the op name prefixed, like "'dim' op " which is convenient for verifiers.
unsigned getNumResults()
Return the number of results held by this operation.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
void replaceAllUsesWith(Value from, Value to)
Find uses of from and replace them with to.
void mergeBlocks(Block *source, Block *dest, ValueRange argValues=std::nullopt)
Inline the operations of block 'source' into the end of block 'dest'.
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
FailureOr< scf::SCFReductionTilingResult > tileReductionUsingScf(RewriterBase &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > tileSize)
Method to tile a reduction and generate a parallel op within a serial loop.
FailureOr< SCFTilingResult > tileUsingSCF(RewriterBase &rewriter, TilingInterface op, const SCFTilingOptions &options)
Method to tile an op that implements the TilingInterface using scf.for for iterating over the tiles.
LogicalResult yieldReplacementForFusedProducer(RewriterBase &rewriter, tensor::ExtractSliceOp sliceOp, scf::SCFFuseProducerOfSliceResult fusedProducerInfo, MutableArrayRef< LoopLikeOpInterface > loops)
Reconstruct the fused producer from within the tiled-and-fused code.
FailureOr< SmallVector< scf::ForOp > > lowerToLoopsUsingSCFForOp(RewriterBase &rewriter, TilingInterface op)
Method to lower an op that implements the TilingInterface to loops/scalars.
FailureOr< SCFTileAndFuseResult > tileConsumerAndFuseProducersUsingSCF(RewriterBase &rewriter, TilingInterface consumer, const SCFTileAndFuseOptions &options)
Method to tile and fuse a sequence of operations, by tiling the consumer and fusing its producers.
std::optional< SCFFuseProducerOfSliceResult > tileAndFuseProducerOfSlice(RewriterBase &rewriter, tensor::ExtractSliceOp candidateSliceOp, MutableArrayRef< LoopLikeOpInterface > loops)
Implementation of fusing producer of a single slice by computing the slice of the producer in-place.
FailureOr< TilingResult > replaceExtractSliceWithTiledProducer(OpBuilder &builder, tensor::ExtractSliceOp sliceOp, OpResult producerOp)
Pattern to swap a tensor.extract_slice with its producer when the producer implements the TilingInte...
OpFoldResult getMixedSize(OpBuilder &builder, Location loc, Value value, int64_t dim)
Return the dimension of the given tensor value.
FailureOr< Value > getOrCreateDestination(OpBuilder &b, Location loc, OpResult opResult)
This is a helper function for DestinationStyleOpInterface.
LogicalResult getOrCreateDestinations(OpBuilder &b, Location loc, Operation *op, SmallVector< Value > &result)
This is a helper function for DestinationStyleOpInterface.
Include the generated interface declarations.
bool isConstantIntValue(OpFoldResult ofr, int64_t value)
Return true if ofr is constant integer equal to value.
bool isZeroIndex(OpFoldResult v)
Return true if v is an IntegerAttr with value 0 or a ConstantIndexOp with attribute with value 0.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .. sizeof...(exprs)].
AffineMap inversePermutation(AffineMap map)
Returns a map of codomain to domain dimensions such that the first codomain dimension for a particula...
bool succeeded(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. sizeof...(exprs)].
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
bool isPermutationVector(ArrayRef< int64_t > interchange)
Method to check if an interchange vector is a permutation.
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
SmallVector< int64_t > invertPermutationVector(ArrayRef< int64_t > permutation)
Helper method to invert a permutation.
This class represents an efficient way to signal success or failure.
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Container for result values of tiling.
Fuse the producer of the source of candidateSliceOp by computing the required slice of the producer i...
Value tiledAndFusedProducer
Transformation information returned after reduction tiling.
Operation * parallelTiledOp
The partial reduction tiled op generated.
Operation * mergeOp
The final reduction operation merging all the partial reductions.
SmallVector< LoopLikeOpInterface > loops
The loop operations that iterate over the tiles.
Operation * initialOp
Initial op.
Options used to control tile + fuse.
Transformation information returned after tile and fuse.
Options to use to control tiling.
SCFTileSizeComputationFunction tileSizeComputationFunction
Computation function that returns the tile sizes for each operation.
SCFTilingOptions & setTileSizes(ArrayRef< OpFoldResult > ts)
Convenience function to set the tileSizeComputationFunction to a function that computes tile sizes at...
Transformation information returned after tiling.