27 #include "llvm/ADT/MapVector.h"
28 #include "llvm/ADT/ScopeExit.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Debug.h"
35 #define DEBUG_TYPE "linalg-fusion"
68 bool fromSubViewOpOnly =
false) {
78 if (fromSubViewOpOnly &&
79 !isa_and_nonnull<memref::SubViewOp, tensor::ExtractSliceOp>(
80 opOperand.get().getDefiningOp()))
83 AffineMap map = op.getMatchingIndexingMap(&opOperand);
84 LLVM_DEBUG(llvm::dbgs() <<
"getShapeDefiningLoopRange I/O idx: "
85 << opOperand.getOperandNumber() <<
"\n");
86 LLVM_DEBUG(llvm::dbgs()
87 <<
"getShapeDefiningLoopRange map: " << map <<
"\n");
93 if (loopDepth == en.value().cast<
AffineDimExpr>().getPosition()) {
94 LLVM_DEBUG(llvm::dbgs() <<
"getShapeDefiningLoopRange loopDepth: "
95 << loopDepth <<
"\n");
96 LLVM_DEBUG(llvm::dbgs() <<
"getShapeDefiningLoopRange shape: "
97 << opOperand.get() <<
"\n");
99 static_cast<unsigned>(en.index())};
103 llvm_unreachable(
"Expect to be able to extract a shape defining loop range");
107 return producer->getOperands();
119 for (
unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) {
123 sizeBounds.push_back(dim);
124 auto it = fusedLoopsAndRanges.find(i);
125 if (it != fusedLoopsAndRanges.end()) {
126 ivs.push_back(it->second.offset);
127 tileSizes.push_back(it->second.size);
128 loopRanges.push_back(it->second);
129 LLVM_DEBUG(llvm::dbgs() <<
"tiled loop#" << i <<
" with LoopRange "
130 << loopRanges.back() <<
"\n");
134 LLVM_DEBUG(llvm::dbgs() <<
"full loop#" << i <<
" with LoopRange "
135 << loopRanges.back() <<
"\n");
140 clonedShapes.reserve(producer->getNumOperands());
152 resultTypes.reserve(producer->getNumResults());
153 for (
Value operand : producer.getDpsInits()) {
154 auto tensorType = dyn_cast<RankedTensorType>(operand.getType());
157 unsigned rank = tensorType.getRank();
159 rank, ShapedType::kDynamic);
162 rank, ShapedType::kDynamic);
163 resultTypes.push_back(tensor::ExtractSliceOp::inferResultType(
164 tensorType, staticOffsetsVector, staticSizesVector,
165 staticStridesVector));
168 LinalgOp clonedOp =
clone(b, producer, resultTypes, clonedShapes);
172 llvm::map_range(loopRanges, [&](
Range range) {
return range.
offset; }));
181 Value shapedOperand,
unsigned dim) {
183 if (
auto subViewOp = dyn_cast<memref::SubViewOp>(shapeProducingOp))
184 return subViewOp.getOrCreateRanges(b, loc)[dim];
185 if (
auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(shapeProducingOp))
186 return sliceOp.getOrCreateRanges(b, loc)[dim];
187 llvm_unreachable(
"SubviewOp or ExtractSliceOp expected");
195 LLVM_DEBUG(llvm::dbgs() <<
"Producer map: " << producerMap <<
"\n");
197 Value shapedOperand = consumerOpOperand.
get();
201 b, consumerOpOperand.
getOwner()->
getLoc(), shapedOperand, en.index());
203 return fuse(b, producerOp, fusedLoopsAndRanges);
213 if (!isa<RankedTensorType>(tensor.
getType()))
217 LLVM_DEBUG(llvm::dbgs() <<
"\ngetProducerOfTensor: " << tensor);
219 opResult = cast<OpResult>(tensor);
222 if (
auto sliceOp = tensor.
getDefiningOp<tensor::ExtractSliceOp>()) {
223 tensor = sliceOp.getSource();
226 if (
auto blockArg = dyn_cast<BlockArgument>(tensor)) {
227 if (
auto forOp = blockArg.getDefiningOp<scf::ForOp>()) {
228 tensor = forOp.getInitArgs()[blockArg.getArgNumber()];
238 Value inputTensor = consumerOpOperand.
get();
241 if (!producerOpResult) {
242 LLVM_DEBUG(llvm::dbgs() <<
"\nUnable to find producer");
251 auto producerOp = dyn_cast<LinalgOp>(producerOpResult.
getOwner());
255 LinalgOp consumerOp = dyn_cast<LinalgOp>(consumerOpOperand.
getOwner());
259 Value inputTensor = consumerOpOperand.
get();
262 auto sliceOp = inputTensor.
getDefiningOp<tensor::ExtractSliceOp>();
264 LLVM_DEBUG(llvm::dbgs()
265 <<
"\nNot fusable, not an extract_slice op: " << inputTensor);
277 LLVM_DEBUG(llvm::dbgs() <<
"Fuse into consumer: " << *consumerOp <<
"\n");
280 LinalgOp fusedProducer =
281 fuse(b, producerOp, producerOp.getMatchingIndexingMap(opOperand),
291 if (consumerType != def.
getType())
292 def = b.
create<tensor::CastOp>(fusedProducer.getLoc(), consumerType, def);
293 consumerOpOperand.
set(def);
static LinalgOp fuse(OpBuilder &b, LinalgOp producer, const DenseMap< unsigned, Range > &fusedLoopsAndRanges)
Fuses the producer by cloning the producer.
static void getProducerOfTensor(Value tensor, OpResult &opResult)
Walk back use-def chain through scf::For yields.
static SmallVector< Value > getTiledOperands(LinalgOp producer)
static ShapeDimension getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth, bool fromSubViewOpOnly=false)
static Range getRangeFromOperandShape(OpBuilder &b, Location loc, Value shapedOperand, unsigned dim)
Get the loop range for a dimension dim based on the shapedOperand.
A dimensional identifier appearing in an affine expression.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
ArrayRef< AffineExpr > getResults() const
unsigned getNumResults() const
IntegerAttr getIndexAttr(int64_t value)
This class provides support for representing a failure result, or a valid value of type T.
IRValueT get() const
Return the current value being used by this operand.
void set(IRValueT newValue)
Set the current value being used by this operand.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation * getOwner() const
Returns the operation that owns this result.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
Location getLoc()
The source location the operation was defined or derived from.
MutableArrayRef< OpOperand > getOpOperands()
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Block * getParentBlock()
Return the Block in which this Value is defined.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Operation * getOwner() const
Return the owner of this operand.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
FailureOr< FusionInfo > fuseProducerOfTensor(OpBuilder &b, OpOperand &consumerOpOperand)
Tensor counterpart of fuseProducerOfBuffer.
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
This header declares functions that assist transformations in the MemRef dialect.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
Implements a simple high-level fusion pass on linalg structured operations.
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
A struct containing the Linalg producer before and after fusion.