doxygen/Fusion_8cpp_source.html

 //===- Fusion.cpp - Implementation of linalg Fusion -----------------------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // This file implements the linalg dialect Fusion pass.

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Dialect/Linalg/IR/Linalg.h"

 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"

 #include "mlir/Dialect/Linalg/Utils/Utils.h"

 #include "mlir/Dialect/MemRef/IR/MemRef.h"

 #include "mlir/Dialect/Tensor/IR/Tensor.h"

 #include "mlir/Dialect/Tensor/Utils/Utils.h"

 #include "mlir/IR/AffineExpr.h"

 #include "mlir/IR/AffineMap.h"

 #include "mlir/IR/Dominance.h"

 #include "mlir/Support/LLVM.h"

 #include "llvm/ADT/SmallBitVector.h"

 #include "llvm/Support/Debug.h"


 #define DEBUG_TYPE "linalg-fusion"


 using namespace mlir;

 using namespace mlir::linalg;


 /// Implements a simple high-level fusion pass on linalg structured operations.

 ///

 /// In each block, linalg ops are processed in reverse textual order.

 /// Given a linalg op `O`, fusion occurs by:

 ///   1. inspecting the linalg ops that write into the views read by `O`. There

 ///      are 2 cases:

 ///      a) buffer case: use the SSA value of the views and a simple alias

 ///         analysis on subview ops to determine producer-consumer dependences;

 ///      b) tensor case: use SSA use-def chains on extract_slice ops;

 ///   2. greedily fuse the linalg ops that produce the subview/extract_slice.

 ///   3. inspect the fused ops and determine whether they have other remaining

 ///      LinalgOp uses. If not, then erase the original producing linalg op.

 ///

 /// More advanced use cases, analyses as well as profitability heuristics are

 /// left for future work.


 /// Pairs a shaped SSA value with the index of one of its dimensions. In this
 /// file it names the operand dimension whose extent defines the range of a
 /// given loop of a LinalgOp (see `getShapeDefiningLoopRange`).
 struct ShapeDimension {

   /// The shaped operand value whose dimension is referenced.
   Value shape;

   /// Index of the referenced dimension within `shape`.
   unsigned dimension;

 };


 // Returns the first (`shape`, `dimension`) pair of `op` whose extent defines
 // the range of the loop at `loopDepth`. Operands are scanned in operand order
 // and, for each operand, the results of its indexing map are scanned in order;
 // the first result that is exactly the dimension `loopDepth` wins. The
 // semantics of the loopToOperandRangesMaps guarantee that at least one such
 // dimension exists, and when several exist they agree by construction (i.e.
 // have the same size), so returning the first one is sufficient.
 //
 // When `fromSubViewOpOnly` is set, operands that are not produced by a
 // `memref.subview` or `tensor.extract_slice` op are ignored.
 static ShapeDimension
 getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth,
                           bool fromSubViewOpOnly = false) {
   for (OpOperand &operand : op->getOpOperands()) {
     Value shape = operand.get();

     // The method `getRangeFromOperandShape` requires using SubViewOp or
     // ExtractSliceOps. If the value isn't defined from there continue.
     // todo: The method should be adapted to get the values from
     // `ViewInterface`. The interface needs a `getOrCreateRanges` method which
     // currently returns a `linalg.range`. The fix here is to move this op to
     // `std` dialect and add the method to `ViewInterface`.
     if (fromSubViewOpOnly) {
       Operation *definingOp = shape.getDefiningOp();
       if (!definingOp ||
           !isa<memref::SubViewOp, tensor::ExtractSliceOp>(definingOp))
         continue;
     }

     AffineMap indexingMap = op.getMatchingIndexingMap(&operand);
     LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange I/O idx: "
                             << operand.getOperandNumber() << "\n");
     LLVM_DEBUG(llvm::dbgs()
                << "getShapeDefiningLoopRange map: " << indexingMap << "\n");

     ArrayRef<AffineExpr> mapResults = indexingMap.getResults();
     for (unsigned resultIdx = 0, numResults = mapResults.size();
          resultIdx != numResults; ++resultIdx) {
       // Only results that are a plain loop dimension can define a range.
       auto dimExpr = dyn_cast<AffineDimExpr>(mapResults[resultIdx]);
       if (!dimExpr || dimExpr.getPosition() != loopDepth)
         continue;

       LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange loopDepth: "
                               << loopDepth << "\n");
       LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange shape: "
                               << shape << "\n");
       return ShapeDimension{shape, resultIdx};
     }
   }
   llvm_unreachable("Expect to be able to extract a shape defining loop range");
 }


 /// Returns an owning copy of every operand of `producer`, in operand order.
 /// These are the values handed to `makeTiledShapes` when fusing.
 static SmallVector<Value> getTiledOperands(LinalgOp producer) {
   auto operands = producer->getOperands();
   return SmallVector<Value>(operands.begin(), operands.end());
 }


 /// Creates the fused copy of `producer` at the current insertion point of `b`.
 ///
 /// `fusedLoopsAndRanges` maps a producer loop index to the range that loop is
 /// restricted to in the fused op. Loops that are absent from the map are not
 /// tiled + fused: they keep their full extent, which is read from the producer
 /// itself (tile size 0, range [0, dim) with stride 1).
 ///
 /// Returns the clone of `producer` operating on the tiled operands, with its
 /// `linalg.index` results shifted by the per-loop offsets.
 static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
                      const DenseMap<unsigned, Range> &fusedLoopsAndRanges) {
   Location loc = producer.getLoc();
   const unsigned numLoops = producer.getNumLoops();

   // `tiledOffsets` only receives entries for fused loops, whereas `tileSizes`,
   // `loopBounds` and `perLoopRanges` hold one entry per producer loop.
   SmallVector<OpFoldResult> tiledOffsets, tileSizes, loopBounds;
   SmallVector<Range> perLoopRanges;
   for (unsigned loopIdx = 0; loopIdx != numLoops; ++loopIdx) {
     ShapeDimension definingDim = getShapeDefiningLoopRange(producer, loopIdx);
     OpFoldResult bound =
         createFoldedDimOp(b, loc, definingDim.shape, definingDim.dimension);
     loopBounds.push_back(bound);

     auto fusedIt = fusedLoopsAndRanges.find(loopIdx);
     if (fusedIt == fusedLoopsAndRanges.end()) {
       // Untiled loop: a zero tile size and the full [0, bound) range.
       tileSizes.push_back(b.getIndexAttr(0));
       perLoopRanges.push_back(
           Range{b.getIndexAttr(0), bound, b.getIndexAttr(1)});
       LLVM_DEBUG(llvm::dbgs() << "full loop#" << loopIdx << " with LoopRange "
                               << perLoopRanges.back() << "\n");
       continue;
     }

     // Fused loop: take offset and size from the provided range.
     const Range &fusedRange = fusedIt->second;
     tiledOffsets.push_back(fusedRange.offset);
     tileSizes.push_back(fusedRange.size);
     perLoopRanges.push_back(fusedRange);
     LLVM_DEBUG(llvm::dbgs() << "tiled loop#" << loopIdx << " with LoopRange "
                             << perLoopRanges.back() << "\n");
   }

   // Compute subranges for all tensor input/output operands.
   SmallVector<Value> operandsToTile = getTiledOperands(producer);
   SmallVector<Value> tiledOperands =
       makeTiledShapes(b, loc, producer, operandsToTile, tiledOffsets,
                       tileSizes, loopBounds,
                       /*omitPartialTileCheck=*/false);

   // Result types are the types of the tiled init operands; the i-th result is
   // matched with the i-th operand of the init operand range.
   const unsigned firstInitIdx =
       producer.getDpsInitsMutable().getAsOperandRange().getBeginOperandIndex();
   const unsigned numResults = producer->getNumResults();
   SmallVector<Type, 4> resultTypes;
   resultTypes.reserve(numResults);
   for (unsigned resultIdx = 0; resultIdx != numResults; ++resultIdx)
     resultTypes.push_back(tiledOperands[firstInitIdx + resultIdx].getType());

   // Clone the producer with new operands and result types.
   LinalgOp clonedOp = clone(b, producer, resultTypes, tiledOperands);

   // Shift all IndexOp results by the tile offset of the corresponding loop.
   SmallVector<OpFoldResult> loopOffsets;
   loopOffsets.reserve(perLoopRanges.size());
   for (const Range &range : perLoopRanges)
     loopOffsets.push_back(range.offset);
   offsetIndices(b, clonedOp, loopOffsets);

   return clonedOp;
 }


 /// Returns the loop range (offset, size, stride) for dimension `dim` of
 /// `shapedOperand`.
 ///
 /// `shapedOperand` is expected to be the result of a `memref.subview` or a
 /// `tensor.extract_slice` op; the range is taken from that op's
 /// `getOrCreateRanges(b, loc)`, which may create ops through `b` at `loc`.
 ///
 /// Any other value, including one that is not the result of an operation
 /// (e.g. a block argument), is a caller error and hits the `llvm_unreachable`
 /// below.
 static Range getRangeFromOperandShape(OpBuilder &b, Location loc,
                                       Value shapedOperand, unsigned dim) {
   // `Value::getDefiningOp<OpTy>()` tolerates values without a defining op,
   // unlike `dyn_cast<OpTy>` applied to a possibly-null `Operation *`. This way
   // an unexpected operand reports the descriptive message below rather than a
   // generic cast assertion.
   if (auto subViewOp = shapedOperand.getDefiningOp<memref::SubViewOp>())
     return subViewOp.getOrCreateRanges(b, loc)[dim];
   if (auto sliceOp = shapedOperand.getDefiningOp<tensor::ExtractSliceOp>())
     return sliceOp.getOrCreateRanges(b, loc)[dim];
   llvm_unreachable("SubviewOp or ExtractSliceOp expected");
 }


 /// Fuses `producerOp` into the loop immediately enclosing the consumer by
 /// "recomputing" the producer right where it is needed, just before the
 /// consumer.
 ///
 /// `producerMap` is the producer indexing map of the value being consumed.
 /// Each of its results must be a plain loop dimension (enforced by `cast`);
 /// result `i` ties producer loop `producerMap[i]` to the range of dimension
 /// `i` of the subview/extract_slice that defines `consumerOpOperand`.
 static LinalgOp fuse(OpBuilder &b, LinalgOp producerOp, AffineMap producerMap,
                      OpOperand &consumerOpOperand) {
   LLVM_DEBUG(llvm::dbgs() << "Producer map: " << producerMap << "\n");

   Value slicedOperand = consumerOpOperand.get();
   Location consumerLoc = consumerOpOperand.getOwner()->getLoc();

   DenseMap<unsigned, Range> rangesByProducerLoop;
   ArrayRef<AffineExpr> mapResults = producerMap.getResults();
   for (unsigned operandDim = 0, rank = mapResults.size(); operandDim != rank;
        ++operandDim) {
     unsigned producerLoop =
         cast<AffineDimExpr>(mapResults[operandDim]).getPosition();
     rangesByProducerLoop[producerLoop] =
         getRangeFromOperandShape(b, consumerLoc, slicedOperand, operandDim);
   }
   return fuse(b, producerOp, rangesByProducerLoop);
 }


 /// Walks the use-def chain of `tensor` backwards looking for the LinalgOp that
 /// produces it, looking through `tensor.extract_slice` sources.
 ///
 /// On success `opResult` is set to the LinalgOp result that was reached. If
 /// `tensor` is not a ranked tensor, or the walk reaches a value it cannot look
 /// through, `opResult` is left untouched.
 ///
 /// NOTE(review): the scf::ForOp step below queries `getDefiningOp` on a
 /// BlockArgument. `getDefiningOp` only returns an operation for values that
 /// are operation results, so this branch appears to never be taken. Walking
 /// through loop iter_args would presumably need the block's parent op, and an
 /// index that skips the induction variable -- confirm the intended behavior
 /// before changing it, since doing so alters which producers get fused.

 // TODO(ravishankarm, ntv): This can be moved into the dependence graphs
 // dependence tracking since the dependence tracking is similar to what is done
 // w.r.t to buffers.
 static void getProducerOfTensor(Value tensor, OpResult &opResult) {
   if (!isa<RankedTensorType>(tensor.getType()))
     return;

   for (;;) {
     LLVM_DEBUG(llvm::dbgs() << "\ngetProducerOfTensor: " << tensor);

     // Found the producer: report the result being consumed.
     if (tensor.getDefiningOp<LinalgOp>()) {
       opResult = cast<OpResult>(tensor);
       return;
     }

     // Look through slices to the tensor they were taken from.
     if (auto sliceOp = tensor.getDefiningOp<tensor::ExtractSliceOp>()) {
       tensor = sliceOp.getSource();
       continue;
     }

     // Anything other than a block argument ends the walk.
     auto blockArg = dyn_cast<BlockArgument>(tensor);
     if (!blockArg)
       return;
     auto forOp = blockArg.getDefiningOp<scf::ForOp>();
     if (!forOp)
       return;
     tensor = forOp.getInitArgs()[blockArg.getArgNumber()];
   }
 }


 /// Looks up the LinalgOp result that produces the tensor read through
 /// `consumerOpOperand` (see `getProducerOfTensor`) and fuses that producer
 /// into the consumer. Returns failure when no producer can be found.
 FailureOr<FusionInfo>
 mlir::linalg::fuseProducerOfTensor(OpBuilder &b, OpOperand &consumerOpOperand) {
   OpResult producerResult;
   getProducerOfTensor(consumerOpOperand.get(), producerResult);
   if (producerResult)
     return fuseProducerOfTensor(b, producerResult, consumerOpOperand);

   LLVM_DEBUG(llvm::dbgs() << "\nUnable to find producer");
   return failure();
 }


 /// Fuses the LinalgOp that owns `producerOpResult` into the LinalgOp that owns
 /// `consumerOpOperand`, by cloning a tiled copy of the producer right before
 /// the consumer and rewiring `consumerOpOperand` to the clone's result.
 ///
 /// Fails (without modifying IR) when:
 ///   - either owner is not a LinalgOp;
 ///   - the consumed value is not produced by a `tensor.extract_slice`;
 ///   - the consumed value and the producer result live in the same block.
 ///
 /// On success returns the original producer together with the fused clone.
 /// The insertion point of `b` is restored on return.
 FailureOr<FusionInfo>
 mlir::linalg::fuseProducerOfTensor(OpBuilder &b, OpResult producerOpResult,
                                    OpOperand &consumerOpOperand) {
   auto producerOp = dyn_cast<LinalgOp>(producerOpResult.getOwner());
   auto consumerOp = dyn_cast<LinalgOp>(consumerOpOperand.getOwner());
   if (!producerOp || !consumerOp)
     return failure();

   Value consumedTensor = consumerOpOperand.get();

   // Must be an extract_slice op to guarantee there are loops we can fuse into.
   auto sliceOp = consumedTensor.getDefiningOp<tensor::ExtractSliceOp>();
   if (!sliceOp) {
     LLVM_DEBUG(llvm::dbgs()
                << "\nNot fusable, not an extract_slice op: " << consumedTensor);
     return failure();
   }

   // If producer is already in the same block as consumer, we are done.
   if (consumedTensor.getParentBlock() == producerOpResult.getParentBlock())
     return failure();

   // Insert fused `producer` just before `consumer`.
   OpBuilder::InsertionGuard guard(b);
   b.setInsertionPoint(consumerOp);
   LLVM_DEBUG(llvm::dbgs() << "Fuse into consumer: " << *consumerOp << "\n");

   const unsigned resultNumber = producerOpResult.getResultNumber();
   OpOperand *producerInit = producerOp.getDpsInitOperand(resultNumber);
   AffineMap producerMap = producerOp.getMatchingIndexingMap(producerInit);
   LinalgOp fusedProducer = fuse(b, producerOp, producerMap, consumerOpOperand);
   Location fusedLoc = fusedProducer.getLoc();

   // Replace use.
   Value replacement = fusedProducer->getResult(resultNumber);
   Type consumerType = consumedTensor.getType();

   // Check if rank-reduction occurred as part of the extract_slice. If yes,
   // collapse the dropped dimensions.
   const int64_t consumerRank = cast<ShapedType>(consumerType).getRank();
   const int64_t producedRank =
       cast<ShapedType>(replacement.getType()).getRank();
   if (consumerRank != producedRank)
     replacement = tensor::dropGivenUnitDims(b, fusedLoc, replacement,
                                             sliceOp.getDroppedDims());

   // Canonicalizations are not guaranteed to have happened before constructing
   // `fusedProducer`. In the tensor case this can result in temporary type
   // mismatches. Insert a `tensor.cast` op to propagate the transformation
   // invariant that types are compatible.
   if (replacement.getType() != consumerType)
     replacement =
         tensor::CastOp::create(b, fusedLoc, consumerType, replacement);

   consumerOpOperand.set(replacement);
   return FusionInfo{producerOp, fusedProducer};
 }

Utils.h

Utils.h

Dominance.h

fuse
static LinalgOp fuse(OpBuilder &b, LinalgOp producer, const DenseMap< unsigned, Range > &fusedLoopsAndRanges)
Fuses the producer by cloning the producer.
Definition: Fusion.cpp:103

getProducerOfTensor
static void getProducerOfTensor(Value tensor, OpResult &opResult)
Walk back use-def chain through scf::For yields.
Definition: Fusion.cpp:192

getTiledOperands
static SmallVector< Value > getTiledOperands(LinalgOp producer)
Definition: Fusion.cpp:96

getShapeDefiningLoopRange
static ShapeDimension getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth, bool fromSubViewOpOnly=false)
Definition: Fusion.cpp:58

getRangeFromOperandShape
static Range getRangeFromOperandShape(OpBuilder &b, Location loc, Value shapedOperand, unsigned dim)
Get the loop range for a dimension dim based on the shapedOperand.
Definition: Fusion.cpp:160

llvm::DenseMap
Definition: LLVM.h:55

llvm::SmallVector
Definition: LLVM.h:72

mlir::AffineMap
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
Definition: AffineMap.h:46

mlir::AffineMap::getResults
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:403

mlir::Builder::getIndexAttr
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:103

mlir::IROperand::get
IRValueT get() const
Return the current value being used by this operand.
Definition: UseDefLists.h:160

mlir::IROperand::set
void set(IRValueT newValue)
Set the current value being used by this operand.
Definition: UseDefLists.h:163

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::MutableOperandRange
This class provides a mutable adaptor for a range of operands.
Definition: ValueRange.h:118

mlir::MutableOperandRange::getAsOperandRange
OperandRange getAsOperandRange() const
Explicit conversion to an OperandRange.
Definition: OperationSupport.cpp:498

mlir::OpBuilder::InsertionGuard
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:346

mlir::OpBuilder
This class helps build Operations.
Definition: Builders.h:205

mlir::OpBuilder::setInsertionPoint
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396

mlir::OpFoldResult
This class represents a single result from folding an operation.
Definition: OpDefinition.h:272

mlir::OpOperand
This class represents an operand of an operation.
Definition: Value.h:257

mlir::OpResult
This is a value defined by a result of an operation.
Definition: Value.h:447

mlir::OpResult::getOwner
Operation * getOwner() const
Returns the operation that owns this result.
Definition: Value.h:456

mlir::OpResult::getResultNumber
unsigned getResultNumber() const
Returns the number of this result.
Definition: Value.h:459

mlir::OperandRange::getBeginOperandIndex
unsigned getBeginOperandIndex() const
Return the operand index of the first element of this range.
Definition: OperationSupport.cpp:387

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Operation::getLoc
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

mlir::Value::getType
Type getType() const
Return the type of this value.
Definition: Value.h:105

mlir::Value::getParentBlock
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:46

mlir::Value::getDefiningOp
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:18

mlir::detail::IROperandBase::getOwner
Operation * getOwner() const
Return the owner of this operand.
Definition: UseDefLists.h:38

Linalg.h

Transforms.h

MemRef.h

Tensor.h

AffineExpr.h

AffineMap.h

LLVM.h

mlir::detail::enumerate
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344

mlir::linalg
Definition: LinalgToStandard.h:24

mlir::linalg::fuseProducerOfTensor
FailureOr< FusionInfo > fuseProducerOfTensor(OpBuilder &b, OpOperand &consumerOpOperand)
This implements the fusion part of the "tileAndFuse on tensors" transformation and thus requires the ...
Definition: Fusion.cpp:217

mlir::linalg::makeTiledShapes
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
Definition: Utils.cpp:862

mlir::linalg::createFoldedDimOp
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
Definition: LinalgOps.cpp:103

mlir::linalg::offsetIndices
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
Definition: Utils.cpp:884

mlir::tensor::dropGivenUnitDims
CollapseShapeOp dropGivenUnitDims(OpBuilder &b, Location loc, Value src, const llvm::SmallBitVector &dropDims)
Create tensor.collapse_shape to drop unit dimensions in dropDims in tensor src.
Definition: Utils.cpp:95

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::getType
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:304

mlir::clone
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
Definition: StructuredOpsUtils.cpp:197

ShapeDimension
Implements a simple high-level fusion pass on linalg structured operations.
Definition: Fusion.cpp:47

ShapeDimension::dimension
unsigned dimension
Definition: Fusion.cpp:49

ShapeDimension::shape
Value shape
Definition: Fusion.cpp:48

mlir::Range
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Definition: StaticValueUtils.h:35

mlir::Range::offset
OpFoldResult offset
Definition: StaticValueUtils.h:36

mlir::linalg::FusionInfo
A struct containing the Linalg producer before and after fusion.
Definition: Utils.h:233