MLIR  16.0.0git
Fusion.cpp
Go to the documentation of this file.
1 //===- Fusion.cpp - Implementation of linalg Fusion -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the linalg dialect Fusion pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
22 #include "mlir/IR/AffineExpr.h"
23 #include "mlir/IR/AffineMap.h"
24 #include "mlir/IR/Dominance.h"
25 #include "mlir/Support/LLVM.h"
28 #include "llvm/ADT/MapVector.h"
29 #include "llvm/ADT/ScopeExit.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 
33 #include <set>
34 
35 #define DEBUG_TYPE "linalg-fusion"
36 
37 using namespace mlir;
38 using namespace mlir::linalg;
39 
40 /// Implements a simple high-level fusion pass on linalg structured operations.
41 ///
42 /// In each block, linalg ops are processed in reverse textual order.
43 /// Given a linalg op `O`, fusion occurs by:
44 /// 1. inspecting the linalg ops that write into the views read by `O`. There
45 /// are 2 cases:
46 /// a) buffer case: use the SSA value of the views and a simple alias
47 /// analysis on subview ops to determine producer-consumer dependences;
48 /// b) tensor case: use SSA use-def chains on extract_slice ops;
49 /// 2. greedily fuse the linalg ops that produce the subview/extract_slice.
50 /// 3. inspect the fused ops and determine whether they have other remaining
51 /// LinalgOp uses. If not, then erase the original producing linalg op.
52 ///
53 /// More advanced use cases, analyses as well as profitability heuristics are
54 /// left for future work.
55 
58  unsigned dimension;
59 };
60 
61 // Given an `op`, returns the first (`shape`, `dimension`) pair that identifies
62 // the loop range at `loopDepth`. The semantics of the loopToOperandRangesMaps
63 // guarantees at least one such dimension is found. If multiple candidates exist
64 // they must agree by construction (i.e. have the same size) and we just return
65 // the first one.
66 static ShapeDimension
67 getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth,
68  bool fromSubViewOpOnly = false) {
69  // Iterate over the inputs and outputs in order.
70  // Extract the subranges from the linearized ranges.
71  for (OpOperand &opOperand : op->getOpOperands()) {
72  // The method `getRangeFromOperandShape` requires using SubViewOp or
73  // ExtractSliceOps. If the value isn't defined from there continue.
74  // todo: The method should be adapted to get the values from
75  // `ViewInterface`. The interface needs a `getOrCreateRanges` method which
76  // currently returns a `linalg.range`. The fix here is to move this op to
77  // `std` dialect and add the method to `ViewInterface`.
78  if (fromSubViewOpOnly &&
79  !isa_and_nonnull<memref::SubViewOp, tensor::ExtractSliceOp>(
80  opOperand.get().getDefiningOp()))
81  continue;
82 
83  AffineMap map = op.getMatchingIndexingMap(&opOperand);
84  LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange I/O idx: "
85  << opOperand.getOperandNumber() << "\n");
86  LLVM_DEBUG(llvm::dbgs()
87  << "getShapeDefiningLoopRange map: " << map << "\n");
88  SmallVector<Value, 8> shapeRanges(map.getNumResults(), nullptr);
89  for (const auto &en : llvm::enumerate(map.getResults())) {
90  auto dimExpr = en.value().dyn_cast<AffineDimExpr>();
91  if (!dimExpr)
92  continue;
93  if (loopDepth == en.value().cast<AffineDimExpr>().getPosition()) {
94  LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange loopDepth: "
95  << loopDepth << "\n");
96  LLVM_DEBUG(llvm::dbgs() << "getShapeDefiningLoopRange shape: "
97  << opOperand.get() << "\n");
98  return ShapeDimension{opOperand.get(),
99  static_cast<unsigned>(en.index())};
100  }
101  }
102  }
103  llvm_unreachable("Expect to be able to extract a shape defining loop range");
104 }
105 
106 static SmallVector<Value> getTiledOperands(LinalgOp producer) {
107  return producer->getOperands();
108 }
109 
110 /// Fuses the producer by cloning the `producer`. The `fusedLoopsAndRanges`
111 /// provides the loop range information for the fused loops. The rest are
112 /// obtained from the producer itself, since they are not tiled + fused.
113 static LinalgOp fuse(OpBuilder &b, LinalgOp producer,
114  const DenseMap<unsigned, Range> &fusedLoopsAndRanges) {
115  SmallVector<OpFoldResult> ivs, tileSizes, sizeBounds;
116  SmallVector<Range> loopRanges;
117  Location loc = producer.getLoc();
118 
119  for (unsigned i = 0, e = producer.getNumLoops(); i < e; ++i) {
120  auto shapeDim = getShapeDefiningLoopRange(producer, i);
121  OpFoldResult dim =
122  createFoldedDimOp(b, loc, shapeDim.shape, shapeDim.dimension);
123  sizeBounds.push_back(dim);
124  auto it = fusedLoopsAndRanges.find(i);
125  if (it != fusedLoopsAndRanges.end()) {
126  ivs.push_back(it->second.offset);
127  tileSizes.push_back(it->second.size);
128  loopRanges.push_back(it->second);
129  LLVM_DEBUG(llvm::dbgs() << "tiled loop#" << i << " with LoopRange "
130  << loopRanges.back() << "\n");
131  } else {
132  tileSizes.push_back(b.getIndexAttr(0));
133  loopRanges.push_back(Range{b.getIndexAttr(0), dim, b.getIndexAttr(1)});
134  LLVM_DEBUG(llvm::dbgs() << "full loop#" << i << " with LoopRange "
135  << loopRanges.back() << "\n");
136  }
137  }
138 
139  SmallVector<Value, 8> clonedShapes;
140  clonedShapes.reserve(producer->getNumOperands());
141 
142  // Compute subranges for all tensor input/output operands.
143  clonedShapes.append(makeTiledShapes(
144  b, loc, producer, getTiledOperands(producer), ivs, tileSizes, sizeBounds,
145  /**omitPartialTileCheck=*/false));
146 
147  // Iterate over the results in order.
148  // Extract the subtensor type from the linearized range.
149  // Since we do not enforce any canonicalizations on the fly, this is always
150  // fully dynamic at construction time.
151  SmallVector<Type, 4> resultTypes;
152  resultTypes.reserve(producer->getNumResults());
153  for (OpOperand *operand : producer.getDpsInitOperands()) {
154  auto tensorType = operand->get().getType().dyn_cast<RankedTensorType>();
155  if (!tensorType)
156  continue;
157  unsigned rank = tensorType.getRank();
158  SmallVector<int64_t, 4> staticOffsetsVector(
159  rank, ShapedType::kDynamic);
160  SmallVector<int64_t, 4> staticSizesVector(rank, ShapedType::kDynamic);
161  SmallVector<int64_t, 4> staticStridesVector(
162  rank, ShapedType::kDynamic);
163  resultTypes.push_back(tensor::ExtractSliceOp::inferResultType(
164  tensorType, staticOffsetsVector, staticSizesVector,
165  staticStridesVector));
166  }
167 
168  Operation *clonedOp = clone(b, producer, resultTypes, clonedShapes);
169 
170  // Shift all IndexOp results by the tile offset.
171  SmallVector<OpFoldResult> allIvs = llvm::to_vector(
172  llvm::map_range(loopRanges, [&](Range range) { return range.offset; }));
173  offsetIndices(b, clonedOp, allIvs);
174 
175  return clonedOp;
176 }
177 
178 /// Get the loop range for a dimension `dim` based on the `shapedOperand`. It is
179 /// expected to be defined by a subview op or an extract_slice op.
181  Value shapedOperand, unsigned dim) {
182  Operation *shapeProducingOp = shapedOperand.getDefiningOp();
183  if (auto subViewOp = dyn_cast<memref::SubViewOp>(shapeProducingOp))
184  return subViewOp.getOrCreateRanges(b, loc)[dim];
185  if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(shapeProducingOp))
186  return sliceOp.getOrCreateRanges(b, loc)[dim];
187  llvm_unreachable("SubviewOp or ExtractSliceOp expected");
188 }
189 
190 /// Fuses the producer into the loop immediately enclosing the consumer.
191 /// This is achieved by "recomputing" the producer at the time it
192 /// is needed just before the consumer.
193 static LinalgOp fuse(OpBuilder &b, LinalgOp producerOp, AffineMap producerMap,
194  OpOperand &consumerOpOperand) {
195  LLVM_DEBUG(llvm::dbgs() << "Producer map: " << producerMap << "\n");
196  DenseMap<unsigned, Range> fusedLoopsAndRanges;
197  Value shapedOperand = consumerOpOperand.get();
198  for (const auto &en : llvm::enumerate(producerMap.getResults())) {
199  unsigned posInProducerLoop = en.value().cast<AffineDimExpr>().getPosition();
200  fusedLoopsAndRanges[posInProducerLoop] = getRangeFromOperandShape(
201  b, consumerOpOperand.getOwner()->getLoc(), shapedOperand, en.index());
202  }
203  return fuse(b, producerOp, fusedLoopsAndRanges);
204 }
205 
206 // Encode structural fusion safety preconditions.
207 // Some of these will be lifted in the future with better analysis.
208 static bool isStructurallyFusableProducer(LinalgOp producer, Value consumedView,
209  LinalgOp consumer) {
210  assert(producer.hasBufferSemantics() &&
211  "expected linalg op with buffer semantics");
212  assert(consumer.hasBufferSemantics() &&
213  "expected linalg op with buffer semantics");
214  if (producer.getNumDpsInits() != 1) {
215  LLVM_DEBUG(llvm::dbgs() << "\nNot structurally fusable (multi-output)");
216  return false;
217  }
218  // Only fuse when the producer block dominates.
219  DominanceInfo dom(producer.getOperation());
220  if (!dom.dominates(producer->getBlock(), consumer->getBlock())) {
221  LLVM_DEBUG(
222  llvm::dbgs()
223  << "\nNot structurally fusable (producer block does not dominate)");
224  return false;
225  }
226  return true;
227 }
228 
230  LinalgOp consumer,
231  Value consumedView,
232  LinalgOp producer) {
233  assert(producer.hasBufferSemantics() &&
234  "expected linalg op with buffer semantics");
235  assert(consumer.hasBufferSemantics() &&
236  "expected linalg op with buffer semantics");
237  // Make some simple structural checks that alleviate the need for more
238  // complex analyses.
239  if (!isStructurallyFusableProducer(producer, consumedView, consumer)) {
240  LLVM_DEBUG(llvm::dbgs() << "\n***Not static last write due to structure:\t"
241  << *producer.getOperation());
242  return false;
243  }
244  // Check for any interleaved write to consumedView.
245  if (!graph.findCoveringWrites(producer, consumer, consumedView).empty()) {
246  LLVM_DEBUG(llvm::dbgs() << "\n***Not fusable due to interleaved write:\t"
247  << *producer.getOperation());
248  return false;
249  }
250  return true;
251 }
252 
254  LinalgOp consumer, Value consumedView,
255  LinalgOp producer) {
256  assert(producer.hasBufferSemantics() &&
257  "expected linalg op with buffer semantics");
258  assert(consumer.hasBufferSemantics() &&
259  "expected linalg op with buffer semantics");
260  if (!isProducerLastWriteOfView(graph, consumer, consumedView, producer))
261  return false;
262  // Check for any fusion-preventing dependence to any shape read/written that
263  // would violate dependences.
264  if (!graph.findCoveringDependences(producer, consumer).empty()) {
265  LLVM_DEBUG(llvm::dbgs()
266  << "\n***Not fusable due to an interleaved dependence:\t"
267  << *producer.getOperation());
268  return false;
269  }
270  return true;
271 }
272 
273 /// For `consumer` with buffer semantics, find the Linalg operation on buffers
274 /// that is the last writer of `consumerOpOperand`. For now the fusable
275 /// dependence is returned as an instance of the `dependenceGraph`.
277 findFusableProducer(OpOperand &consumerOpOperand,
278  const LinalgDependenceGraph &dependenceGraph) {
279  LLVM_DEBUG(llvm::dbgs() << "findFusableProducer for: "
280  << consumerOpOperand.get() << " @"
281  << consumerOpOperand.getOperandNumber() << " in "
282  << *consumerOpOperand.getOwner() << "\n");
283  LinalgOp consumerOp = dyn_cast<LinalgOp>(consumerOpOperand.getOwner());
284  if (!consumerOp)
285  return failure();
286 
287  // Only consider RAW and WAW atm.
288  for (auto depType : {
289  LinalgDependenceGraph::DependenceType::RAW,
290  LinalgDependenceGraph::DependenceType::WAW,
291  }) {
292  LLVM_DEBUG(llvm::dbgs()
293  << "Dependencies into: " << *consumerOp.getOperation() << "\n");
294  for (auto dependence : llvm::make_filter_range(
295  dependenceGraph.getDependencesInto(consumerOp, depType),
297  LLVM_DEBUG(llvm::dbgs() << "Inspect dependence btw: "
298  << elem.getIndexingValue() << " and "
299  << elem.getDependentValue() << "\n");
300  Value v = elem.getIndexingValue();
301  Optional<unsigned> operandNum =
302  elem.getIndexingOpViewOperandNum();
303  return isa<LinalgOp>(elem.getDependentOp()) &&
304  v == consumerOpOperand.get() && operandNum &&
305  *operandNum == consumerOpOperand.getOperandNumber();
306  })) {
307  // Consumer consumes this view, `isStructurallyFusableProducer` also
308  // checks whether it is a strict subview of the producer view.
309  auto producer = cast<LinalgOp>(dependence.getDependentOp());
310  LLVM_DEBUG(llvm::dbgs()
311  << "\n"
313  << "producer: " << *dependence.getDependentOp()
314  << " view: " << dependence.getDependentValue() << "\n");
315 
316  // If the producer and consumer have tensor semantics, the only dependence
317  // between them is through a RAW dependence and they are fusable by
318  // construction. For buffer semantics need additional checks.
319  if (producer.hasBufferSemantics() && consumerOp.hasBufferSemantics() &&
320  isFusableInto(dependenceGraph, consumerOp, consumerOpOperand.get(),
321  producer))
322  return dependence;
323  if (producer.hasTensorSemantics() && consumerOp.hasTensorSemantics()) {
324  assert(dependence.dependenceType ==
325  LinalgDependenceGraph::DependenceType::RAW);
326  return dependence;
327  }
328  }
329  }
330  return failure();
331 }
332 
335  const LinalgDependenceGraph &graph) {
337  findFusableProducer(consumerOpOperand, graph);
338  if (!fusableDependence)
339  return failure();
340 
341  LinalgOp producerOp = dyn_cast<LinalgOp>(fusableDependence->getDependentOp());
342  if (!producerOp)
343  return failure();
344 
345  // If producer is already in the same block as consumer, we are done.
346  if (consumerOpOperand.get().getParentBlock() ==
347  fusableDependence->getDependentValue().getParentBlock())
348  return failure();
349 
350  Optional<AffineMap> producerMap =
351  fusableDependence->getDependentOpViewIndexingMap();
352  if (!producerMap)
353  return failure();
354 
355  // Must be a subview or an extract_slice to guarantee there are loops we can
356  // fuse into.
357  auto subView = consumerOpOperand.get().getDefiningOp<memref::SubViewOp>();
358  if (!subView) {
359  LLVM_DEBUG(llvm::dbgs() << "\nNot fusable (not a subview)");
360  return failure();
361  }
362 
363  // Fuse `producer` just before `consumer`.
365  b.setInsertionPoint(consumerOpOperand.getOwner());
366  LLVM_DEBUG(llvm::dbgs() << "Fuse into consumer: "
367  << *consumerOpOperand.getOwner() << "\n");
368 
369  auto fusedProducer = fuse(b, producerOp, *producerMap, consumerOpOperand);
370  return FusionInfo{producerOp, fusedProducer};
371 }
372 
373 /// Walk back use-def chain through scf::For yields.
374 /// Sets `producer` and `outputIndex` if it finds a producer LinalgOp
375 
376 // TODO(ravishankarm, ntv): This can be moved into the dependence graphs
377 // dependence tracking since the dependence tracking is similar to what is done
378 // w.r.t to buffers.
379 static void getProducerOfTensor(Value tensor, OpResult &opResult) {
380  if (!tensor.getType().isa<RankedTensorType>())
381  return;
382 
383  while (true) {
384  LLVM_DEBUG(llvm::dbgs() << "\ngetProducerOfTensor: " << tensor);
385  if (auto linalgOp = tensor.getDefiningOp<LinalgOp>()) {
386  opResult = tensor.cast<OpResult>();
387  return;
388  }
389  if (auto sliceOp = tensor.getDefiningOp<tensor::ExtractSliceOp>()) {
390  tensor = sliceOp.getSource();
391  continue;
392  }
393  if (auto blockArg = tensor.dyn_cast<BlockArgument>()) {
394  if (auto forOp = blockArg.getDefiningOp<scf::ForOp>()) {
395  tensor = *(forOp.getIterOperands().begin() + blockArg.getArgNumber());
396  continue;
397  }
398  }
399  return;
400  }
401 }
402 
405  Value inputTensor = consumerOpOperand.get();
406  OpResult producerOpResult;
407  getProducerOfTensor(inputTensor, producerOpResult);
408  if (!producerOpResult) {
409  LLVM_DEBUG(llvm::dbgs() << "\nUnable to find producer");
410  return failure();
411  }
412  return fuseProducerOfTensor(b, producerOpResult, consumerOpOperand);
413 }
414 
417  OpOperand &consumerOpOperand) {
418  auto producerOp = dyn_cast<LinalgOp>(producerOpResult.getOwner());
419  if (!producerOp)
420  return failure();
421 
422  LinalgOp consumerOp = dyn_cast<LinalgOp>(consumerOpOperand.getOwner());
423  if (!consumerOp)
424  return failure();
425 
426  Value inputTensor = consumerOpOperand.get();
427 
428  // Must be an extract_slice op to guarantee there are loops we can fuse into.
429  auto sliceOp = inputTensor.getDefiningOp<tensor::ExtractSliceOp>();
430  if (!sliceOp) {
431  LLVM_DEBUG(llvm::dbgs()
432  << "\nNot fusable, not an extract_slice op: " << inputTensor);
433  return failure();
434  }
435 
436  // If producer is already in the same block as consumer, we are done.
437  if (consumerOpOperand.get().getParentBlock() ==
438  producerOpResult.getParentBlock())
439  return failure();
440 
441  // Insert fused `producer` just before `consumer`.
443  b.setInsertionPoint(consumerOp);
444  LLVM_DEBUG(llvm::dbgs() << "Fuse into consumer: " << *consumerOp << "\n");
445  OpOperand *opOperand =
446  producerOp.getDpsInitOperand(producerOpResult.getResultNumber());
447  LinalgOp fusedProducer =
448  fuse(b, producerOp, producerOp.getMatchingIndexingMap(opOperand),
449  consumerOpOperand);
450 
451  // Replace use.
452  // Canonicalizations are not guaranteed to have happened before constructing
453  // `fusedProducer`. In the tensor case this can result in temporary type
454  // mismatches. Insert a `tensor.cast` op to propagate the transformation
455  // invariant that types are compatible.
456  Value def = fusedProducer->getResult(producerOpResult.getResultNumber());
457  Type consumerType = consumerOpOperand.get().getType();
458  if (consumerType != def.getType())
459  def = b.create<tensor::CastOp>(fusedProducer.getLoc(), consumerType, def);
460  consumerOpOperand.set(def);
461  return FusionInfo{cast<LinalgOp>(producerOpResult.getOwner()), fusedProducer};
462 }
static LinalgOp fuse(OpBuilder &b, LinalgOp producer, const DenseMap< unsigned, Range > &fusedLoopsAndRanges)
Fuses the producer by cloning the producer.
Definition: Fusion.cpp:113
static void getProducerOfTensor(Value tensor, OpResult &opResult)
Walk back use-def chain through scf::For yields.
Definition: Fusion.cpp:379
static bool isStructurallyFusableProducer(LinalgOp producer, Value consumedView, LinalgOp consumer)
Definition: Fusion.cpp:208
static SmallVector< Value > getTiledOperands(LinalgOp producer)
Definition: Fusion.cpp:106
static ShapeDimension getShapeDefiningLoopRange(LinalgOp op, unsigned loopDepth, bool fromSubViewOpOnly=false)
Definition: Fusion.cpp:67
static FailureOr< LinalgDependenceGraph::LinalgDependenceGraphElem > findFusableProducer(OpOperand &consumerOpOperand, const LinalgDependenceGraph &dependenceGraph)
For consumer with buffer semantics, find the Linalg operation on buffers that is the last writer of c...
Definition: Fusion.cpp:277
static Range getRangeFromOperandShape(OpBuilder &b, Location loc, Value shapedOperand, unsigned dim)
Get the loop range for a dimension dim based on the shapedOperand.
Definition: Fusion.cpp:180
A dimensional identifier appearing in an affine expression.
Definition: AffineExpr.h:216
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
Definition: AffineMap.h:42
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:319
unsigned getNumResults() const
Definition: AffineMap.cpp:314
This class represents an argument of a Block.
Definition: Value.h:296
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:109
A class for computing basic dominance information.
Definition: Dominance.h:117
bool dominates(Operation *a, Operation *b) const
Return true if operation A dominates operation B, i.e.
Definition: Dominance.h:137
This class provides support for representing a failure result, or a valid value of type T.
Definition: LogicalResult.h:78
IRValueT get() const
Return the current value being used by this operand.
Definition: UseDefLists.h:137
void set(IRValueT newValue)
Set the current value being used by this operand.
Definition: UseDefLists.h:140
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:64
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:300
This class helps build Operations.
Definition: Builders.h:198
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:350
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:422
This class represents a single result from folding an operation.
Definition: OpDefinition.h:233
This class represents an operand of an operation.
Definition: Value.h:247
unsigned getOperandNumber()
Return which operand this is in the OpOperand list of the Operation.
Definition: Value.cpp:212
This is a value defined by a result of an operation.
Definition: Value.h:442
Operation * getOwner() const
Returns the operation that owns this result.
Definition: Value.h:451
unsigned getResultNumber() const
Returns the number of this result.
Definition: Value.h:454
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:31
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:154
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
bool isa() const
Definition: Types.h:260
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:85
Type getType() const
Return the type of this value.
Definition: Value.h:114
U dyn_cast() const
Definition: Value.h:95
Block * getParentBlock()
Return the Block in which this Value is defined.
Definition: Value.cpp:48
U cast() const
Definition: Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Operation * getOwner() const
Return the owner of this operand.
Definition: UseDefLists.h:40
Data structure for holding a dependence graph that operates on LinalgOp and views as SSA values.
static StringRef getDependenceTypeStr(DependenceType depType)
dependence_range getDependencesInto(Operation *dst, DependenceType dt) const
Returns the X such that X -> op is a dependence of type dt.
SmallVector< Operation *, 8 > findCoveringWrites(LinalgOp srcLinalgOp, LinalgOp dstLinalgOp, Value view) const
Returns the operations that are interleaved between srcLinalgOp and dstLinalgOp and that are involved...
SmallVector< Operation *, 8 > findCoveringDependences(LinalgOp srcLinalgOp, LinalgOp dstLinalgOp) const
Returns the operations that are interleaved between srcLinalgOp and dstLinalgOp and that are involved...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:230
FailureOr< FusionInfo > fuseProducerOfTensor(OpBuilder &b, OpOperand &consumerOpOperand)
Tensor counterpart of fuseProducerOfBuffer.
Definition: Fusion.cpp:404
bool isProducerLastWriteOfView(const LinalgDependenceGraph &graph, LinalgOp consumer, Value consumedView, LinalgOp producer)
Checks whether the specific producer is the last write to exactly the whole consumedView.
Definition: Fusion.cpp:229
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
Definition: Utils.cpp:984
bool isFusableInto(const LinalgDependenceGraph &graph, LinalgOp consumer, Value consumedView, LinalgOp producer)
Checks whether fusing the specific producer of the consumedView is feasible.
Definition: Fusion.cpp:253
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
Definition: Utils.cpp:1005
FailureOr< FusionInfo > fuseProducerOfBuffer(OpBuilder &b, OpOperand &consumerOpOperand, const LinalgDependenceGraph &graph)
Fuses producer into consumer if the producer is structurally feasible and the fusion would not violat...
Definition: Fusion.cpp:334
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value source, int64_t dim)
Definition: Utils.cpp:208
Include the generated interface declarations.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
Implements a simple high-level fusion pass on linalg structured operations.
Definition: Fusion.cpp:56
unsigned dimension
Definition: Fusion.cpp:58
Value shape
Definition: Fusion.cpp:57
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
OpFoldResult offset
A struct containing the Linalg producer before and after fusion.
Definition: Utils.h:280