MLIR 22.0.0git
Utils.cpp
Go to the documentation of this file.
1//===- Utils.cpp - Utilities to support the Linalg dialect ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements utilities for the Linalg dialect.
10//
11//===----------------------------------------------------------------------===//
12
14
29#include "mlir/IR/AffineExpr.h"
31#include "mlir/IR/AffineMap.h"
32#include "mlir/IR/Matchers.h"
33#include "llvm/ADT/TypeSwitch.h"
34#include "llvm/Support/Debug.h"
35#include <optional>
36
37#define DEBUG_TYPE "linalg-utils"
38
39using namespace mlir;
40using namespace presburger;
41using namespace mlir::affine;
42using namespace mlir::linalg;
43using namespace mlir::scf;
44
45namespace {
46
47// Helper visitor to determine whether an AffineExpr is tiled.
48// This is achieved by traversing every AffineDimExpr with position `pos` and
49// checking whether the corresponding `tileSizes[pos]` is non-zero.
50// This also enforces only positive coefficients occur in multiplications.
51//
52// Example:
53// `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
54//
55struct TileCheck : public AffineExprVisitor<TileCheck> {
56 TileCheck(ArrayRef<OpFoldResult> tileSizes) : tileSizes(tileSizes) {}
57
58 void visitDimExpr(AffineDimExpr expr) {
59 isTiled |= !isZeroInteger(tileSizes[expr.getPosition()]);
60 }
61 void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
62 visit(expr.getLHS());
63 visit(expr.getRHS());
65 assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
66 "nonpositive multiplying coefficient");
67 }
68 bool isTiled = false;
69 ArrayRef<OpFoldResult> tileSizes;
70};
71
72} // namespace
73
74static bool isTiled(AffineExpr expr, ArrayRef<OpFoldResult> tileSizes) {
75 if (!expr)
76 return false;
77 TileCheck t(tileSizes);
78 t.visit(expr);
79 return t.isTiled;
80}
81
82// Checks whether the `map` varies with respect to a non-zero `tileSize`.
83static bool isTiled(AffineMap map, ArrayRef<OpFoldResult> tileSizes) {
84 if (!map)
85 return false;
86 for (unsigned r = 0; r < map.getNumResults(); ++r)
87 if (isTiled(map.getResult(r), tileSizes))
88 return true;
89 return false;
90}
91
// Matches a region whose single block computes a scalar binary op on its two
// block arguments and yields the result; currently only integer addition
// (BinaryOpKind::IAdd) is recognized.
// NOTE(review): the signature line and the block-argument type checks of this
// definition were lost in extraction -- reconcile against upstream.
92std::optional<RegionMatcher::BinaryOpKind>
94  auto &region = op.getRegion();
95  if (!region.hasOneBlock())
96    return std::nullopt;
97
98  Block &block = region.front();
// The block must take exactly the two scalar operands of the binary op.
99  if (block.getNumArguments() != 2 ||
102    return std::nullopt;
103
104  auto &ops = block.getOperations();
// Exactly one op besides the terminator: the binary computation itself.
105  if (!llvm::hasSingleElement(block.without_terminator()))
106    return std::nullopt;
107
109  auto a = m_Val(block.getArgument(0));
110  auto b = m_Val(block.getArgument(1));
111
// Match `linalg.yield(arith.addi(%arg0, %arg1))` rooted at the terminator.
112  auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
113  if (addPattern.match(&ops.back()))
114    return BinaryOpKind::IAdd;
115
116  return std::nullopt;
117}
118
119/// Explicit instantiation of loop nest generator for different loop types.
123
124/// Given a list of subview ranges, extract individual values for lower, upper
125/// bounds and steps and put them into the corresponding vectors.
// Materializes each range's offset/size/stride as an index Value (creating
// constants where needed) and appends them to the lower-bound, upper-bound
// and step vectors respectively.
// NOTE(review): the parameter lines declaring `ranges`, `lbs` and `ubs` were
// lost in extraction.
126static void unpackRanges(OpBuilder &builder, Location loc,
129                         SmallVectorImpl<Value> &steps) {
130  for (Range range : ranges) {
131    lbs.emplace_back(
132        getValueOrCreateConstantIndexOp(builder, loc, range.offset));
133    ubs.emplace_back(getValueOrCreateConstantIndexOp(builder, loc, range.size));
134    steps.emplace_back(
135        getValueOrCreateConstantIndexOp(builder, loc, range.stride));
136  }
137}
138
139//===----------------------------------------------------------------------===//
140// General utilities
141//===----------------------------------------------------------------------===//
142//
143/// The permutation can be obtained from two permutations:
144/// a) Compute the permutation vector to move the last `numPackedDims` into
145/// the `innerPosDims` of a shape of rank `rank`.
146/// b) Compute the permutation vector to move outer dims if the
147/// `outerPerm` parameter is not empty.
148/// Apply (b) permutation on (a) permutation to get the final permutation.
// NOTE(review): the signature line carrying `rank` and `innerDimsPos` was
// lost in extraction.
149static SmallVector<int64_t>
151                      ArrayRef<int64_t> &outerPerm,
152                      PackingMetadata &packingMetadata) {
// (a) permutation moving the trailing `numPackedDims` positions into the
// insert positions computed from `innerDimsPos`.
153  int64_t numPackedDims = innerDimsPos.size();
154  auto lastDims =
155      llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
156  packingMetadata = computePackingMetadata(rank, innerDimsPos);
157  SmallVector<int64_t> innerPositionsPerm =
158      computePermutationVector(rank, lastDims, packingMetadata.insertPositions);
159
// (b) permutation of the outer dims, only when `outerPerm` is provided.
160  SmallVector<int64_t> outerPos = packingMetadata.outerPositions;
161  if (!outerPerm.empty())
162    applyPermutationToVector(outerPos, outerPerm);
163  SmallVector<int64_t> outerPositionPerm =
164      computePermutationVector(rank, packingMetadata.outerPositions, outerPos);
165
// Compose (b) over (a) to obtain the final permutation.
166  SmallVector<int64_t> packInverseDestPermutation = innerPositionsPerm;
167  applyPermutationToVector(packInverseDestPermutation, outerPositionPerm);
168  return packInverseDestPermutation;
169}
170
171namespace mlir {
172namespace linalg {
173
// Computes the inverse destination permutation of a pack op from its dest
// rank, inner dim positions and outer perm; also fills `metadata`.
// NOTE(review): the declaration line naming this function and its pack-op
// parameter was lost in extraction.
175                                            PackingMetadata &metadata) {
176
177  int64_t packedRank = packOp.getDestType().getRank();
178  ArrayRef<int64_t> innerDimPos = packOp.getInnerDimsPos();
179  ArrayRef<int64_t> outerPerm = packOp.getOuterDimsPerm();
180  SmallVector<int64_t> packInvDestPerm =
181      computePackUnPackPerm(packedRank, innerDimPos, outerPerm, metadata);
182  return packInvDestPerm;
183}
184
// Computes the inverse source permutation of an unpack op, mirroring the
// pack-op variant but keyed off the unpack source type.
// NOTE(review): the declaration line naming this function and its unpack-op
// parameter was lost in extraction.
186                                              PackingMetadata &metadata) {
187  int64_t packedRank = unpackOp.getSourceType().getRank();
188  ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
189  ArrayRef<int64_t> outerPerm = unpackOp.getOuterDimsPerm();
190  SmallVector<int64_t> unpackInvSrcPerm =
191      computePackUnPackPerm(packedRank, innerDimPos, outerPerm, metadata);
192  return unpackInvSrcPerm;
193}
194
// True iff every indexing map of the op is a projected permutation
// (zero results allowed). NOTE(review): the enclosing function's signature
// line was lost in extraction.
196  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
197    return m.isProjectedPermutation(/*allowZeroInResults=*/true);
198  });
199}
200
// Walks the single block of region `r` and rejects it if any op is neither a
// whitelisted scalar op nor (per the partially-extracted disjunct) otherwise
// acceptable, or if any op produces a non-scalar result.
// NOTE(review): the function's signature line and one disjunct of the isa<>
// check were lost in extraction -- reconcile against upstream.
202  if (!r.hasOneBlock())
203    return false;
204  for (Operation &op : r.front()) {
205    if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
206              linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
// Every result must be a scalar (int, index or float) type.
208        llvm::any_of(op.getResultTypes(),
209                     [](Type type) { return !type.isIntOrIndexOrFloat(); }))
210      return false;
211  }
212  return true;
213}
214
// An op is elementwise when all its loops are parallel, its init (output)
// indexing maps are permutations, and its body contains only scalar
// elementwise operations.
215bool isElementwise(LinalgOp op) {
216  if (op.getNumLoops() != op.getNumParallelLoops())
217    return false;
218
// NOTE(review): the guard condition for this early exit was lost in
// extraction (upstream gates it on allIndexingsAreProjectedPermutation(op)).
220    return false;
221
222  // TODO: relax the restrictions on indexing map.
223  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
224    if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
225      return false;
226  }
227  return hasOnlyScalarElementwiseOp(op->getRegion(0));
228}
229
230bool isParallelIterator(utils::IteratorType iteratorType) {
231 return iteratorType == utils::IteratorType::parallel;
232}
233
234bool isReductionIterator(utils::IteratorType iteratorType) {
235 return iteratorType == utils::IteratorType::reduction;
236}
237
238//===----------------------------------------------------------------------===//
239// Convolution matcher utilities
240//===----------------------------------------------------------------------===//
241
242/// Returns the BlockArgument that leads to `val`, if any. Traverses optional
243/// ext* ops.
// Returns the block argument feeding `val`, looking through at most one
// arith.ext{f,si,ui} cast; returns null otherwise.
// NOTE(review): the function's signature line was lost in extraction.
245  BlockArgument blockArg = dyn_cast<BlockArgument>(val);
246  if ((blockArg))
247    return blockArg;
248
// Not a direct block argument: accept only a single ext* producer and look
// at its source operand.
249  Operation *defOp = val.getDefiningOp();
250  if (!dyn_cast_if_present<arith::ExtFOp>(defOp) &&
251      !dyn_cast_if_present<arith::ExtSIOp>(defOp) &&
252      !dyn_cast_if_present<arith::ExtUIOp>(defOp)) {
253    return nullptr;
254  }
255  return dyn_cast<BlockArgument>(defOp->getOperand(0));
}
257
258/// Utility to match block body for convolution ops.
259/// The body is thus expected to yield :-
260/// %out + (%lhs * %rhs)
261/// where: %lhs, %rhs and %out are block arguments and
262/// %lhs and %rhs can have optional upcast operation.
263static bool bodyMatcherForConvolutionOps(Value yieldVal, Block *body) {
// The yielded value must be an integer or float add...
264  Operation *addOp = yieldVal.getDefiningOp();
265  if (!isa_and_present<arith::AddIOp, arith::AddFOp>(addOp))
266    return false;
267
// ...whose second operand is an integer or float multiply.
268  Operation *mulOp = addOp->getOperand(1).getDefiningOp();
269  if (!isa_and_present<arith::MulIOp, arith::MulFOp>(mulOp))
270    return false;
271
// NOTE(review): the three initializer lines fetching the mul/add operands
// (through optional ext* casts) were lost in extraction.
272  BlockArgument lhsBlockArg =
274  BlockArgument rhsBlockArg =
276  BlockArgument outBlockArg =
// All three must be this block's arguments 0 (lhs), 1 (rhs) and 2 (out).
278  if (!lhsBlockArg || !rhsBlockArg || !outBlockArg ||
279      lhsBlockArg.getOwner() != body || rhsBlockArg.getOwner() != body ||
280      outBlockArg.getOwner() != body || lhsBlockArg.getArgNumber() != 0 ||
281      rhsBlockArg.getArgNumber() != 1 || outBlockArg.getArgNumber() != 2)
282    return false;
283  return true;
284}
285
286/// Utility to match block body for linalg.pool* ops.
287template <typename... OpTypes>
288static bool bodyMatcherForPoolOps(Value yieldVal, Block *body) {
// The yielded value must be produced by one of the pool combiner op kinds
// supplied as template arguments.
289  Operation *defOp = yieldVal.getDefiningOp();
290  if (!(isa_and_present<OpTypes>(defOp) || ...))
291    return false;
292
// NOTE(review): the initializer lines fetching the combiner's operands were
// lost in extraction.
293  BlockArgument lhsArg =
295  BlockArgument rhsArg =
// Pool form: combiner(out = arg 2, in = arg 0) on this block's arguments.
297  if (!lhsArg || !rhsArg || lhsArg.getOwner() != body ||
298      rhsArg.getOwner() != body || lhsArg.getArgNumber() != 2 ||
299      rhsArg.getArgNumber() != 0)
300    return false;
301  return true;
302}
303
// Thin wrappers instantiating bodyMatcherForPoolOps for each pool combiner
// kind (max/min signed/unsigned, sum).
// NOTE(review): each wrapper's `return bodyMatcherForPoolOps<...>(yieldVal,`
// call line was lost in extraction -- the op-type lists must be reconciled
// against upstream.
304static bool bodyMatcherForMaxSignedPoolOps(Value yieldVal, Block *body) {
306                                                            body);
307}
308
309// max_unsigned ops should not allow float data type.
310// TODO(#164800): Retire OPDSL logic.
311static bool bodyMatcherForMaxUnsignedPoolOps(Value yieldVal, Block *body) {
313                                            body);
314}
315
316static bool bodyMatcherForMinSignedPoolOps(Value yieldVal, Block *body) {
318                                                            body);
319}
320
321// min_unsigned ops should not allow float data type.
322// TODO(#164800): Retire OPDSL logic.
323static bool bodyMatcherForMinUnsignedPoolOps(Value yieldVal, Block *body) {
325                                            body);
326}
327
328static bool bodyMatcherForSumPoolOps(Value yieldVal, Block *body) {
330}
331
332static AffineExpr getAffineMapDim(ArrayAttr indexingMaps, uint32_t mapIndex,
333 uint32_t dimIndex) {
334 auto affineMap = cast<AffineMapAttr>(indexingMaps[mapIndex]).getValue();
335 if (dimIndex < affineMap.getNumResults())
336 return affineMap.getResult(dimIndex);
337 return nullptr;
338}
339
340/// Check if `expr` is either:
341/// - a dimension expr alone (implying multiplication by 1), or
342/// - a multiplication of dimension expr by any positive constant != 1
343/// In both cases we will capture the dimension expression into `dim` and
344/// return the constant multiplier. Returns -1 in case of a match failure.
// NOTE(review): the function's signature line (taking `expr` and the
// out-param `dim`) was lost in extraction.
// A bare dim expr counts as multiplication by 1.
346  if ((dim = dyn_cast<AffineDimExpr>(expr)))
347    return 1;
348
// Otherwise require a multiplication node.
349  auto mulExpr = dyn_cast<AffineBinaryOpExpr>(expr);
350  if (!mulExpr || mulExpr.getKind() != AffineExprKind::Mul)
351    return -1;
352
353  AffineExpr lhs = mulExpr.getLHS();
354  AffineExpr rhs = mulExpr.getRHS();
355
// Accept dim * cst in either operand order; capture both pieces.
356  AffineConstantExpr cst = nullptr;
357  if (((dim = dyn_cast<AffineDimExpr>(lhs)) &&
358       (cst = dyn_cast<AffineConstantExpr>(rhs))) ||
359      ((dim = dyn_cast<AffineDimExpr>(rhs)) &&
360       (cst = dyn_cast<AffineConstantExpr>(lhs))))
361    return cst.getValue();
362  return -1;
}
364
365/// Given an array of AffineMaps `indexingMaps` verify the following
366/// commutativity:-
367/// indexingMaps[0].getResult(iDim) ==
368/// indexingMaps[1].getResult(fDim) * <c0> +
369/// indexingMaps[n-1].getResult(oDim) * <c1>
370/// where,
371/// - c0 and c1 can be any constant,
372/// - n is the size of the indexingMaps' array,
373/// - 0, 1 and n-1 are input, filter and output map indices respectively,
374/// - iDim, fDim and oDim are the input, filter and output dimension
375/// indices in their respective indexing maps
376/// Example:
377/// #inputMap = affine_map<(d0, d1, d2, d3, d4, d5, d6)
378/// -> (d0, d1 * 2 + d4 * 3, d2 + d5, d6)>
379/// #filterMap = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
380/// #outputMap = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
381///
382/// Here,
383/// #inputMap[1] = #outputMap[1] * 2 + #filterMap[0] * 3
384/// Therefore,
385/// matchConvDimAddExprPattern(indexingMaps, 1, 0, 1, dilation, stride)
386/// would return true and update dilation = 3 and stride = 2
387static bool matchConvDimAddExprPattern(ArrayAttr indexingMaps, unsigned iDim,
388 unsigned fDim, unsigned oDim,
389 int64_t &dilation, int64_t &stride) {
390 unsigned inputMapIdx = 0, filterMapIdx = 1,
391 outputMapIdx = indexingMaps.size() - 1;
392 AffineExpr inpExpr = getAffineMapDim(indexingMaps, inputMapIdx, iDim);
393 auto addExpr = dyn_cast_or_null<AffineBinaryOpExpr>(inpExpr);
394 if (!addExpr || addExpr.getKind() != AffineExprKind::Add)
395 return false;
396
397 AffineExpr dim0, dim1;
398 int64_t c0 = isDimTimesConstantOrDimOnly(addExpr.getLHS(), dim0);
399 int64_t c1 = isDimTimesConstantOrDimOnly(addExpr.getRHS(), dim1);
400
401 if (c0 == -1 || c1 == -1)
402 return false;
403 // Pattern matched with dims and constants extracted.
404 AffineExpr fExpr = getAffineMapDim(indexingMaps, filterMapIdx, fDim);
405 AffineExpr oExpr = getAffineMapDim(indexingMaps, outputMapIdx, oDim);
406 if (dim0 == fExpr && dim1 == oExpr) {
407 dilation = c0;
408 stride = c1;
409 return true;
410 }
411 if (dim1 == fExpr && dim0 == oExpr) {
412 dilation = c1;
413 stride = c0;
414 return true;
415 }
416 return false;
417}
418
419// ---------------------------------------------
420// Matchers for specific convolution operation.
421// ---------------------------------------------
422
423/// Returns true if the given indexing maps matches with the expected indexing
424/// maps.
// Builds the expected maps from the given expression lists and compares the
// resulting ArrayAttr for exact equality with the op's `indexingMaps`.
// NOTE(review): the declaration line naming this helper and its
// expression-list parameter was lost in extraction.
426                              ArrayAttr indexingMaps, MLIRContext *context) {
427  SmallVector<AffineMap, 4> expectedIndexingMaps =
428      AffineMap::inferFromExprList(mapListExpected, context);
429  return indexingMaps ==
430         ArrayAttr::get(
431             context, llvm::to_vector<4>(llvm::map_range(
432                          expectedIndexingMaps, [&](AffineMap m) -> Attribute {
433                            return AffineMapAttr::get(m);
434                          })));
}
436
// Structural matcher for the linalg.conv_1d form: named op short-circuits to
// true; otherwise maps and body are matched and *dilations/*strides (rank 1,
// default 1) are populated. NOTE(review): the specialization's declaration
// line and the indexing-map-check call line were lost in extraction.
437// #inputMap = affine_map<(W, w) -> (W + w)>
438// #filterMap = affine_map<(W, w) -> (w)>
439// #outputMap = affine_map<(W, w) -> (W)>
440template <>
442                                         SmallVector<int64_t> *dilations,
443                                         SmallVector<int64_t> *strides) {
444  if (isa<linalg::Conv1DOp>(op))
445    return true;
446
447  assert(isaConvolutionOpInterface(op) &&
448         "expected op to implement ConvolutionOpInterface");
449
450  *dilations = SmallVector<int64_t>(1, 1);
451  *strides = SmallVector<int64_t>(1, 1);
452  MLIRContext *context = op->getContext();
453  AffineExpr W = getAffineDimExpr(0, context);
454  AffineExpr w = getAffineDimExpr(1, context);
455  ArrayAttr indexingMaps = op.getIndexingMaps();
456  // First fetch dilations/strides :-
457  // Match: W * stride + w * dilation
458  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/0, /*fDim=*/0,
459                                  /*oDim=*/0, (*dilations)[0], (*strides)[0]))
460    return false;
461  // Match expected indexing maps
463          {/*inputMap=*/{W * (*strides)[0] + w * (*dilations)[0]},
464           /*filterMap=*/{w},
465           /*outputMap=*/{W}},
466          indexingMaps, context))
467    return false;
468  // Match body
469  Block *body = op.getBlock();
470  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
471  Value yieldVal = yieldOp.getOperand(0);
472  return bodyMatcherForConvolutionOps(yieldVal, body);
473}
474
// Matcher for the linalg.conv_1d_nwc_wcf form; populates *dilations/*strides
// (rank 1, default 1). NOTE(review): the specialization's declaration line
// and the indexing-map-check call line were lost in extraction.
475// #inputMap = affine_map<(N, W, F, w, c) -> (N, W + w, c)>
476// #filterMap = affine_map<(N, W, F, w, c) -> (w, c, F)>
477// #outputMap = affine_map<(N, W, F, w, c) -> (N, W, F)>
478template <>
480    LinalgOp op, SmallVector<int64_t> *dilations,
481    SmallVector<int64_t> *strides) {
482  if (isa<linalg::Conv1DNwcWcfOp>(op))
483    return true;
484
485  assert(isaConvolutionOpInterface(op) &&
486         "expected op to implement ConvolutionOpInterface");
487
488  *dilations = SmallVector<int64_t>(1, 1);
489  *strides = SmallVector<int64_t>(1, 1);
490  MLIRContext *context = op->getContext();
491  AffineExpr N = getAffineDimExpr(0, context);
492  AffineExpr W = getAffineDimExpr(1, context);
493  AffineExpr F = getAffineDimExpr(2, context);
494  AffineExpr w = getAffineDimExpr(3, context);
495  AffineExpr c = getAffineDimExpr(4, context);
496  ArrayAttr indexingMaps = op.getIndexingMaps();
497  // First fetch dilations/strides :-
498  // Match: W * stride + w * dilation
499  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
500                                  /*oDim=*/1, (*dilations)[0], (*strides)[0]))
501    return false;
502  // Match expected indexing maps
504          {/*inputMap=*/{N, W * (*strides)[0] + w * (*dilations)[0], c},
505           /*filterMap=*/{w, c, F},
506           /*outputMap=*/{N, W, F}},
507          indexingMaps, context))
508    return false;
509  // Match body
510  Block *body = op.getBlock();
511  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
512  Value yieldVal = yieldOp.getOperand(0);
513  return bodyMatcherForConvolutionOps(yieldVal, body);
514}
515
// Matcher for the linalg.conv_1d_ncw_fcw form; populates *dilations/*strides
// (rank 1, default 1). NOTE(review): the specialization's declaration line
// and the indexing-map-check call line were lost in extraction.
516// #inputMap = affine_map<(N, F, W, c, w) -> (N, c, W + w)>
517// #filterMap = affine_map<(N, F, W, c, w) -> (F, c, w)>
518// #outputMap = affine_map<(N, F, W, c, w) -> (N, F, W)>
519template <>
521    LinalgOp op, SmallVector<int64_t> *dilations,
522    SmallVector<int64_t> *strides) {
523  if (isa<linalg::Conv1DNcwFcwOp>(op))
524    return true;
525
526  assert(isaConvolutionOpInterface(op) &&
527         "expected op to implement ConvolutionOpInterface");
528
529  *dilations = SmallVector<int64_t>(1, 1);
530  *strides = SmallVector<int64_t>(1, 1);
531  MLIRContext *context = op->getContext();
532  AffineExpr N = getAffineDimExpr(0, context);
533  AffineExpr F = getAffineDimExpr(1, context);
534  AffineExpr W = getAffineDimExpr(2, context);
535  AffineExpr c = getAffineDimExpr(3, context);
536  AffineExpr w = getAffineDimExpr(4, context);
537  ArrayAttr indexingMaps = op.getIndexingMaps();
538  // First fetch dilations/strides :-
539  // Match: W * stride + w * dilation
540  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/2,
541                                  /*oDim=*/2, (*dilations)[0], (*strides)[0]))
542    return false;
543  // Match expected indexing maps
545          {/*inputMap=*/{N, c, W * (*strides)[0] + w * (*dilations)[0]},
546           /*filterMap=*/{F, c, w},
547           /*outputMap=*/{N, F, W}},
548          indexingMaps, context))
549    return false;
550  // Match body
551  Block *body = op.getBlock();
552  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
553  Value yieldVal = yieldOp.getOperand(0);
554  return bodyMatcherForConvolutionOps(yieldVal, body);
555}
556
// Matcher for the linalg.conv_2d form; populates *dilations/*strides
// (rank 2, default 1). NOTE(review): the specialization's declaration line
// and the indexing-map-check call line were lost in extraction.
557// #inputMap = affine_map<(H, W, h, w) -> (H + h, W + w)>
558// #filterMap = affine_map<(H, W, h, w) -> (h, w)>
559// #outputMap = affine_map<(H, W, h, w) -> (H, W)>
560template <>
562                                         SmallVector<int64_t> *dilations,
563                                         SmallVector<int64_t> *strides) {
564  if (isa<linalg::Conv2DOp>(op))
565    return true;
566
567  assert(isaConvolutionOpInterface(op) &&
568         "expected op to implement ConvolutionOpInterface");
569
570  *dilations = SmallVector<int64_t>(2, 1);
571  *strides = SmallVector<int64_t>(2, 1);
572  MLIRContext *context = op->getContext();
573  AffineExpr H = getAffineDimExpr(0, context);
574  AffineExpr W = getAffineDimExpr(1, context);
575  AffineExpr h = getAffineDimExpr(2, context);
576  AffineExpr w = getAffineDimExpr(3, context);
577  ArrayAttr indexingMaps = op.getIndexingMaps();
578  // First fetch dilations/strides :-
579  // Match: H * stride + h * dilation
580  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/0, /*fDim=*/0,
581                                  /*oDim=*/0, (*dilations)[0], (*strides)[0]))
582    return false;
583  // Match: W * stride + w * dilation
584  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/1,
585                                  /*oDim=*/1, (*dilations)[1], (*strides)[1]))
586    return false;
587  // Match expected indexing maps
589          {/*inputMap=*/{H * (*strides)[0] + h * (*dilations)[0],
590                         W * (*strides)[1] + w * (*dilations)[1]},
591           /*filterMap=*/{h, w},
592           /*outputMap=*/{H, W}},
593          indexingMaps, context))
594    return false;
595  // Match body
596  Block *body = op.getBlock();
597  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
598  Value yieldVal = yieldOp.getOperand(0);
599  return bodyMatcherForConvolutionOps(yieldVal, body);
600}
601
// Matcher for the linalg.conv_3d form; populates *dilations/*strides
// (rank 3, default 1). NOTE(review): the specialization's declaration line
// and the indexing-map-check call line were lost in extraction.
602// #inputMap = affine_map<(D, H, W, d, h, w) -> (D + d, H + h, W + w)>
603// #filterMap = affine_map<(D, H, W, d, h, w) -> (d, h, w)>
604// #outputMap = affine_map<(D, H, W, d, h, w) -> (D, H, W)>
605template <>
607                                         SmallVector<int64_t> *dilations,
608                                         SmallVector<int64_t> *strides) {
609  if (isa<linalg::Conv3DOp>(op))
610    return true;
611
612  assert(isaConvolutionOpInterface(op) &&
613         "expected op to implement ConvolutionOpInterface");
614
615  *dilations = SmallVector<int64_t>(3, 1);
616  *strides = SmallVector<int64_t>(3, 1);
617  MLIRContext *context = op->getContext();
618  AffineExpr D = getAffineDimExpr(0, context);
619  AffineExpr H = getAffineDimExpr(1, context);
620  AffineExpr W = getAffineDimExpr(2, context);
621  AffineExpr d = getAffineDimExpr(3, context);
622  AffineExpr h = getAffineDimExpr(4, context);
623  AffineExpr w = getAffineDimExpr(5, context);
624  ArrayAttr indexingMaps = op.getIndexingMaps();
625  // First fetch dilations/strides :-
626  // Match: D * stride + d * dilation
627  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/0, /*fDim=*/0,
628                                  /*oDim=*/0, (*dilations)[0], (*strides)[0]))
629    return false;
630  // Match: H * stride + h * dilation
631  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/1,
632                                  /*oDim=*/1, (*dilations)[1], (*strides)[1]))
633    return false;
634  // Match: W * stride + w * dilation
635  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/2,
636                                  /*oDim=*/2, (*dilations)[2], (*strides)[2]))
637    return false;
638  // Match expected indexing maps
640          {/*inputMap=*/{D * (*strides)[0] + d * (*dilations)[0],
641                         H * (*strides)[1] + h * (*dilations)[1],
642                         W * (*strides)[2] + w * (*dilations)[2]},
643           /*filterMap=*/{d, h, w},
644           /*outputMap=*/{D, H, W}},
645          indexingMaps, context))
646    return false;
647  // Match body
648  Block *body = op.getBlock();
649  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
650  Value yieldVal = yieldOp.getOperand(0);
651  return bodyMatcherForConvolutionOps(yieldVal, body);
652}
653
// Matcher for the linalg.depthwise_conv_1d_ncw_cw form; populates
// *dilations/*strides (rank 1, default 1). NOTE(review): the
// specialization's declaration line and the indexing-map-check call line
// were lost in extraction.
654// #inputMap = affine_map<(N, W, C, w) -> (N, C, W + w)>
655// #filterMap = affine_map<(N, W, C, w) -> (C, w)>
656// #outputMap = affine_map<(N, W, C, w) -> (N, C, W)>
657template <>
659    LinalgOp op, SmallVector<int64_t> *dilations,
660    SmallVector<int64_t> *strides) {
661  if (isa<linalg::DepthwiseConv1DNcwCwOp>(op))
662    return true;
663
664  assert(isaConvolutionOpInterface(op) &&
665         "expected op to implement ConvolutionOpInterface");
666
667  *dilations = SmallVector<int64_t>(1, 1);
668  *strides = SmallVector<int64_t>(1, 1);
669  MLIRContext *context = op->getContext();
670  AffineExpr N = getAffineDimExpr(0, context);
671  AffineExpr W = getAffineDimExpr(1, context);
672  AffineExpr C = getAffineDimExpr(2, context);
673  AffineExpr w = getAffineDimExpr(3, context);
674  ArrayAttr indexingMaps = op.getIndexingMaps();
675  // First fetch dilations/strides :-
676  // Match: W * stride + w * dilation
677  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
678                                  /*oDim=*/2, (*dilations)[0], (*strides)[0]))
679    return false;
680  // Match expected indexing maps
682          {/*inputMap=*/{N, C, W * (*strides)[0] + w * (*dilations)[0]},
683           /*filterMap=*/{C, w},
684           /*outputMap=*/{N, C, W}},
685          indexingMaps, context))
686    return false;
687  // Match body
688  Block *body = op.getBlock();
689  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
690  Value yieldVal = yieldOp.getOperand(0);
691  return bodyMatcherForConvolutionOps(yieldVal, body);
692}
693
// Matcher for the linalg.depthwise_conv_1d_nwc_wc form; populates
// *dilations/*strides (rank 1, default 1). NOTE(review): the
// specialization's declaration line and the indexing-map-check call line
// were lost in extraction.
694// #inputMap = affine_map<(N, W, C, w) -> (N, W + w, C)>
695// #filterMap = affine_map<(N, W, C, w) -> (w, C)>
696// #outputMap = affine_map<(N, W, C, w) -> (N, W, C)>
697template <>
699    LinalgOp op, SmallVector<int64_t> *dilations,
700    SmallVector<int64_t> *strides) {
701  if (isa<linalg::DepthwiseConv1DNwcWcOp>(op))
702    return true;
703
704  assert(isaConvolutionOpInterface(op) &&
705         "expected op to implement ConvolutionOpInterface");
706
707  *dilations = SmallVector<int64_t>(1, 1);
708  *strides = SmallVector<int64_t>(1, 1);
709  MLIRContext *context = op->getContext();
710  AffineExpr N = getAffineDimExpr(0, context);
711  AffineExpr W = getAffineDimExpr(1, context);
712  AffineExpr C = getAffineDimExpr(2, context);
713  AffineExpr w = getAffineDimExpr(3, context);
714  ArrayAttr indexingMaps = op.getIndexingMaps();
715  // First fetch dilations/strides :-
716  // Match: W * stride + w * dilation
717  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
718                                  /*oDim=*/1, (*dilations)[0], (*strides)[0]))
719    return false;
720  // Match expected indexing maps
722          {/*inputMap=*/{N, W * (*strides)[0] + w * (*dilations)[0], C},
723           /*filterMap=*/{w, C},
724           /*outputMap=*/{N, W, C}},
725          indexingMaps, context))
726    return false;
727  // Match body
728  Block *body = op.getBlock();
729  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
730  Value yieldVal = yieldOp.getOperand(0);
731  return bodyMatcherForConvolutionOps(yieldVal, body);
732}
733
// Matcher for the linalg.depthwise_conv_1d_nwc_wcm form (with channel
// multiplier); populates *dilations/*strides (rank 1, default 1).
// NOTE(review): the specialization's declaration line and the
// indexing-map-check call line were lost in extraction.
734// #inputMap = affine_map<(N, W, C, CM, w) -> (N, W + w, C)>
735// #filterMap = affine_map<(N, W, C, CM, w) -> (w, C, CM)>
736// #outputMap = affine_map<(N, W, C, CM, w) -> (N, W, C, CM)>
737template <>
739    LinalgOp op, SmallVector<int64_t> *dilations,
740    SmallVector<int64_t> *strides) {
741  if (isa<linalg::DepthwiseConv1DNwcWcmOp>(op))
742    return true;
743
744  assert(isaConvolutionOpInterface(op) &&
745         "expected op to implement ConvolutionOpInterface");
746
747  *dilations = SmallVector<int64_t>(1, 1);
748  *strides = SmallVector<int64_t>(1, 1);
749  MLIRContext *context = op->getContext();
750  AffineExpr N = getAffineDimExpr(0, context);
751  AffineExpr W = getAffineDimExpr(1, context);
752  AffineExpr C = getAffineDimExpr(2, context);
753  AffineExpr CM = getAffineDimExpr(3, context);
754  AffineExpr w = getAffineDimExpr(4, context);
755  ArrayAttr indexingMaps = op.getIndexingMaps();
756  // First fetch dilations/strides :-
757  // Match: W * stride + w * dilation
758  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
759                                  /*oDim=*/1, (*dilations)[0], (*strides)[0]))
760    return false;
761  // Match expected indexing maps
763          {/*inputMap=*/{N, W * (*strides)[0] + w * (*dilations)[0], C},
764           /*filterMap=*/{w, C, CM},
765           /*outputMap=*/{N, W, C, CM}},
766          indexingMaps, context))
767    return false;
768  // Match body
769  Block *body = op.getBlock();
770  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
771  Value yieldVal = yieldOp.getOperand(0);
772  return bodyMatcherForConvolutionOps(yieldVal, body);
773}
774
// Matcher for the linalg.depthwise_conv_2d_nchw_chw form; populates
// *dilations/*strides (rank 2, default 1). NOTE(review): the
// specialization's declaration line and the indexing-map-check call line
// were lost in extraction.
775// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, C, H + h, W + w)>
776// #filterMap = affine_map<(N, H, W, C, h, w) -> (C, h, w)>
777// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, C, H, W)>
778template <>
780    LinalgOp op, SmallVector<int64_t> *dilations,
781    SmallVector<int64_t> *strides) {
782  if (isa<linalg::DepthwiseConv2DNchwChwOp>(op))
783    return true;
784
785  assert(isaConvolutionOpInterface(op) &&
786         "expected op to implement ConvolutionOpInterface");
787
788  *dilations = SmallVector<int64_t>(2, 1);
789  *strides = SmallVector<int64_t>(2, 1);
790  MLIRContext *context = op->getContext();
791  AffineExpr N = getAffineDimExpr(0, context);
792  AffineExpr H = getAffineDimExpr(1, context);
793  AffineExpr W = getAffineDimExpr(2, context);
794  AffineExpr C = getAffineDimExpr(3, context);
795  AffineExpr h = getAffineDimExpr(4, context);
796  AffineExpr w = getAffineDimExpr(5, context);
797  ArrayAttr indexingMaps = op.getIndexingMaps();
798  // First fetch dilations/strides :-
799  // Match: H * stride + h * dilation
800  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
801                                  /*oDim=*/2, (*dilations)[0], (*strides)[0]))
802    return false;
803  // Match: W * stride + w * dilation
804  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/3, /*fDim=*/2,
805                                  /*oDim=*/3, (*dilations)[1], (*strides)[1]))
806    return false;
807  // Match expected indexing maps
809          {/*inputMap=*/{N, C, H * (*strides)[0] + h * (*dilations)[0],
810                         W * (*strides)[1] + w * (*dilations)[1]},
811           /*filterMap=*/{C, h, w},
812           /*outputMap=*/{N, C, H, W}},
813          indexingMaps, context))
814    return false;
815  // Match body
816  Block *body = op.getBlock();
817  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
818  Value yieldVal = yieldOp.getOperand(0);
819  return bodyMatcherForConvolutionOps(yieldVal, body);
820}
821
// Matcher for the linalg.depthwise_conv_3d_ndhwc_dhwcm form (with channel
// multiplier); populates *dilations/*strides (rank 3, default 1).
// NOTE(review): the specialization's declaration line and the
// indexing-map-check call line were lost in extraction.
822// #inputMap = affine_map<(N, D, H, W, CM, d, h, w, C)
823//                         -> (N, D + d, H + h, W + w, C)>
824// #filterMap = affine_map<(N, D, H, W, CM, d, h, w, C)
825//                         -> (d, h, w, C, CM)>
826// #outputMap = affine_map<(N, D, H, W, CM, d, h, w, C)
827//                         -> (N, D, H, W, C, CM)>
828template <>
830    LinalgOp op, SmallVector<int64_t> *dilations,
831    SmallVector<int64_t> *strides) {
832  if (isa<linalg::DepthwiseConv3DNdhwcDhwcmOp>(op))
833    return true;
834
835  assert(isaConvolutionOpInterface(op) &&
836         "expected op to implement ConvolutionOpInterface");
837
838  *dilations = SmallVector<int64_t>(3, 1);
839  *strides = SmallVector<int64_t>(3, 1);
840  MLIRContext *context = op->getContext();
841  AffineExpr N = getAffineDimExpr(0, context);
842  AffineExpr D = getAffineDimExpr(1, context);
843  AffineExpr H = getAffineDimExpr(2, context);
844  AffineExpr W = getAffineDimExpr(3, context);
845  AffineExpr CM = getAffineDimExpr(4, context);
846  AffineExpr d = getAffineDimExpr(5, context);
847  AffineExpr h = getAffineDimExpr(6, context);
848  AffineExpr w = getAffineDimExpr(7, context);
849  AffineExpr C = getAffineDimExpr(8, context);
850  ArrayAttr indexingMaps = op.getIndexingMaps();
851  // First fetch dilations/strides :-
852  // Match: D * stride + d * dilation
853  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
854                                  /*oDim=*/1, (*dilations)[0], (*strides)[0]))
855    return false;
856  // Match: H * stride + h * dilation
857  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
858                                  /*oDim=*/2, (*dilations)[1], (*strides)[1]))
859    return false;
860  // Match: W * stride + w * dilation
861  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/3, /*fDim=*/2,
862                                  /*oDim=*/3, (*dilations)[2], (*strides)[2]))
863    return false;
864  // Match expected indexing maps
866          {/*inputMap=*/{N, D * (*strides)[0] + d * (*dilations)[0],
867                         H * (*strides)[1] + h * (*dilations)[1],
868                         W * (*strides)[2] + w * (*dilations)[2], C},
869           /*filterMap=*/{d, h, w, C, CM},
870           /*outputMap=*/{N, D, H, W, C, CM}},
871          indexingMaps, context))
872    return false;
873  // Match body
874  Block *body = op.getBlock();
875  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
876  Value yieldVal = yieldOp.getOperand(0);
877  return bodyMatcherForConvolutionOps(yieldVal, body);
878}
879
// Matcher for the linalg.pooling_nhwc_max form; same map-matching scheme as
// the conv matchers but the body must be a signed/float max combiner.
// NOTE(review): the specialization's declaration line and the
// indexing-map-check call line were lost in extraction.
880// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
881// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)>
882// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
883template <>
885    LinalgOp op, SmallVector<int64_t> *dilations,
886    SmallVector<int64_t> *strides) {
887  if (isa<linalg::PoolingNhwcMaxOp>(op))
888    return true;
889
890  assert(isaConvolutionOpInterface(op) &&
891         "expected op to implement ConvolutionOpInterface");
892
893  *dilations = SmallVector<int64_t>(2, 1);
894  *strides = SmallVector<int64_t>(2, 1);
895  MLIRContext *context = op->getContext();
896  AffineExpr N = getAffineDimExpr(0, context);
897  AffineExpr H = getAffineDimExpr(1, context);
898  AffineExpr W = getAffineDimExpr(2, context);
899  AffineExpr C = getAffineDimExpr(3, context);
900  AffineExpr h = getAffineDimExpr(4, context);
901  AffineExpr w = getAffineDimExpr(5, context);
902  ArrayAttr indexingMaps = op.getIndexingMaps();
903  // First fetch dilations/strides :-
904  // Match: H * stride + h * dilation
905  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
906                                  /*oDim=*/1, (*dilations)[0], (*strides)[0]))
907    return false;
908  // Match: W * stride + w * dilation
909  if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
910                                  /*oDim=*/2, (*dilations)[1], (*strides)[1]))
911    return false;
912  // Match expected indexing maps
914          {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0],
915                         W * (*strides)[1] + w * (*dilations)[1], C},
916           /*filterMap=*/{h, w},
917           /*outputMap=*/{N, H, W, C}},
918          indexingMaps, context))
919    return false;
920  // Match body
921  Block *body = op.getBlock();
922  auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
923  Value yieldVal = yieldOp.getOperand(0);
924  return bodyMatcherForMaxSignedPoolOps(yieldVal, body);
925}
926
// Specialization of isaConvolutionOpOfType for linalg::PoolingNhwcMinOp.
// Same structure as the NHWC max matcher above, but the body must pass
// bodyMatcherForMinSignedPoolOps. Populates `*dilations`/`*strides` (two
// entries each) on a structural match; leaves them untouched for the named op.
// NOTE(review): doxygen extraction dropped the signature line (source line
// 931) and the `if (!convLayoutMatches(` line (source line 960) -- confirm
// against upstream Utils.cpp.
927// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
928// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)>
929// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
930template <>
932 LinalgOp op, SmallVector<int64_t> *dilations,
933 SmallVector<int64_t> *strides) {
934 if (isa<linalg::PoolingNhwcMinOp>(op))
935 return true;
936
937 assert(isaConvolutionOpInterface(op) &&
938 "expected op to implement ConvolutionOpInterface");
939
940 *dilations = SmallVector<int64_t>(2, 1);
941 *strides = SmallVector<int64_t>(2, 1);
942 MLIRContext *context = op->getContext();
943 AffineExpr N = getAffineDimExpr(0, context);
944 AffineExpr H = getAffineDimExpr(1, context);
945 AffineExpr W = getAffineDimExpr(2, context);
946 AffineExpr C = getAffineDimExpr(3, context);
947 AffineExpr h = getAffineDimExpr(4, context);
948 AffineExpr w = getAffineDimExpr(5, context);
949 ArrayAttr indexingMaps = op.getIndexingMaps();
950 // First fetch dilations/strides :-
951 // Match: H * stride + h * dilation
952 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
953 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
954 return false;
955 // Match: W * stride + w * dilation
956 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
957 /*oDim=*/2, (*dilations)[1], (*strides)[1]))
958 return false;
959 // Match expected indexing maps
// NOTE(review): extraction dropped the `if (!convLayoutMatches(` call line
// that these braced initializer lists belong to.
961 {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0],
962 W * (*strides)[1] + w * (*dilations)[1], C},
963 /*filterMap=*/{h, w},
964 /*outputMap=*/{N, H, W, C}},
965 indexingMaps, context))
966 return false;
967 // Match body
968 Block *body = op.getBlock();
969 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
970 Value yieldVal = yieldOp.getOperand(0);
971 return bodyMatcherForMinSignedPoolOps(yieldVal, body);
972}
973
// Specialization of isaConvolutionOpOfType for linalg::PoolingNhwcSumOp.
// Same structure as the other NHWC pooling matchers; the body must pass
// bodyMatcherForSumPoolOps. Populates `*dilations`/`*strides` (two entries
// each) on a structural match; leaves them untouched for the named op.
// NOTE(review): doxygen extraction dropped the specialization's signature
// line (source line 978) -- confirm against upstream Utils.cpp.
974// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
975// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)>
976// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
977template <>
979 LinalgOp op, SmallVector<int64_t> *dilations,
980 SmallVector<int64_t> *strides) {
981 if (isa<linalg::PoolingNhwcSumOp>(op))
982 return true;
983
984 assert(isaConvolutionOpInterface(op) &&
985 "expected op to implement ConvolutionOpInterface");
986
987 *dilations = SmallVector<int64_t>(2, 1);
988 *strides = SmallVector<int64_t>(2, 1);
989 MLIRContext *context = op->getContext();
990 AffineExpr N = getAffineDimExpr(0, context);
991 AffineExpr H = getAffineDimExpr(1, context);
992 AffineExpr W = getAffineDimExpr(2, context);
993 AffineExpr C = getAffineDimExpr(3, context);
994 AffineExpr h = getAffineDimExpr(4, context);
995 AffineExpr w = getAffineDimExpr(5, context);
996 ArrayAttr indexingMaps = op.getIndexingMaps();
997 // First fetch dilations/strides :-
998 // Match: H * stride + h * dilation
999 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
1000 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
1001 return false;
1002 // Match: W * stride + w * dilation
1003 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
1004 /*oDim=*/2, (*dilations)[1], (*strides)[1]))
1005 return false;
1006 // Match expected indexing maps
1007 if (!convLayoutMatches(
1008 {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0],
1009 W * (*strides)[1] + w * (*dilations)[1], C},
1010 /*filterMap=*/{h, w},
1011 /*outputMap=*/{N, H, W, C}},
1012 indexingMaps, context))
1013 return false;
1014 // Match body
1015 Block *body = op.getBlock();
1016 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
1017 Value yieldVal = yieldOp.getOperand(0);
1018 return bodyMatcherForSumPoolOps(yieldVal, body);
1019}
1020
// Specialization of isaConvolutionOpOfType for
// linalg::PoolingNhwcMaxUnsignedOp. Same structure as the signed max
// matcher; the body must pass bodyMatcherForMaxUnsignedPoolOps.
// NOTE(review): doxygen extraction dropped the specialization's signature
// line (source line 1025) -- confirm against upstream Utils.cpp.
1021// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
1022// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)>
1023// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
1024template <>
1026 LinalgOp op, SmallVector<int64_t> *dilations,
1027 SmallVector<int64_t> *strides) {
1028 if (isa<linalg::PoolingNhwcMaxUnsignedOp>(op))
1029 return true;
1030
1031 assert(isaConvolutionOpInterface(op) &&
1032 "expected op to implement ConvolutionOpInterface");
1033
1034 *dilations = SmallVector<int64_t>(2, 1);
1035 *strides = SmallVector<int64_t>(2, 1);
1036 MLIRContext *context = op->getContext();
1037 AffineExpr N = getAffineDimExpr(0, context);
1038 AffineExpr H = getAffineDimExpr(1, context);
1039 AffineExpr W = getAffineDimExpr(2, context);
1040 AffineExpr C = getAffineDimExpr(3, context);
1041 AffineExpr h = getAffineDimExpr(4, context);
1042 AffineExpr w = getAffineDimExpr(5, context);
1043 ArrayAttr indexingMaps = op.getIndexingMaps();
1044 // First fetch dilations/strides :-
1045 // Match: H * stride + h * dilation
1046 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
1047 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
1048 return false;
1049 // Match: W * stride + w * dilation
1050 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
1051 /*oDim=*/2, (*dilations)[1], (*strides)[1]))
1052 return false;
1053 // Match expected indexing maps
1054 if (!convLayoutMatches(
1055 {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0],
1056 W * (*strides)[1] + w * (*dilations)[1], C},
1057 /*filterMap=*/{h, w},
1058 /*outputMap=*/{N, H, W, C}},
1059 indexingMaps, context))
1060 return false;
1061 // Match body
1062 Block *body = op.getBlock();
1063 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
1064 Value yieldVal = yieldOp.getOperand(0);
1065 return bodyMatcherForMaxUnsignedPoolOps(yieldVal, body);
1066}
1067
// Specialization of isaConvolutionOpOfType for
// linalg::PoolingNhwcMinUnsignedOp. Same structure as the signed min
// matcher; the body must pass bodyMatcherForMinUnsignedPoolOps.
// NOTE(review): doxygen extraction dropped the specialization's signature
// line (source line 1072) -- confirm against upstream Utils.cpp.
1068// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
1069// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)>
1070// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
1071template <>
1073 LinalgOp op, SmallVector<int64_t> *dilations,
1074 SmallVector<int64_t> *strides) {
1075 if (isa<linalg::PoolingNhwcMinUnsignedOp>(op))
1076 return true;
1077
1078 assert(isaConvolutionOpInterface(op) &&
1079 "expected op to implement ConvolutionOpInterface");
1080
1081 *dilations = SmallVector<int64_t>(2, 1);
1082 *strides = SmallVector<int64_t>(2, 1);
1083 MLIRContext *context = op->getContext();
1084 AffineExpr N = getAffineDimExpr(0, context);
1085 AffineExpr H = getAffineDimExpr(1, context);
1086 AffineExpr W = getAffineDimExpr(2, context);
1087 AffineExpr C = getAffineDimExpr(3, context);
1088 AffineExpr h = getAffineDimExpr(4, context);
1089 AffineExpr w = getAffineDimExpr(5, context);
1090 ArrayAttr indexingMaps = op.getIndexingMaps();
1091 // First fetch dilations/strides :-
1092 // Match: H * stride + h * dilation
1093 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
1094 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
1095 return false;
1096 // Match: W * stride + w * dilation
1097 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
1098 /*oDim=*/2, (*dilations)[1], (*strides)[1]))
1099 return false;
1100 // Match expected indexing maps
1101 if (!convLayoutMatches(
1102 {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0],
1103 W * (*strides)[1] + w * (*dilations)[1], C},
1104 /*filterMap=*/{h, w},
1105 /*outputMap=*/{N, H, W, C}},
1106 indexingMaps, context))
1107 return false;
1108 // Match body
1109 Block *body = op.getBlock();
1110 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
1111 Value yieldVal = yieldOp.getOperand(0);
1112 return bodyMatcherForMinUnsignedPoolOps(yieldVal, body);
1113}
1114
1115Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
1116 Value source, Value pad, bool nofold,
1117 ValueRange typeDynDims) {
1118 // Exit if `source` is not defined by an ExtractSliceOp.
1119 auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
1120 if (!sliceOp)
1121 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
1122 typeDynDims);
1123
1124 // Search the `source` use-def chain for padded LinalgOps.
1125 Value current = sliceOp.getSource();
1126 while (current) {
1127 auto linalgOp = current.getDefiningOp<LinalgOp>();
1128 if (!linalgOp)
1129 break;
1130 OpResult opResult = cast<OpResult>(current);
1131 current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
1132 }
1133 auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
1134
1135 // Exit if the search fails to match a tensor::PadOp at the end of the matched
1136 // LinalgOp sequence.
1137 if (!padOp)
1138 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
1139 typeDynDims);
1140
1141 // Exit if the padded result type does not match.
1142 if (sliceOp.getSource().getType() != type)
1143 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
1144 typeDynDims);
1145
1146 // Exit if the LinalgOps are not high padded.
1147 if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
1148 return getConstantIntValue(ofr) != static_cast<int64_t>(0);
1149 }))
1150 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
1151 typeDynDims);
1152
1153 // Exit if `padOpSliceOp`, which defines the slice used by
1154 // `padOp`, is rank-reducing.
1155 auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
1156 if (!padOpSliceOp ||
1157 sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
1158 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
1159 typeDynDims);
1160
1161 // Exit if the sizes of the dynamic sizes of `sliceOp` do not match the size
1162 // of the slice padded by `padOp`.
1163 if (llvm::any_of(
1164 llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
1165 [](std::tuple<OpFoldResult, OpFoldResult> it) {
1166 return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
1167 }))
1168 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
1169 typeDynDims);
1170
1171 // Exit if the padding values do not match.
1172 Attribute padOpPadAttr, padAttr;
1173 Value padOpPad = padOp.getConstantPaddingValue();
1174 if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
1175 !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
1176 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
1177 typeDynDims);
1178
1179 // Return the padded result if the padding values and sizes match.
1180 return sliceOp.getSource();
1181}
1182
1183GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) {
1184 auto memrefTypeTo = cast<MemRefType>(to.getType());
1185#ifndef NDEBUG
1186 auto memrefTypeFrom = cast<MemRefType>(from.getType());
1187 assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
1188 "`from` and `to` memref must have the same rank");
1189#endif // NDEBUG
1190
1191 AffineMap id =
1192 AffineMap::getMultiDimIdentityMap(memrefTypeTo.getRank(), b.getContext());
1193 SmallVector<utils::IteratorType> iteratorTypes(memrefTypeTo.getRank(),
1194 utils::IteratorType::parallel);
1195 return linalg::GenericOp::create(
1196 b, loc,
1197 /*inputs=*/from,
1198 /*outputs=*/to,
1199 /*indexingMaps=*/llvm::ArrayRef({id, id}),
1200 /*iteratorTypes=*/iteratorTypes,
1201 [](OpBuilder &b, Location loc, ValueRange args) {
1202 linalg::YieldOp::create(b, loc, args.front());
1203 });
1204}
1205
1206/// Specialization to build an scf "for" nest.
// Builds an scf.for nest over `loopRanges`. For ops with tensor semantics,
// the DPS inits are threaded through the nest as iter_args and spliced into
// the operand list handed to `bodyBuilderFn`. Afterwards, loops whose
// procInfo entry requests distribution are mapped to processor ids.
// NOTE(review): doxygen extraction dropped several lines here: the
// specialization's signature (source line 1208), part of the bodyBuilderFn
// function_ref parameter type (1211), the `LoopNest loopNest = ...
// buildLoopNest(` assignment (1223), and the right-hand side of the
// distributionMethod comparison (1242, presumably
// DistributionMethod::Cyclic) -- confirm against upstream Utils.cpp.
1207template <>
1209 OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
1210 ArrayRef<utils::IteratorType> iteratorTypes,
1212 ValueRange)>
1213 bodyBuilderFn,
1214 ArrayRef<linalg::ProcInfo> procInfo) {
1215 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
1216 "expected as many entries for proc info as number of loops, even if "
1217 "they are null entries");
1218 SmallVector<Value> iterArgInitValues;
1219 if (!linalgOp.hasPureBufferSemantics())
1220 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
1221 SmallVector<Value, 4> lbs, ubs, steps;
1222 unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
1224 b, loc, lbs, ubs, steps, iterArgInitValues,
1225 [&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
1226 assert(iterArgs.size() == iterArgInitValues.size() &&
1227 "expect the number of output tensors and iter args to match");
// With tensor semantics, replace the inits in the operand list by the
// current iteration's iter_args so the body reads the threaded values.
1228 SmallVector<Value> operandValuesToUse = linalgOp->getOperands();
1229 if (!iterArgs.empty()) {
1230 operandValuesToUse = linalgOp.getDpsInputs();
1231 operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
1232 }
1233 return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
1234 });
1235
1236 if (loopNest.loops.empty() || procInfo.empty())
1237 return;
1238
1239 // Filter out scf.for loops that were created out of parallel dimensions.
1240 for (const auto &loop : llvm::enumerate(loopNest.loops)) {
1241 if (procInfo[loop.index()].distributionMethod ==
1243 mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId,
1244 procInfo[loop.index()].nprocs);
1245 }
1246 }
1247}
1248
1249/// Specialization to build affine "for" nest.
// Builds an affine.for nest over `loopRanges`. Affine loops cannot carry
// iter_args here, so the op must have pure buffer semantics (asserted), and
// all steps must fold to constants. procInfo is ignored for affine nests.
// NOTE(review): doxygen extraction dropped the specialization's signature
// line (source line 1251) and part of the bodyBuilderFn function_ref
// parameter type (1254) -- confirm against upstream Utils.cpp.
1250template <>
1252 OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
1253 ArrayRef<utils::IteratorType> iteratorTypes,
1255 ValueRange)>
1256 bodyBuilderFn,
1257 ArrayRef<linalg::ProcInfo> /*procInfo*/) {
1258 SmallVector<Value> iterArgInitValues;
1259 if (!linalgOp.hasPureBufferSemantics())
1260 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
1261 assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
1262 SmallVector<Value, 4> lbs, ubs, steps;
1263 unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
1264
1265 // Affine loops require constant steps.
1266 SmallVector<int64_t, 4> constantSteps;
1267 constantSteps.reserve(steps.size());
1268 for (Value v : steps) {
1269 auto constVal = getConstantIntValue(v);
1270 assert(constVal.has_value() && "Affine loops require constant steps");
1271 constantSteps.push_back(constVal.value());
1272 }
1273
1274 affine::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
1275 [&](OpBuilder &b, Location loc, ValueRange ivs) {
1276 bodyBuilderFn(b, loc, ivs,
1277 linalgOp->getOperands());
1278 });
1279}
1280
1281/// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
// Rewrites, via composed affine.apply ops:
//   lb   <- lb + procId * step   (each processor starts at its own offset)
//   step <- nprocs * step        (processors stride past each other)
// `ub` is passed by reference but not modified in the visible code.
// NOTE(review): doxygen extraction dropped the function's signature line
// (source line 1282) -- confirm against upstream Utils.cpp.
1283 Value nprocs, Value &lb, Value &ub,
1284 Value &step) {
1285 AffineExpr d0, d1;
1286 bindDims(b.getContext(), d0, d1);
1287 AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
1288 lb =
1289 affine::makeComposedAffineApply(b, loc, d0 + d1 * s0, {lb, procId, step});
1290 step = affine::makeComposedAffineApply(b, loc, d0 * s0, {nprocs, step});
1291}
1292
1293/// Generates a loop nest consisting of scf.parallel and scf.for, depending
1294/// on the `iteratorTypes.` Consecutive parallel loops create a single
1295/// scf.parallel operation; each sequential loop creates a new scf.for
1296/// operation. The body of the innermost loop is populated by
1297/// `bodyBuilderFn` that accepts a range of induction variables for all
1298/// loops. `ivStorage` is used to store the partial list of induction
1299/// variables.
1300// TODO: this function can be made iterative instead. However, it
1301// will have at most as many recursive calls as nested loops, which rarely
1302// exceeds 10.
// NOTE(review): doxygen extraction dropped several lines in this function:
// the signature (source line 1303), the procInfo parameter (1306), the
// `case DistributionMethod::...` labels of the switch (1355, 1371, 1387,
// 1404), and a recursive `generateParallelLoopNest(` call line (1408) --
// confirm against upstream Utils.cpp.
1304 OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
1305 ValueRange steps, ArrayRef<utils::IteratorType> iteratorTypes,
1307 function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
1308 SmallVectorImpl<Value> &ivStorage) {
1309 assert(lbs.size() == ubs.size());
1310 assert(lbs.size() == steps.size());
1311 assert(lbs.size() == iteratorTypes.size());
1312 assert(procInfo.empty() || (lbs.size() == procInfo.size()));
1313
1314 // If there are no (more) loops to be generated, generate the body and be
1315 // done with it.
1316 if (iteratorTypes.empty()) {
1317 bodyBuilderFn(b, loc, ivStorage);
1318 return;
1319 }
1320
1321 // If there are no outer parallel loops, generate one sequential loop and
1322 // recurse.
1323 if (!isParallelIterator(iteratorTypes.front())) {
1324 LoopNest singleLoop = buildLoopNest(
1325 b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
1326 [&](OpBuilder &b, Location loc, ValueRange ivs) {
1327 ivStorage.append(ivs.begin(), ivs.end());
1328 generateParallelLoopNest(
1329 b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
1330 iteratorTypes.drop_front(),
1331 procInfo.empty() ? procInfo : procInfo.drop_front(),
1332 bodyBuilderFn, ivStorage);
1333 });
1334 return;
1335 }
1336
// Count how many leading loops share one treatment: without procInfo, the
// run of leading parallel iterators; with procInfo, the run of leading
// loops sharing the front entry's distribution method.
1337 unsigned nLoops = iteratorTypes.size();
1338 unsigned numProcessed = 0;
1339 DistributionMethod distributionMethod = DistributionMethod::None;
1340 if (procInfo.empty()) {
1341 numProcessed = nLoops - iteratorTypes.drop_while(isParallelIterator).size();
1342 } else {
1343 distributionMethod = procInfo.front().distributionMethod;
1344 numProcessed =
1345 nLoops - procInfo
1346 .drop_while([&](linalg::ProcInfo p) {
1347 return p.distributionMethod == distributionMethod;
1348 })
1349 .size();
1350 }
1351
1352 auto remainderProcInfo =
1353 procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
1354 switch (distributionMethod) {
// (dropped label, presumably: case DistributionMethod::None)
1356 // Generate a single parallel loop-nest operation for all outermost
1357 // parallel loops and recurse.
1358 scf::ParallelOp::create(
1359 b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
1360 steps.take_front(numProcessed),
1361 [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
1362 ivStorage.append(localIvs.begin(), localIvs.end());
1363 generateParallelLoopNest(
1364 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
1365 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
1366 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
1367 bodyBuilderFn, ivStorage);
1368 });
1369 return;
1370 }
// (dropped label, presumably: case DistributionMethod::Cyclic)
1372 // Generate a single parallel loop-nest operation for all outermost
1373 // parallel loops and recurse.
1374 scf::ParallelOp::create(
1375 b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
1376 steps.take_front(numProcessed),
1377 [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
1378 ivStorage.append(localIvs.begin(), localIvs.end());
1379 generateParallelLoopNest(
1380 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
1381 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
1382 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
1383 bodyBuilderFn, ivStorage);
1384 });
1385 return;
1386 }
// (dropped label, presumably: case DistributionMethod::CyclicNumProcsGeNumIters)
1388 // Check (for the processed loops) that the iteration is in-bounds.
1389 ArithBuilder ab(b, loc);
1390 Value cond = ab.slt(lbs[0], ubs[0]);
1391 for (unsigned i = 1; i < numProcessed; ++i)
1392 cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
1393 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
1394 scf::IfOp::create(b, loc, cond, [&](OpBuilder &b, Location loc) {
1395 generateParallelLoopNest(b, loc, lbs.drop_front(numProcessed),
1396 ubs.drop_front(numProcessed),
1397 steps.drop_front(numProcessed),
1398 iteratorTypes.drop_front(numProcessed),
1399 remainderProcInfo, bodyBuilderFn, ivStorage);
1400 scf::YieldOp::create(b, loc, ValueRange{});
1401 });
1402 return;
1403 }
// (dropped label, presumably: case DistributionMethod::CyclicNumProcsEqNumIters)
1405 // No check/loops needed here. Set the `%iv` to be the `%lb` and proceed
1406 // with inner loop generation.
1407 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
1409 b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
1410 steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
1411 remainderProcInfo, bodyBuilderFn, ivStorage);
1412 return;
1413 }
1414}
1415
1416/// Specialization for generating a mix of parallel and sequential scf loops.
// Requires pure buffer semantics (asserted). Unpacks lb/ub/step from
// `loopRanges`, adjusts them per processor for any loop whose procInfo
// entry requests distribution, then delegates nest construction to
// generateParallelLoopNest.
// NOTE(review): doxygen extraction dropped lines here: the specialization's
// signature (source line 1418), part of the bodyBuilderFn function_ref
// parameter type (1421), the `updateBoundsForCyclicDistribution(` call line
// (1448), and the `generateParallelLoopNest(` call line (1454) -- confirm
// against upstream Utils.cpp.
1417template <>
1419 OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
1420 ArrayRef<utils::IteratorType> iteratorTypes,
1422 ValueRange)>
1423 bodyBuilderFn,
1424 ArrayRef<linalg::ProcInfo> procInfo) {
1425 SmallVector<Value> iterArgInitValues;
1426 if (!linalgOp.hasPureBufferSemantics())
1427 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
1428 assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
1429 // This function may be passed more iterator types than ranges.
1430 assert(iteratorTypes.size() >= loopRanges.size() &&
1431 "expected iterator type for all ranges");
1432 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
1433 "expected proc information for all loops when present");
1434 iteratorTypes = iteratorTypes.take_front(loopRanges.size());
1435 SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
1436 unsigned numLoops = iteratorTypes.size();
1437 ivs.reserve(numLoops);
1438 lbsStorage.reserve(numLoops);
1439 ubsStorage.reserve(numLoops);
1440 stepsStorage.reserve(numLoops);
1441
1442 // Get the loop lb, ub, and step.
1443 unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
1444
1445 // Modify the lb, ub, and step based on the distribution options.
1446 for (const auto &it : llvm::enumerate(procInfo)) {
1447 if (it.value().distributionMethod != linalg::DistributionMethod::None) {
1449 b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
1450 ubsStorage[it.index()], stepsStorage[it.index()]);
1451 }
1452 }
1453 ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
1455 b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
1456 [&](OpBuilder &b, Location loc, ValueRange ivs) {
1457 bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
1458 },
1459 ivs);
1460
1461 assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
1462}
1463
// Materialize the slice described by `sliceParams` for `valueToTile`:
// memref operands become a memref.subview, ranked tensors become a
// tensor.extract_slice; any other shaped type is unreachable.
// NOTE(review): doxygen extraction dropped the function's signature line
// (source line 1464) -- confirm against upstream Utils.cpp.
1465 Value valueToTile,
1466 const SliceParameters &sliceParams) {
1467 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
1468 auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
1469 .Case([&](MemRefType) {
1470 return memref::SubViewOp::create(
1471 builder, loc, valueToTile, sliceParams.offsets,
1472 sliceParams.sizes, sliceParams.strides);
1473 })
1474 .Case([&](RankedTensorType) {
1475 return tensor::ExtractSliceOp::create(
1476 builder, loc, valueToTile, sliceParams.offsets,
1477 sliceParams.sizes, sliceParams.strides);
1478 })
1479 .DefaultUnreachable("Unexpected shaped type");
1480 return sliceOp;
1481}
1482
// Convenience wrapper: compute the slice parameters for `valueToTile` and
// immediately materialize the corresponding subview/extract_slice.
// NOTE(review): doxygen extraction dropped the leading signature lines
// (source lines 1483, 1485-1486: the function name and the lbs/ubs
// parameters) -- confirm against upstream Utils.cpp.
1484 ArrayRef<OpFoldResult> tileSizes, AffineMap map,
1487 ArrayRef<OpFoldResult> subShapeSizes,
1488 bool omitPartialTileCheck) {
1489 SliceParameters sliceParams =
1490 computeSliceParameters(builder, loc, valueToTile, tileSizes, map, lbs,
1491 ubs, subShapeSizes, omitPartialTileCheck);
1492 return materializeTiledShape(builder, loc, valueToTile, sliceParams);
1493}
1494
// Compute per-dimension offsets/sizes/strides describing the tile of
// `valueToTile` accessed through indexing map `map` for the tile at `lbs`.
// Untiled dimensions get offset 0 and the full dimension size; tiled
// dimensions get offset m(lbs) - m(0) and a size that is clamped with an
// affine.min unless `omitPartialTileCheck` is set or divisibility is
// statically known. All strides are 1.
// NOTE(review): doxygen extraction dropped lines here: the function's
// leading signature lines (source lines 1495-1496), the `OpFoldResult
// offset = makeComposedFoldedAffineApply(` line (1535), and the AffineMap
// assignments for minusOneMap/plusOneMap/minMap (1584, 1587, 1601) --
// confirm against upstream Utils.cpp.
1497 ArrayRef<OpFoldResult> tileSizes, AffineMap map,
1499 ArrayRef<OpFoldResult> subShapeSizes,
1500 bool omitPartialTileCheck) {
1501 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
1502 assert(shapedType && "only shaped types can be tiled");
1503 ArrayRef<int64_t> shape = shapedType.getShape();
1504 int64_t rank = shapedType.getRank();
1505
1506 // Compute offsets/sizes/strides for the tile.
1507 SliceParameters sliceParams;
1508 sliceParams.offsets.reserve(rank);
1509 sliceParams.sizes.reserve(rank);
1510 sliceParams.strides.reserve(rank);
1511 for (unsigned r = 0; r < rank; ++r) {
1512 LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
// Untiled dimension: take the whole extent with offset 0 and stride 1.
1513 if (!isTiled(map.getSubMap({r}), tileSizes)) {
1514 sliceParams.offsets.push_back(builder.getIndexAttr(0));
1515 OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
1516 sliceParams.sizes.push_back(dim);
1517 sliceParams.strides.push_back(builder.getIndexAttr(1));
1518 LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
1519 continue;
1520 }
1521 LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
1522
1523 // Tiling creates a new slice at the proper index, the slice step is 1
1524 // (i.e. the op does not subsample, stepping occurs in the loop).
1525 auto m = map.getSubMap({r});
1526 LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
1527 IRRewriter rewriter(builder);
1528 // The offset of the slice is m(lbs) - m(0).
1529 SmallVector<Attribute> zeros(lbs.size(), rewriter.getIndexAttr(0));
1530 SmallVector<Attribute> mAtZero;
1531 [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
1532 assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
1533 int64_t mAtZeroInt =
1534 cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
1536 rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
1537 sliceParams.offsets.push_back(offset);
1538
// Apply the map to the closed-interval tile sizes, then add 1 to return
// to half-open sizes.
1539 OpFoldResult closedIntSize =
1540 makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes);
1541 // Resulting size needs to be made half open interval again.
1542 AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
1543 OpFoldResult size =
1544 makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize);
1545 LLVM_DEBUG(llvm::dbgs()
1546 << "computeSliceParameters: raw size: " << size << "\n");
1547 LLVM_DEBUG(llvm::dbgs()
1548 << "computeSliceParameters: new offset: " << offset << "\n");
1549 sliceParams.strides.push_back(builder.getIndexAttr(1));
1550
1551 if (omitPartialTileCheck) {
1552 // We statically know that the partial/boundary tile condition is
1553 // unnecessary.
1554 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
1555 sliceParams.sizes.push_back(size);
1556 continue;
1557 }
1558
1559 // The size of the subview / extract_slice should be trimmed to avoid
1560 // out-of-bounds accesses, unless:
1561 // a. We statically know the subshape size divides the shape size evenly.
1562 // b. The subshape size is 1. According to the way the loops are set up,
1563 // tensors with "0" dimensions would never be constructed.
1564 int64_t shapeSize = shape[r];
1565 std::optional<int64_t> sizeCst = getConstantIntValue(size);
1566 auto hasTileSizeOne = sizeCst == 1;
1567 auto dividesEvenly = sizeCst && ShapedType::isStatic(shapeSize) &&
1568 ((shapeSize % *sizeCst) == 0);
1569 if (!hasTileSizeOne && !dividesEvenly) {
1570 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
1571 << ", size: " << size
1572 << ": make sure in bound with affine.min\n");
1573
1574 AffineExpr dim0, dim1, dim2;
1575 MLIRContext *context = builder.getContext();
1576 bindDims(context, dim0, dim1, dim2);
1577
1578 // Get the dimension size for this dimension. We need to first calculate
1579 // the max index and then plus one. This is important because for
1580 // convolution ops, we have its input window dimension's affine map of the
1581 // form `(d0 * s0 + d1)`, where `d0`/`d1` is an output/filter window
1582 // dimension and `s0` is stride. Directly use the dimension size of
1583 // output/filter window dimensions will cause incorrect calculation.
1585 {ArrayRef<AffineExpr>{dim0 - 1}}, context)
1586 .front();
1588 {ArrayRef<AffineExpr>{dim0 + 1}}, context)
1589 .front();
1590 SmallVector<OpFoldResult> maxIndices =
1591 llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
1592 return makeComposedFoldedAffineApply(rewriter, loc, minusOneMap,
1593 {ub});
1594 }));
1595 OpFoldResult maxIndex =
1596 makeComposedFoldedAffineApply(rewriter, loc, m, maxIndices);
1597 OpFoldResult d =
1598 makeComposedFoldedAffineApply(rewriter, loc, plusOneMap, {maxIndex});
1599
1600 // Compute min(dim - offset, size) to avoid out-of-bounds accesses.
1602 {ArrayRef<AffineExpr>{dim1 - dim2, dim0}}, context)
1603 .front();
1604 size =
1605 makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset});
1606 }
1607 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
1608 sliceParams.sizes.push_back(size);
1609 }
1610 return sliceParams;
1611}
1612
// Compute the per-loop tile offsets: for each tile size, the offset is the
// next induction variable from `ivs` when the loop is tiled (non-zero tile
// size), and constant 0 otherwise. Note `ivs` is consumed in order and only
// advanced for tiled loops.
// NOTE(review): doxygen extraction dropped the function's leading signature
// lines (source lines 1613-1614) and the `offsets` declaration (1616) --
// confirm against upstream Utils.cpp.
1615 ArrayRef<OpFoldResult> tileSizes) {
1617 for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
1618 LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
1619 bool isTiled = !isZeroInteger(tileSizes[idx]);
1620 offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
1621 LLVM_DEBUG(llvm::dbgs()
1622 << "computeTileOffsets: " << offsets.back() << "\n");
1623 }
1624 return offsets;
1625}
1626
// Compute the per-loop tile sizes as closed intervals: pick the tile size
// for tiled loops (non-zero tile size) or the full size bound otherwise,
// then subtract 1 so the value can be composed with affine maps downstream
// (computeSliceParameters re-adds the 1).
// NOTE(review): doxygen extraction dropped the function's signature line
// (source line 1627) and the `sizes` declaration (1630) -- confirm against
// upstream Utils.cpp.
1628 ArrayRef<OpFoldResult> tileSizes,
1629 ArrayRef<OpFoldResult> sizeBounds) {
1631 for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
1632 bool isTiled = !isZeroInteger(tileSizes[idx]);
1633 // Before composing, we need to make range a closed interval.
1634 OpFoldResult size = isTiled ? tileSizes[idx] : sizeBounds[idx];
1635 AffineExpr d0 = getAffineDimExpr(0, b.getContext());
1636 IRRewriter rewriter(b);
1637 sizes.push_back(makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, size));
1638 LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
1639 }
1640 return sizes;
1641}
1642
// Return the types the tiled op's tensor results should have: the type of
// the (possibly tiled) operand standing in for each DPS init. Empty for
// pure buffer semantics, where the op has no tensor results.
// NOTE(review): doxygen extraction dropped the function's signature line
// (source line 1643) -- confirm against upstream Utils.cpp.
1644 if (op.hasPureBufferSemantics())
1645 return {};
1646 return llvm::to_vector(
1647 llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
1648 return operands[opOperand.getOperandNumber()].getType();
1649 }));
1650}
1651
// Re-insert the tiled results back into the full output tensors: for each
// DPS init whose operand was produced by an extract_slice, create the
// matching insert_slice of the tiled result; otherwise forward the result
// unchanged. Returns one value per result (empty for buffer semantics).
// NOTE(review): doxygen extraction dropped the function's first signature
// line (source line 1652) -- confirm against upstream Utils.cpp.
1653 LinalgOp op, ValueRange operands,
1654 ValueRange results) {
1655 if (op.hasPureBufferSemantics())
1656 return {};
1657 SmallVector<Value> tensorResults;
1658 tensorResults.reserve(results.size());
1659 // Insert an insert_slice for each output tensor.
1660 unsigned resultIdx = 0;
1661 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
1662 // TODO: use an interface/adaptor to avoid leaking position in
1663 // `tiledOperands`.
1664 Value outputTensor = operands[opOperand.getOperandNumber()];
1665 if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
// Mirror the extract_slice's offsets/sizes/strides so the tile lands back
// in exactly the region it was carved from.
1666 Value inserted = tensor::InsertSliceOp::create(
1667 builder, loc, sliceOp.getSource().getType(), results[resultIdx],
1668 sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
1669 sliceOp.getStrides(), sliceOp.getStaticOffsets(),
1670 sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
1671 tensorResults.push_back(inserted);
1672 } else {
1673 tensorResults.push_back(results[resultIdx]);
1674 }
1675 ++resultIdx;
1676 }
1677 return tensorResults;
1678}
1679
// For each value in `valuesToTile` (paired with the op's operands), compute
// the SliceParameters describing its tile, or std::nullopt when the operand
// is not tiled and is not an output tensor (output tensors always get a
// slice so extract/insert pairs stay explicit for later transformations).
// NOTE(review): doxygen extraction dropped lines here: the return-type line
// of the signature (source line 1680), the `lbs` declaration feeding
// computeTileOffsets (1693), and the `allSliceParams` declaration (1701) --
// confirm against upstream Utils.cpp.
1681computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
1682 ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
1683 ArrayRef<OpFoldResult> tileSizes,
1684 ArrayRef<OpFoldResult> sizeBounds,
1685 bool omitPartialTileCheck) {
1686 assert(ivs.size() == static_cast<size_t>(llvm::count_if(
1687 llvm::make_range(tileSizes.begin(), tileSizes.end()),
1688 [](OpFoldResult v) { return !isZeroInteger(v); })) &&
1689 "expected as many ivs as non-zero sizes");
1690
1691 // Construct (potentially temporary) mins and maxes on which to apply maps
1692 // that define tile subshapes.
1694 computeTileOffsets(builder, loc, ivs, tileSizes);
1695 SmallVector<OpFoldResult> subShapeSizes =
1696 computeTileSizes(builder, loc, tileSizes, sizeBounds);
1697
1698 assert(static_cast<int64_t>(valuesToTile.size()) <=
1699 linalgOp->getNumOperands() &&
1700 "more value to tile than operands.");
1702 allSliceParams.reserve(valuesToTile.size());
1703 for (auto [opOperand, val] :
1704 llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
1705 Value shapedOp = val;
1706 LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
1707 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
1708 // Use `opOperand` as is if it is not tiled and not an output tensor. Having
1709 // an extract/insert slice pair for all output tensors simplifies follow up
1710 // transformations such as padding and bufferization since the
1711 // extract/insert slice pairs make the accessed iteration argument
1712 // subdomains explicit.
1713
1714 Type operandType = opOperand.get().getType();
1715 if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
1716 linalgOp.isDpsInit(&opOperand))) {
1717 allSliceParams.push_back(std::nullopt);
1718 LLVM_DEBUG(llvm::dbgs()
1719 << ": not tiled: use shape: " << operandType << "\n");
1720 continue;
1721 }
1722 LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
1723
1724 allSliceParams.push_back(computeSliceParameters(
1725 builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
1726 omitPartialTileCheck));
1727 }
1728
1729 return allSliceParams;
1730}
1731
// Materialize all tile slices: compute the slice parameters for every value
// to tile and emit a subview/extract_slice where parameters exist; values
// with std::nullopt parameters (untiled, non-output) are passed through.
// NOTE(review): doxygen extraction dropped lines here: the function's first
// signature line (source line 1732), the `ivs` parameter line (1734), and
// the `allSliceParameter` declaration (1738) -- confirm against upstream
// Utils.cpp.
1733 LinalgOp linalgOp, ValueRange valuesToTile,
1735 ArrayRef<OpFoldResult> tileSizes,
1736 ArrayRef<OpFoldResult> sizeBounds,
1737 bool omitPartialTileCheck) {
1739 computeAllSliceParameters(builder, loc, linalgOp, valuesToTile, ivs,
1740 tileSizes, sizeBounds, omitPartialTileCheck);
1741 SmallVector<Value> tiledShapes;
1742 for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
1743 Value valueToTile = std::get<0>(item);
1744 std::optional<SliceParameters> sliceParams = std::get<1>(item);
1745 tiledShapes.push_back(
1746 sliceParams.has_value()
1747 ? materializeTiledShape(builder, loc, valueToTile, *sliceParams)
1748 ->getResult(0)
1749 : valueToTile);
1750 }
1751 return tiledShapes;
1752}
1753
1754void offsetIndices(OpBuilder &b, LinalgOp linalgOp,
1755 ArrayRef<OpFoldResult> offsets) {
1756 IRRewriter rewriter(b);
1757 offsetIndices(rewriter, linalgOp, offsets);
1758}
1759
// Shift the values produced by linalg.index ops inside `linalgOp` by the
// corresponding entry of `offsets` (skipping missing/null offsets), so the
// indices reflect the tile's position in the original iteration space. All
// uses are redirected except the use inside the replacement computation
// itself. No-op for ops without index semantics.
// NOTE(review): doxygen extraction dropped two lines here: source line 1768
// (presumably an OpBuilder::InsertionGuard) and line 1772 (the
// `OpFoldResult applied = makeComposedFoldedAffineApply(` declaration used
// below) -- confirm against upstream Utils.cpp.
1760void offsetIndices(RewriterBase &b, LinalgOp linalgOp,
1761 ArrayRef<OpFoldResult> offsets) {
1762 if (!linalgOp.hasIndexSemantics())
1763 return;
1764
1765 for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
1766 if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
1767 continue;
1769 b.setInsertionPointAfter(indexOp);
1770 AffineExpr index, offset;
1771 bindDims(b.getContext(), index, offset);
1773 b, indexOp.getLoc(), index + offset,
1774 {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
1775 Value materialized =
1776 getValueOrCreateConstantIndexOp(b, indexOp.getLoc(), applied);
// Replace all uses except the one feeding the new index+offset value, which
// must keep reading the original index.
1777 b.replaceUsesWithIf(indexOp, materialized, [&](OpOperand &use) {
1778 return use.getOwner() != materialized.getDefiningOp();
1779 });
1780 }
1781}
1782
1783/// Get the reassociation maps to fold the result of an extract_slice (or
1784/// source of an insert_slice) operation with given offsets, and sizes to its
1785/// rank-reduced version. This is only done for the cases where the size is 1
1786/// and offset is 0. Strictly speaking the offset 0 is not required in general,
1787/// but non-zero offsets are not handled by SPIR-V backend at this point (and
1788/// potentially cannot be handled).
// NOTE(review): doxygen extraction dropped the parameter-list line and the
// `reassociation`/`curr` local declarations (source lines 1790-1792) --
// confirm against upstream Utils.cpp.
1789std::optional<SmallVector<ReassociationIndices>>
// Accumulate dims into `curr`; each non-unit (or dynamic) size flushes the
// accumulated run into `reassociation`, folding preceding unit dims into it.
1793 for (const auto &it : llvm::enumerate(mixedSizes)) {
1794 auto dim = it.index();
1795 auto size = it.value();
1796 curr.push_back(dim);
1797 auto attr = llvm::dyn_cast_if_present<Attribute>(size);
1798 if (attr && cast<IntegerAttr>(attr).getInt() == 1)
1799 continue;
1800 reassociation.emplace_back(ReassociationIndices{});
1801 std::swap(reassociation.back(), curr);
1802 }
1803 // When the reassociations are not empty, then fold the remaining
1804 // unit-dimensions into the last dimension. If the reassociations so far is
1805 // empty, then leave it empty. This will fold everything to a rank-0 tensor.
1806 if (!curr.empty() && !reassociation.empty())
1807 reassociation.back().append(curr.begin(), curr.end());
1808 return reassociation;
1809}
1810
1811} // namespace linalg
1812} // namespace mlir
static SmallVector< int64_t > computePackUnPackPerm(int64_t rank, ArrayRef< int64_t > &innerDimsPos, ArrayRef< int64_t > &outerPerm, PackingMetadata &packingMetadata)
The permutation can be obtained from two permutations: a) Compute the permutation vector to move the ...
Definition Utils.cpp:150
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
Definition Utils.cpp:74
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put t...
Definition Utils.cpp:126
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
Definition PDL.cpp:62
lhs
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
ArrayAttr()
Succeeds even if copies could not be generated due to yet-unimplemented cases. copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock specify the insertion points where the incoming and outgoing copies should be inserted (the insertion happens right before the insertion point). Since `begin` can itself be invalidated due to the memref rewriting done from this method
Affine binary operation expression.
Definition AffineExpr.h:214
AffineExpr getLHS() const
AffineExpr getRHS() const
An integer constant appearing in affine expression.
Definition AffineExpr.h:239
int64_t getValue() const
A dimensional identifier appearing in an affine expression.
Definition AffineExpr.h:223
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
Definition AffineExpr.h:68
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition AffineMap.h:46
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr > > exprsList, MLIRContext *context)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the larges...
AffineExpr getResult(unsigned idx) const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Attributes are known-constant values of operations.
Definition Attributes.h:25
This class represents an argument of a Block.
Definition Value.h:309
unsigned getArgNumber() const
Returns the number of this argument.
Definition Value.h:321
Block * getOwner() const
Returns the block that owns this argument.
Definition Value.h:318
Block represents an ordered list of Operations.
Definition Block.h:33
BlockArgument getArgument(unsigned i)
Definition Block.h:129
unsigned getNumArguments()
Definition Block.h:128
OpListType & getOperations()
Definition Block.h:137
Operation & front()
Definition Block.h:153
Operation * getTerminator()
Get the terminator operation of this block.
Definition Block.cpp:244
iterator_range< iterator > without_terminator()
Return an iterator range over the operation within this block excluding the terminator operation at t...
Definition Block.h:212
IntegerAttr getIndexAttr(int64_t value)
Definition Builders.cpp:108
MLIRContext * getContext() const
Definition Builders.h:56
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
RAII guard to reset the insertion point of the builder when destroyed.
Definition Builders.h:348
This class helps build Operations.
Definition Builders.h:207
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Definition Value.h:257
This is a value defined by a result of an operation.
Definition Value.h:457
unsigned getResultNumber() const
Returns the number of this result.
Definition Value.h:469
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Value getOperand(unsigned idx)
Definition Operation.h:350
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Block & front()
Definition Region.h:65
bool hasOneBlock()
Return true if this region has exactly one block.
Definition Region.h:68
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
bool isSignlessIntOrFloat() const
Return true of this is a signless integer or a float type.
Definition Types.cpp:108
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
Operation * getOwner() const
Return the owner of this operand.
Definition UseDefLists.h:38
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
bool isaConvolutionOpOfType< linalg::DepthwiseConv3DNdhwcDhwcmOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:829
bool isaConvolutionOpOfType< linalg::PoolingNhwcSumOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:978
bool isaConvolutionOpOfType< linalg::DepthwiseConv2DNchwChwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:779
SmallVector< int64_t > getUnPackInverseSrcPerm(linalg::UnPackOp, PackingMetadata &metadata)
Compute inverse permutation for the source tensor (i.e.
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
Definition Utils.cpp:1732
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
Definition Utils.cpp:195
bool isaConvolutionOpOfType< linalg::Conv1DOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:441
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
Definition Utils.cpp:230
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
Definition Utils.cpp:1627
bool isaConvolutionOpOfType< linalg::Conv1DNcwFcwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:520
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
Definition Utils.cpp:1183
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
Definition Utils.cpp:1303
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
Definition Utils.cpp:1613
bool isaConvolutionOpOfType< linalg::Conv2DOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:561
bool isaConvolutionOpOfType< linalg::DepthwiseConv1DNwcWcOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:698
static bool bodyMatcherForMinUnsignedPoolOps(Value yieldVal, Block *body)
Definition Utils.cpp:323
bool isaConvolutionOpOfType< linalg::Conv3DOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:606
bool isaConvolutionOpOfType< linalg::PoolingNhwcMaxOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:884
static bool bodyMatcherForMaxSignedPoolOps(Value yieldVal, Block *body)
Definition Utils.cpp:304
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
Definition Utils.cpp:234
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
Definition Utils.cpp:201
static AffineExpr getAffineMapDim(ArrayAttr indexingMaps, uint32_t mapIndex, uint32_t dimIndex)
Definition Utils.cpp:332
static bool bodyMatcherForPoolOps(Value yieldVal, Block *body)
Utility to match block body for linalg.pool* ops.
Definition Utils.cpp:288
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of a extract_slice (or source of a insert_slice) operat...
Definition Utils.cpp:1790
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
Definition Utils.h:262
@ None
No Distribution.
Definition Utils.h:307
@ CyclicNumProcsGeNumIters
Cyclic distribution where the number of processors can be assumed to be more than or equal to the num...
Definition Utils.h:292
@ Cyclic
Cyclic distribution where no assumption is made about the dynamic relationship between number of proc...
Definition Utils.h:274
@ CyclicNumProcsEqNumIters
Cyclic distribution where the number of processors can be assumed to be equal to the number of iterat...
Definition Utils.h:304
static bool bodyMatcherForMaxUnsignedPoolOps(Value yieldVal, Block *body)
Definition Utils.cpp:311
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
Definition Utils.cpp:1652
bool isaConvolutionOpInterface(LinalgOp linalgOp, bool allowEmptyConvolvedDims=false)
Checks whether linalgOp conforms to ConvolutionOpInterface.
bool isaConvolutionOpOfType< linalg::DepthwiseConv1DNcwCwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:658
static BlockArgument getBlockArgumentWithOptionalExtOps(Value val)
Returns the BlockArgument that leads to val, if any.
Definition Utils.cpp:244
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
Definition Utils.cpp:215
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
Definition Utils.cpp:1754
static bool bodyMatcherForSumPoolOps(Value yieldVal, Block *body)
Definition Utils.cpp:328
SmallVector< int64_t > getPackInverseDestPerm(linalg::PackOp packOp, PackingMetadata &metadata)
Compute inverse permutation for the destination tensor (i.e.
bool isaConvolutionOpOfType< linalg::DepthwiseConv1DNwcWcmOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:738
bool isaConvolutionOpOfType< linalg::PoolingNhwcMinOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:931
static bool bodyMatcherForConvolutionOps(Value yieldVal, Block *body)
Utility to match block body for convolution ops.
Definition Utils.cpp:263
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Definition Utils.cpp:1681
Operation * makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
Definition Utils.cpp:1483
static bool convLayoutMatches(ArrayRef< ArrayRef< AffineExpr > > mapListExpected, ArrayAttr indexingMaps, MLIRContext *context)
Returns true if the given indexing maps matches with the expected indexing maps.
Definition Utils.cpp:425
static bool bodyMatcherForMinSignedPoolOps(Value yieldVal, Block *body)
Definition Utils.cpp:316
static bool matchConvDimAddExprPattern(ArrayAttr indexingMaps, unsigned iDim, unsigned fDim, unsigned oDim, int64_t &dilation, int64_t &stride)
Given an array of AffineMaps indexingMaps verify the following commutatively:- indexingMaps[0]....
Definition Utils.cpp:387
bool isaConvolutionOpOfType< linalg::PoolingNhwcMinUnsignedOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:1072
static Operation * materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
Definition Utils.cpp:1464
bool isaConvolutionOpOfType< linalg::Conv1DNwcWcfOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:479
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value padding, bool nofold, ValueRange typeDynDims={})
Create a tensor::PadOp that pads source to the shape of type whose sizes are assumed to be greater th...
Definition Utils.cpp:1115
static int64_t isDimTimesConstantOrDimOnly(AffineExpr expr, AffineExpr &dim)
Check if expr is either:
Definition Utils.cpp:345
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
Definition Utils.cpp:1282
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
Definition Utils.cpp:1643
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
Definition Utils.cpp:1496
bool isaConvolutionOpOfType< linalg::PoolingNhwcMaxUnsignedOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:1025
auto m_Val(Value v)
Definition Matchers.h:539
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
Definition SCF.cpp:837
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
Definition SCF.h:64
PadOp createPadHighOp(RankedTensorType resType, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder, ValueRange dynOutDims={})
Definition Utils.cpp:23
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition Matchers.h:490
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition AffineExpr.h:311
detail::NameOpMatcher m_Op(StringRef opName)
Matches a named operation.
Definition Matchers.h:379
@ Mul
RHS of mul is always a constant or a symbolic expression.
Definition AffineExpr.h:43
SmallVector< int64_t > computePermutationVector(int64_t permSize, ArrayRef< int64_t > positions, ArrayRef< int64_t > desiredPositions)
Return a permutation vector of size permSize that would result in moving positions into desiredPositi...
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
llvm::TypeSwitch< T, ResultT > TypeSwitch
Definition LLVM.h:144
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition Utils.cpp:111
detail::op_matcher< OpClass > m_Op()
Matches the given OpClass.
Definition Matchers.h:484
SmallVector< int64_t, 2 > ReassociationIndices
Definition Utils.h:27
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition Matchers.h:369
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
llvm::function_ref< Fn > function_ref
Definition LLVM.h:152
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Definition Utils.h:103
Value _and(Value lhs, Value rhs)
Definition Utils.cpp:311
Value slt(Value lhs, Value rhs)
Definition Utils.cpp:334
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
Definition Utils.h:376
static void doit(OpBuilder &b, Location loc, ArrayRef< Range > loopRanges, LinalgOp linalgOp, ArrayRef< utils::IteratorType > iteratorTypes, function_ref< scf::ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilderFn, ArrayRef< linalg::ProcInfo > procInfo={})
Callback function type used to get processor ID, and number of processors used for distribution for a...
Definition Utils.h:312
DistributionMethod distributionMethod
Definition Utils.h:315
static std::optional< BinaryOpKind > matchAsScalarBinaryOp(GenericOp op)
Matches the given linalg op if its body is performing binary operation on int or float scalar values ...
Definition Utils.cpp:93
A struct containing offsets-sizes-strides arguments of the tiled shape.
Definition Utils.h:158
SmallVector< OpFoldResult > strides
Definition Utils.h:161
SmallVector< OpFoldResult > sizes
Definition Utils.h:160
SmallVector< OpFoldResult > offsets
Definition Utils.h:159
LoopVector loops
Definition SCF.h:67