MLIR 22.0.0git
Utils.cpp
1//===- Utils.cpp - Utilities to support the Linalg dialect ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements utilities for the Linalg dialect.
10//
11//===----------------------------------------------------------------------===//
12
13#include "mlir/Dialect/Linalg/Utils/Utils.h"
14
29#include "mlir/IR/AffineExpr.h"
30#include "mlir/IR/AffineExprVisitor.h"
31#include "mlir/IR/AffineMap.h"
32#include "mlir/IR/Matchers.h"
33#include "llvm/ADT/TypeSwitch.h"
34#include "llvm/Support/Debug.h"
35#include <optional>
36
37#define DEBUG_TYPE "linalg-utils"
38
39using namespace mlir;
40using namespace presburger;
41using namespace mlir::affine;
42using namespace mlir::linalg;
43using namespace mlir::scf;
44
45namespace {
46
47// Helper visitor to determine whether an AffineExpr is tiled.
48// This is achieved by traversing every AffineDimExpr with position `pos` and
49// checking whether the corresponding `tileSizes[pos]` is non-zero.
50// This also enforces that only positive coefficients occur in multiplications.
51//
52// Example:
53// `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
54//
55struct TileCheck : public AffineExprVisitor<TileCheck> {
56 TileCheck(ArrayRef<OpFoldResult> tileSizes) : tileSizes(tileSizes) {}
57
58 void visitDimExpr(AffineDimExpr expr) {
59 isTiled |= !isZeroInteger(tileSizes[expr.getPosition()]);
60 }
61 void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
62 visit(expr.getLHS());
63 visit(expr.getRHS());
64 if (expr.getKind() == AffineExprKind::Mul)
65 assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
66 "nonpositive multiplying coefficient");
67 }
68 bool isTiled = false;
69 ArrayRef<OpFoldResult> tileSizes;
70};
71
72} // namespace
73
74static bool isTiled(AffineExpr expr, ArrayRef<OpFoldResult> tileSizes) {
75 if (!expr)
76 return false;
77 TileCheck t(tileSizes);
78 t.visit(expr);
79 return t.isTiled;
80}
81
82// Checks whether the `map` varies with respect to a non-zero `tileSize`.
83static bool isTiled(AffineMap map, ArrayRef<OpFoldResult> tileSizes) {
84 if (!map)
85 return false;
86 for (unsigned r = 0; r < map.getNumResults(); ++r)
87 if (isTiled(map.getResult(r), tileSizes))
88 return true;
89 return false;
90}
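// For instance, with tileSizes = [0, 8] the map (d0, d1) -> (d0, d1 * 4) is
// considered tiled (its second result uses d1, whose tile size is non-zero),
// while (d0, d1) -> (d0) is not.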
91
92std::optional<RegionMatcher::BinaryOpKind>
93RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
94 auto &region = op.getRegion();
95 if (!region.hasOneBlock())
96 return std::nullopt;
97
98 Block &block = region.front();
99 if (block.getNumArguments() != 2 ||
100 !block.getArgument(0).getType().isSignlessIntOrFloat() ||
101 !block.getArgument(1).getType().isSignlessIntOrFloat())
102 return std::nullopt;
103
104 auto &ops = block.getOperations();
105 if (!llvm::hasSingleElement(block.without_terminator()))
106 return std::nullopt;
107
108 using mlir::matchers::m_Val;
109 auto a = m_Val(block.getArgument(0));
110 auto b = m_Val(block.getArgument(1));
111
112 auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
113 if (addPattern.match(&ops.back()))
114 return BinaryOpKind::IAdd;
115
116 return std::nullopt;
117}
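// For illustration, a generic op region of the following shape (value names
// are arbitrary) is recognized as BinaryOpKind::IAdd:
//   ^bb0(%a: i32, %b: i32):
//     %0 = arith.addi %a, %b : i32
//     linalg.yield %0 : i32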
118
119/// Explicit instantiation of loop nest generator for different loop types.
120template struct mlir::linalg::GenerateLoopNest<scf::ForOp>;
121template struct mlir::linalg::GenerateLoopNest<scf::ParallelOp>;
122template struct mlir::linalg::GenerateLoopNest<AffineForOp>;
123
124/// Given a list of subview ranges, extract individual values for lower, upper
125/// bounds and steps and put them into the corresponding vectors.
126static void unpackRanges(OpBuilder &builder, Location loc,
127 ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
128 SmallVectorImpl<Value> &ubs,
129 SmallVectorImpl<Value> &steps) {
130 for (Range range : ranges) {
131 lbs.emplace_back(
132 getValueOrCreateConstantIndexOp(builder, loc, range.offset));
133 ubs.emplace_back(getValueOrCreateConstantIndexOp(builder, loc, range.size));
134 steps.emplace_back(
135 getValueOrCreateConstantIndexOp(builder, loc, range.stride));
136 }
137}
138
139//===----------------------------------------------------------------------===//
140// General utilities
141//===----------------------------------------------------------------------===//
142//
143/// The permutation can be obtained from two permutations:
144/// a) Compute the permutation vector to move the last `numPackedDims` into
145/// the `innerPosDims` of a shape of rank `rank`.
146/// b) Compute the permutation vector to move outer dims if the
147/// `outerPerm` parameter is not empty.
148/// Apply (b) permutation on (a) permutation to get the final permutation.
149static SmallVector<int64_t>
150computePackUnPackPerm(int64_t rank, ArrayRef<int64_t> &innerDimsPos,
151 ArrayRef<int64_t> &outerPerm,
152 PackingMetadata &packingMetadata) {
153 int64_t numPackedDims = innerDimsPos.size();
154 auto lastDims =
155 llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
156 packingMetadata = computePackingMetadata(rank, innerDimsPos);
157 SmallVector<int64_t> innerPositionsPerm =
158 computePermutationVector(rank, lastDims, packingMetadata.insertPositions);
159
160 SmallVector<int64_t> outerPos = packingMetadata.outerPositions;
161 if (!outerPerm.empty())
162 applyPermutationToVector(outerPos, outerPerm);
163 SmallVector<int64_t> outerPositionPerm =
164 computePermutationVector(rank, packingMetadata.outerPositions, outerPos);
165
166 SmallVector<int64_t> packInverseDestPermutation = innerPositionsPerm;
167 applyPermutationToVector(packInverseDestPermutation, outerPositionPerm);
168 return packInverseDestPermutation;
169}
170
171namespace mlir {
172namespace linalg {
173
174SmallVector<int64_t> getPackInverseDestPerm(PackOp packOp,
175 PackingMetadata &metadata) {
176
177 int64_t packedRank = packOp.getDestType().getRank();
178 ArrayRef<int64_t> innerDimPos = packOp.getInnerDimsPos();
179 ArrayRef<int64_t> outerPerm = packOp.getOuterDimsPerm();
180 SmallVector<int64_t> packInvDestPerm =
181 computePackUnPackPerm(packedRank, innerDimPos, outerPerm, metadata);
182 return packInvDestPerm;
183}
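// As a rough example (assuming the usual pack layout conventions): packing a
// 2-D tensor with inner_dims_pos = [0] and one inner tile yields a 3-D
// destination (outerDim0, dim1, tile0); the permutation computed here would
// then be [0, 2, 1], moving the trailing tile dimension next to its outer
// dimension.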
184
185SmallVector<int64_t> getUnPackInverseSrcPerm(UnPackOp unpackOp,
186 PackingMetadata &metadata) {
187 int64_t packedRank = unpackOp.getSourceType().getRank();
188 ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
189 ArrayRef<int64_t> outerPerm = unpackOp.getOuterDimsPerm();
190 SmallVector<int64_t> unpackInvSrcPerm =
191 computePackUnPackPerm(packedRank, innerDimPos, outerPerm, metadata);
192 return unpackInvSrcPerm;
193}
194
195bool allIndexingsAreProjectedPermutation(LinalgOp op) {
196 return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
197 return m.isProjectedPermutation(/*allowZeroInResults=*/true);
198 });
199}
200
201bool hasOnlyScalarElementwiseOp(Region &r) {
202 if (!r.hasOneBlock())
203 return false;
204 for (Operation &op : r.front()) {
205 if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
206 linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
207 OpTrait::hasElementwiseMappableTraits(&op)) ||
208 llvm::any_of(op.getResultTypes(),
209 [](Type type) { return !type.isIntOrIndexOrFloat(); }))
210 return false;
211 }
212 return true;
213}
214
215bool isElementwise(LinalgOp op) {
216 if (op.getNumLoops() != op.getNumParallelLoops())
217 return false;
218
219 if (!allIndexingsAreProjectedPermutation(op))
220 return false;
221
222 // TODO: relax the restrictions on indexing map.
223 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
224 if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
225 return false;
226 }
227 return hasOnlyScalarElementwiseOp(op->getRegion(0));
228}
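// For example, with #id the 2-D identity map, the following generic op is
// elementwise: all-parallel iterators, permutation indexing maps and a purely
// scalar body:
//   linalg.generic {indexing_maps = [#id, #id, #id],
//                   iterator_types = ["parallel", "parallel"]}
//       ins(%a, %b : tensor<8x16xf32>, tensor<8x16xf32>)
//       outs(%init : tensor<8x16xf32>) {
//     ^bb0(%x: f32, %y: f32, %out: f32):
//       %s = arith.addf %x, %y : f32
//       linalg.yield %s : f32
//   } -> tensor<8x16xf32>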
229
230bool isParallelIterator(utils::IteratorType iteratorType) {
231 return iteratorType == utils::IteratorType::parallel;
232}
233
234bool isReductionIterator(utils::IteratorType iteratorType) {
235 return iteratorType == utils::IteratorType::reduction;
236}
237
238//===----------------------------------------------------------------------===//
239// Convolution matcher utilities
240//===----------------------------------------------------------------------===//
241
242/// Returns the BlockArgument that leads to `val`, if any. Traverses optional
243/// ext* ops.
245 BlockArgument blockArg = dyn_cast<BlockArgument>(val);
246 if ((blockArg))
247 return blockArg;
248
249 Operation *defOp = val.getDefiningOp();
250 if (!dyn_cast_if_present<arith::ExtFOp>(defOp) &&
251 !dyn_cast_if_present<arith::ExtSIOp>(defOp) &&
252 !dyn_cast_if_present<arith::ExtUIOp>(defOp)) {
253 return nullptr;
254 }
255 return dyn_cast<BlockArgument>(defOp->getOperand(0));
256}
257
258/// Utility to match block body for convolution ops.
259/// The body is thus expected to yield :-
260/// %out + (%lhs * %rhs)
261/// where: %lhs, %rhs and %out are block arguments and
262/// %lhs and %rhs can have optional upcast operation.
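/// For illustration, a block of the following shape (value names arbitrary)
/// is accepted:
///   ^bb0(%in: f32, %flt: f32, %acc: f32):
///     %m = arith.mulf %in, %flt : f32
///     %a = arith.addf %acc, %m : f32
///     linalg.yield %a : f32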
263static bool bodyMatcherForConvolutionOps(Value yieldVal, Block *body) {
264 Operation *addOp = yieldVal.getDefiningOp();
265 if (!isa_and_present<arith::AddIOp, arith::AddFOp>(addOp))
266 return false;
267
268 Operation *mulOp = addOp->getOperand(1).getDefiningOp();
269 if (!isa_and_present<arith::MulIOp, arith::MulFOp>(mulOp))
270 return false;
271
272 BlockArgument lhsBlockArg =
274 BlockArgument rhsBlockArg =
276 BlockArgument outBlockArg =
278 if (!lhsBlockArg || !rhsBlockArg || !outBlockArg ||
279 lhsBlockArg.getOwner() != body || rhsBlockArg.getOwner() != body ||
280 outBlockArg.getOwner() != body || lhsBlockArg.getArgNumber() != 0 ||
281 rhsBlockArg.getArgNumber() != 1 || outBlockArg.getArgNumber() != 2)
282 return false;
283 return true;
284}
285
286/// Utility to match block body for linalg.pool* ops.
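/// For illustration, a max-pooling style body of the following shape is
/// accepted (the second block argument is the shape-only window operand):
///   ^bb0(%in: f32, %window: f32, %acc: f32):
///     %0 = arith.maximumf %acc, %in : f32
///     linalg.yield %0 : f32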
287template <typename... OpTypes>
288static bool bodyMatcherForPoolOps(Value yieldVal, Block *body) {
289 Operation *defOp = yieldVal.getDefiningOp();
290 if (!(isa_and_present<OpTypes>(defOp) || ...))
291 return false;
292
293 BlockArgument lhsArg =
295 BlockArgument rhsArg =
297 if (!lhsArg || !rhsArg || lhsArg.getOwner() != body ||
298 rhsArg.getOwner() != body || lhsArg.getArgNumber() != 2 ||
299 rhsArg.getArgNumber() != 0)
300 return false;
301 return true;
302}
303
304static bool bodyMatcherForMaxSignedPoolOps(Value yieldVal, Block *body) {
305 return bodyMatcherForPoolOps<arith::MaximumFOp, arith::MaxSIOp>(yieldVal,
306 body);
307}
308
309// max_unsigned ops should not allow float data type.
310// TODO(#164800): Retire OPDSL logic.
311static bool bodyMatcherForMaxUnsignedPoolOps(Value yieldVal, Block *body) {
312 return bodyMatcherForPoolOps<arith::MaximumFOp, arith::MaxUIOp>(yieldVal,
313 body);
314}
315
316static bool bodyMatcherForMinSignedPoolOps(Value yieldVal, Block *body) {
317 return bodyMatcherForPoolOps<arith::MinimumFOp, arith::MinSIOp>(yieldVal,
318 body);
319}
320
321// min_unsigned ops should not allow float data type.
322// TODO(#164800): Retire OPDSL logic.
323static bool bodyMatcherForMinUnsignedPoolOps(Value yieldVal, Block *body) {
324 return bodyMatcherForPoolOps<arith::MinimumFOp, arith::MinUIOp>(yieldVal,
325 body);
326}
327
328static bool bodyMatcherForSumPoolOps(Value yieldVal, Block *body) {
329 return bodyMatcherForPoolOps<arith::AddFOp, arith::AddIOp>(yieldVal, body);
330}
331
332static AffineExpr getAffineMapDim(ArrayAttr indexingMaps, uint32_t mapIndex,
333 uint32_t dimIndex) {
334 auto affineMap = cast<AffineMapAttr>(indexingMaps[mapIndex]).getValue();
335 if (dimIndex < affineMap.getNumResults())
336 return affineMap.getResult(dimIndex);
337 return nullptr;
338}
339
340/// Check if `expr` is either:
341/// - a dimension expr alone (implying multiplication by 1), or
342/// - a multiplication of dimension expr by any positive constant != 1
343/// In both cases we will capture the dimension expression into `dim` and
344/// return the constant multiplier. Returns -1 in case of a match failure.
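/// For example, `d2 * 3` matches and returns 3 (capturing d2 in `dim`), `d4`
/// alone matches and returns 1, while `d2 + 1` does not match and returns -1.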
345static int64_t isDimTimesConstantOrDimOnly(AffineExpr expr, AffineExpr &dim) {
346 if ((dim = dyn_cast<AffineDimExpr>(expr)))
347 return 1;
348
349 auto mulExpr = dyn_cast<AffineBinaryOpExpr>(expr);
350 if (!mulExpr || mulExpr.getKind() != AffineExprKind::Mul)
351 return -1;
352
353 AffineExpr lhs = mulExpr.getLHS();
354 AffineExpr rhs = mulExpr.getRHS();
355
356 AffineConstantExpr cst = nullptr;
357 if (((dim = dyn_cast<AffineDimExpr>(lhs)) &&
358 (cst = dyn_cast<AffineConstantExpr>(rhs))) ||
359 ((dim = dyn_cast<AffineDimExpr>(rhs)) &&
360 (cst = dyn_cast<AffineConstantExpr>(lhs))))
361 return cst.getValue();
362 return -1;
363}
364
365/// Given an array of AffineMaps `indexingMaps` verify the following
366/// commutatively:-
367/// indexingMaps[0].getResult(iDim) ==
368/// indexingMaps[1].getResult(fDim) * <c0> +
369/// indexingMaps[n-1].getResult(oDim) * <c1>
370/// where,
371/// - c0 and c1 can be any constant,
372/// - n is the size of the indexingMaps' array,
373/// - 0, 1 and n-1 are input, filter and output map indices respectively,
374/// - iDim, fDim and oDim are the input, filter and output dimension
375/// indices in their respective indexing maps
376/// Example:
377/// #inputMap = affine_map<(d0, d1, d2, d3, d4, d5, d6)
378/// -> (d0, d1 * 2 + d4 * 3, d2 + d5, d6)>
379/// #filterMap = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
380/// #outputMap = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
381///
382/// Here,
383/// #inputMap[1] = #outputMap[1] * 2 + #filterMap[0] * 3
384/// Therefore,
385/// matchConvDimAddExprPattern(indexingMaps, 1, 0, 1, dilation, stride)
386/// would return true and update dilation = 3 and stride = 2
387static bool matchConvDimAddExprPattern(ArrayAttr indexingMaps, unsigned iDim,
388 unsigned fDim, unsigned oDim,
389 int64_t &dilation, int64_t &stride) {
390 unsigned inputMapIdx = 0, filterMapIdx = 1,
391 outputMapIdx = indexingMaps.size() - 1;
392 AffineExpr inpExpr = getAffineMapDim(indexingMaps, inputMapIdx, iDim);
393 auto addExpr = dyn_cast<AffineBinaryOpExpr>(inpExpr);
394 if (!addExpr || addExpr.getKind() != AffineExprKind::Add)
395 return false;
396
397 AffineExpr dim0, dim1;
398 int64_t c0 = isDimTimesConstantOrDimOnly(addExpr.getLHS(), dim0);
399 int64_t c1 = isDimTimesConstantOrDimOnly(addExpr.getRHS(), dim1);
400
401 if (c0 == -1 || c1 == -1)
402 return false;
403 // Pattern matched with dims and constants extracted.
404 AffineExpr fExpr = getAffineMapDim(indexingMaps, filterMapIdx, fDim);
405 AffineExpr oExpr = getAffineMapDim(indexingMaps, outputMapIdx, oDim);
406 if (dim0 == fExpr && dim1 == oExpr) {
407 dilation = c0;
408 stride = c1;
409 return true;
410 }
411 if (dim1 == fExpr && dim0 == oExpr) {
412 dilation = c1;
413 stride = c0;
414 return true;
415 }
416 return false;
417}
418
419// ---------------------------------------------
420// Matchers for specific convolution operation.
421// ---------------------------------------------
422
423/// Returns true if the given indexing maps match the expected indexing
424/// maps.
426 ArrayAttr indexingMaps, MLIRContext *context) {
427 SmallVector<AffineMap, 4> expectedIndexingMaps =
428 AffineMap::inferFromExprList(mapListExpected, context);
429 return indexingMaps ==
430 ArrayAttr::get(
431 context, llvm::to_vector<4>(llvm::map_range(
432 expectedIndexingMaps, [&](AffineMap m) -> Attribute {
433 return AffineMapAttr::get(m);
434 })));
435}
436
437// #inputMap = affine_map<(N, W, C, w) -> (N, W + w, C)>
438// #filterMap = affine_map<(N, W, C, w) -> (w, C)>
439// #outputMap = affine_map<(N, W, C, w) -> (N, W, C)>
440template <>
441bool isaConvolutionOpOfType<linalg::DepthwiseConv1DNwcWcOp>(
442 LinalgOp op, SmallVector<int64_t> *dilations,
443 SmallVector<int64_t> *strides) {
444 if (isa<linalg::DepthwiseConv1DNwcWcOp>(op))
445 return true;
446
447 assert(isaConvolutionOpInterface(op) &&
448 "expected op to implement ConvolutionOpInterface");
449
450 *dilations = SmallVector<int64_t>(1, 1);
451 *strides = SmallVector<int64_t>(1, 1);
452 MLIRContext *context = op->getContext();
453 AffineExpr N = getAffineDimExpr(0, context);
454 AffineExpr W = getAffineDimExpr(1, context);
455 AffineExpr C = getAffineDimExpr(2, context);
456 AffineExpr w = getAffineDimExpr(3, context);
457 ArrayAttr indexingMaps = op.getIndexingMaps();
458 // First fetch dilations/strides :-
459 // Match: W * stride + w * dilation
460 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
461 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
462 return false;
463 // Match expected indexing maps
465 {/*inputMap=*/{N, W * (*strides)[0] + w * (*dilations)[0], C},
466 /*filterMap=*/{w, C},
467 /*outputMap=*/{N, W, C}},
468 indexingMaps, context))
469 return false;
470 // Match body
471 Block *body = op.getBlock();
472 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
473 Value yieldVal = yieldOp.getOperand(0);
474 return bodyMatcherForConvolutionOps(yieldVal, body);
475}
476
477// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, C, H + h, W + w)>
478// #filterMap = affine_map<(N, H, W, C, h, w) -> (C, h, w)>
479// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, C, H, W)>
480template <>
481bool isaConvolutionOpOfType<linalg::DepthwiseConv2DNchwChwOp>(
482 LinalgOp op, SmallVector<int64_t> *dilations,
483 SmallVector<int64_t> *strides) {
484 if (isa<linalg::DepthwiseConv2DNchwChwOp>(op))
485 return true;
486
487 assert(isaConvolutionOpInterface(op) &&
488 "expected op to implement ConvolutionOpInterface");
489
490 *dilations = SmallVector<int64_t>(2, 1);
491 *strides = SmallVector<int64_t>(2, 1);
492 MLIRContext *context = op->getContext();
493 AffineExpr N = getAffineDimExpr(0, context);
494 AffineExpr H = getAffineDimExpr(1, context);
495 AffineExpr W = getAffineDimExpr(2, context);
496 AffineExpr C = getAffineDimExpr(3, context);
497 AffineExpr h = getAffineDimExpr(4, context);
498 AffineExpr w = getAffineDimExpr(5, context);
499 ArrayAttr indexingMaps = op.getIndexingMaps();
500 // First fetch dilations/strides :-
501 // Match: H * stride + h * dilation
502 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
503 /*oDim=*/2, (*dilations)[0], (*strides)[0]))
504 return false;
505 // Match: W * stride + w * dilation
506 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/3, /*fDim=*/2,
507 /*oDim=*/3, (*dilations)[1], (*strides)[1]))
508 return false;
509 // Match expected indexing maps
511 {/*inputMap=*/{N, C, H * (*strides)[0] + h * (*dilations)[0],
512 W * (*strides)[1] + w * (*dilations)[1]},
513 /*filterMap=*/{C, h, w},
514 /*outputMap=*/{N, C, H, W}},
515 indexingMaps, context))
516 return false;
517 // Match body
518 Block *body = op.getBlock();
519 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
520 Value yieldVal = yieldOp.getOperand(0);
521 return bodyMatcherForConvolutionOps(yieldVal, body);
522}
523
524// #inputMap = affine_map<(N, D, H, W, CM, d, h, w, C)
525// -> (N, D + d, H + h, W + w, C)>
526// #filterMap = affine_map<(N, D, H, W, CM, d, h, w, C)
527// -> (d, h, w, C, CM)>
528// #outputMap = affine_map<(N, D, H, W, CM, d, h, w, C)
529// -> (N, D, H, W, C, CM)>
530template <>
531bool isaConvolutionOpOfType<linalg::DepthwiseConv3DNdhwcDhwcmOp>(
532 LinalgOp op, SmallVector<int64_t> *dilations,
533 SmallVector<int64_t> *strides) {
534 if (isa<linalg::DepthwiseConv3DNdhwcDhwcmOp>(op))
535 return true;
536
537 assert(isaConvolutionOpInterface(op) &&
538 "expected op to implement ConvolutionOpInterface");
539
540 *dilations = SmallVector<int64_t>(3, 1);
541 *strides = SmallVector<int64_t>(3, 1);
542 MLIRContext *context = op->getContext();
543 AffineExpr N = getAffineDimExpr(0, context);
544 AffineExpr D = getAffineDimExpr(1, context);
545 AffineExpr H = getAffineDimExpr(2, context);
546 AffineExpr W = getAffineDimExpr(3, context);
547 AffineExpr CM = getAffineDimExpr(4, context);
548 AffineExpr d = getAffineDimExpr(5, context);
549 AffineExpr h = getAffineDimExpr(6, context);
550 AffineExpr w = getAffineDimExpr(7, context);
551 AffineExpr C = getAffineDimExpr(8, context);
552 ArrayAttr indexingMaps = op.getIndexingMaps();
553 // First fetch dilations/strides :-
554 // Match: D * stride + d * dilation
555 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
556 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
557 return false;
558 // Match: H * stride + h * dilation
559 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
560 /*oDim=*/2, (*dilations)[1], (*strides)[1]))
561 return false;
562 // Match: W * stride + w * dilation
563 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/3, /*fDim=*/2,
564 /*oDim=*/3, (*dilations)[2], (*strides)[2]))
565 return false;
566 // Match expected indexing maps
568 {/*inputMap=*/{N, D * (*strides)[0] + d * (*dilations)[0],
569 H * (*strides)[1] + h * (*dilations)[1],
570 W * (*strides)[2] + w * (*dilations)[2], C},
571 /*filterMap=*/{d, h, w, C, CM},
572 /*outputMap=*/{N, D, H, W, C, CM}},
573 indexingMaps, context))
574 return false;
575 // Match body
576 Block *body = op.getBlock();
577 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
578 Value yieldVal = yieldOp.getOperand(0);
579 return bodyMatcherForConvolutionOps(yieldVal, body);
580}
581
582// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
583// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)>
584// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
585template <>
586bool isaConvolutionOpOfType<linalg::PoolingNhwcMaxOp>(
587 LinalgOp op, SmallVector<int64_t> *dilations,
588 SmallVector<int64_t> *strides) {
589 if (isa<linalg::PoolingNhwcMaxOp>(op))
590 return true;
591
592 assert(isaConvolutionOpInterface(op) &&
593 "expected op to implement ConvolutionOpInterface");
594
595 *dilations = SmallVector<int64_t>(2, 1);
596 *strides = SmallVector<int64_t>(2, 1);
597 MLIRContext *context = op->getContext();
598 AffineExpr N = getAffineDimExpr(0, context);
599 AffineExpr H = getAffineDimExpr(1, context);
600 AffineExpr W = getAffineDimExpr(2, context);
601 AffineExpr C = getAffineDimExpr(3, context);
602 AffineExpr h = getAffineDimExpr(4, context);
603 AffineExpr w = getAffineDimExpr(5, context);
604 ArrayAttr indexingMaps = op.getIndexingMaps();
605 // First fetch dilations/strides :-
606 // Match: H * stride + h * dilation
607 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
608 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
609 return false;
610 // Match: W * stride + w * dilation
611 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
612 /*oDim=*/2, (*dilations)[1], (*strides)[1]))
613 return false;
614 // Match expected indexing maps
616 {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0],
617 W * (*strides)[1] + w * (*dilations)[1], C},
618 /*filterMap=*/{h, w},
619 /*outputMap=*/{N, H, W, C}},
620 indexingMaps, context))
621 return false;
622 // Match body
623 Block *body = op.getBlock();
624 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
625 Value yieldVal = yieldOp.getOperand(0);
626 return bodyMatcherForMaxSignedPoolOps(yieldVal, body);
627}
628
629// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
630// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)>
631// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
632template <>
633bool isaConvolutionOpOfType<linalg::PoolingNhwcMinOp>(
634 LinalgOp op, SmallVector<int64_t> *dilations,
635 SmallVector<int64_t> *strides) {
636 if (isa<linalg::PoolingNhwcMinOp>(op))
637 return true;
638
639 assert(isaConvolutionOpInterface(op) &&
640 "expected op to implement ConvolutionOpInterface");
641
642 *dilations = SmallVector<int64_t>(2, 1);
643 *strides = SmallVector<int64_t>(2, 1);
644 MLIRContext *context = op->getContext();
645 AffineExpr N = getAffineDimExpr(0, context);
646 AffineExpr H = getAffineDimExpr(1, context);
647 AffineExpr W = getAffineDimExpr(2, context);
648 AffineExpr C = getAffineDimExpr(3, context);
649 AffineExpr h = getAffineDimExpr(4, context);
650 AffineExpr w = getAffineDimExpr(5, context);
651 ArrayAttr indexingMaps = op.getIndexingMaps();
652 // First fetch dilations/strides :-
653 // Match: H * stride + h * dilation
654 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
655 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
656 return false;
657 // Match: W * stride + w * dilation
658 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
659 /*oDim=*/2, (*dilations)[1], (*strides)[1]))
660 return false;
661 // Match expected indexing maps
663 {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0],
664 W * (*strides)[1] + w * (*dilations)[1], C},
665 /*filterMap=*/{h, w},
666 /*outputMap=*/{N, H, W, C}},
667 indexingMaps, context))
668 return false;
669 // Match body
670 Block *body = op.getBlock();
671 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
672 Value yieldVal = yieldOp.getOperand(0);
673 return bodyMatcherForMinSignedPoolOps(yieldVal, body);
674}
675
676// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
677// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)>
678// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
679template <>
680bool isaConvolutionOpOfType<linalg::PoolingNhwcSumOp>(
681 LinalgOp op, SmallVector<int64_t> *dilations,
682 SmallVector<int64_t> *strides) {
683 if (isa<linalg::PoolingNhwcSumOp>(op))
684 return true;
685
686 assert(isaConvolutionOpInterface(op) &&
687 "expected op to implement ConvolutionOpInterface");
688
689 *dilations = SmallVector<int64_t>(2, 1);
690 *strides = SmallVector<int64_t>(2, 1);
691 MLIRContext *context = op->getContext();
692 AffineExpr N = getAffineDimExpr(0, context);
693 AffineExpr H = getAffineDimExpr(1, context);
694 AffineExpr W = getAffineDimExpr(2, context);
695 AffineExpr C = getAffineDimExpr(3, context);
696 AffineExpr h = getAffineDimExpr(4, context);
697 AffineExpr w = getAffineDimExpr(5, context);
698 ArrayAttr indexingMaps = op.getIndexingMaps();
699 // First fetch dilations/strides :-
700 // Match: H * stride + h * dilation
701 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
702 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
703 return false;
704 // Match: W * stride + w * dilation
705 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
706 /*oDim=*/2, (*dilations)[1], (*strides)[1]))
707 return false;
708 // Match expected indexing maps
710 {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0],
711 W * (*strides)[1] + w * (*dilations)[1], C},
712 /*filterMap=*/{h, w},
713 /*outputMap=*/{N, H, W, C}},
714 indexingMaps, context))
715 return false;
716 // Match body
717 Block *body = op.getBlock();
718 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
719 Value yieldVal = yieldOp.getOperand(0);
720 return bodyMatcherForSumPoolOps(yieldVal, body);
721}
722
723// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
724// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)>
725// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
726template <>
727bool isaConvolutionOpOfType<linalg::PoolingNhwcMaxUnsignedOp>(
728 LinalgOp op, SmallVector<int64_t> *dilations,
729 SmallVector<int64_t> *strides) {
730 if (isa<linalg::PoolingNhwcMaxUnsignedOp>(op))
731 return true;
732
733 assert(isaConvolutionOpInterface(op) &&
734 "expected op to implement ConvolutionOpInterface");
735
736 *dilations = SmallVector<int64_t>(2, 1);
737 *strides = SmallVector<int64_t>(2, 1);
738 MLIRContext *context = op->getContext();
739 AffineExpr N = getAffineDimExpr(0, context);
740 AffineExpr H = getAffineDimExpr(1, context);
741 AffineExpr W = getAffineDimExpr(2, context);
742 AffineExpr C = getAffineDimExpr(3, context);
743 AffineExpr h = getAffineDimExpr(4, context);
744 AffineExpr w = getAffineDimExpr(5, context);
745 ArrayAttr indexingMaps = op.getIndexingMaps();
746 // First fetch dilations/strides :-
747 // Match: H * stride + h * dilation
748 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
749 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
750 return false;
751 // Match: W * stride + w * dilation
752 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
753 /*oDim=*/2, (*dilations)[1], (*strides)[1]))
754 return false;
755 // Match expected indexing maps
757 {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0],
758 W * (*strides)[1] + w * (*dilations)[1], C},
759 /*filterMap=*/{h, w},
760 /*outputMap=*/{N, H, W, C}},
761 indexingMaps, context))
762 return false;
763 // Match body
764 Block *body = op.getBlock();
765 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
766 Value yieldVal = yieldOp.getOperand(0);
767 return bodyMatcherForMaxUnsignedPoolOps(yieldVal, body);
768}
769
770// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)>
771// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)>
772// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)>
773template <>
774bool isaConvolutionOpOfType<linalg::PoolingNhwcMinUnsignedOp>(
775 LinalgOp op, SmallVector<int64_t> *dilations,
776 SmallVector<int64_t> *strides) {
777 if (isa<linalg::PoolingNhwcMinUnsignedOp>(op))
778 return true;
779
780 assert(isaConvolutionOpInterface(op) &&
781 "expected op to implement ConvolutionOpInterface");
782
783 *dilations = SmallVector<int64_t>(2, 1);
784 *strides = SmallVector<int64_t>(2, 1);
785 MLIRContext *context = op->getContext();
786 AffineExpr N = getAffineDimExpr(0, context);
787 AffineExpr H = getAffineDimExpr(1, context);
788 AffineExpr W = getAffineDimExpr(2, context);
789 AffineExpr C = getAffineDimExpr(3, context);
790 AffineExpr h = getAffineDimExpr(4, context);
791 AffineExpr w = getAffineDimExpr(5, context);
792 ArrayAttr indexingMaps = op.getIndexingMaps();
793 // First fetch dilations/strides :-
794 // Match: H * stride + h * dilation
795 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0,
796 /*oDim=*/1, (*dilations)[0], (*strides)[0]))
797 return false;
798 // Match: W * stride + w * dilation
799 if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1,
800 /*oDim=*/2, (*dilations)[1], (*strides)[1]))
801 return false;
802 // Match expected indexing maps
804 {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0],
805 W * (*strides)[1] + w * (*dilations)[1], C},
806 /*filterMap=*/{h, w},
807 /*outputMap=*/{N, H, W, C}},
808 indexingMaps, context))
809 return false;
810 // Match body
811 Block *body = op.getBlock();
812 auto yieldOp = cast<linalg::YieldOp>(body->getTerminator());
813 Value yieldVal = yieldOp.getOperand(0);
814 return bodyMatcherForMinUnsignedPoolOps(yieldVal, body);
815}
816
817Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
818 Value source, Value pad, bool nofold,
819 ValueRange typeDynDims) {
820 // Exit if `source` is not defined by an ExtractSliceOp.
821 auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
822 if (!sliceOp)
823 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
824 typeDynDims);
825
826 // Search the `source` use-def chain for padded LinalgOps.
827 Value current = sliceOp.getSource();
828 while (current) {
829 auto linalgOp = current.getDefiningOp<LinalgOp>();
830 if (!linalgOp)
831 break;
832 OpResult opResult = cast<OpResult>(current);
833 current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
834 }
835 auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
836
837 // Exit if the search fails to match a tensor::PadOp at the end of the matched
838 // LinalgOp sequence.
839 if (!padOp)
840 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
841 typeDynDims);
842
843 // Exit if the padded result type does not match.
844 if (sliceOp.getSource().getType() != type)
845 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
846 typeDynDims);
847
848 // Exit if the LinalgOps are not high padded.
849 if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
850 return getConstantIntValue(ofr) != static_cast<int64_t>(0);
851 }))
852 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
853 typeDynDims);
854
855 // Exit if `padOpSliceOp`, which defines the slice used by
856 // `padOp`, is rank-reducing.
857 auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
858 if (!padOpSliceOp ||
859 sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
860 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
861 typeDynDims);
862
863 // Exit if the dynamic sizes of `sliceOp` do not match the sizes of the
864 // slice padded by `padOp`.
865 if (llvm::any_of(
866 llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
867 [](std::tuple<OpFoldResult, OpFoldResult> it) {
868 return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
869 }))
870 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
871 typeDynDims);
872
873 // Exit if the padding values do not match.
874 Attribute padOpPadAttr, padAttr;
875 Value padOpPad = padOp.getConstantPaddingValue();
876 if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
877 !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
878 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
879 typeDynDims);
880
881 // Return the padded result if the padding values and sizes match.
882 return sliceOp.getSource();
883}
884
885GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) {
886 auto memrefTypeTo = cast<MemRefType>(to.getType());
887#ifndef NDEBUG
888 auto memrefTypeFrom = cast<MemRefType>(from.getType());
889 assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
890 "`from` and `to` memref must have the same rank");
891#endif // NDEBUG
892
893 AffineMap id =
894 AffineMap::getMultiDimIdentityMap(memrefTypeTo.getRank(), b.getContext());
895 SmallVector<utils::IteratorType> iteratorTypes(memrefTypeTo.getRank(),
896 utils::IteratorType::parallel);
897 return linalg::GenericOp::create(
898 b, loc,
899 /*inputs=*/from,
900 /*outputs=*/to,
901 /*indexingMaps=*/llvm::ArrayRef({id, id}),
902 /*iteratorTypes=*/iteratorTypes,
903 [](OpBuilder &b, Location loc, ValueRange args) {
904 linalg::YieldOp::create(b, loc, args.front());
905 });
906}
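// For example, copying between two rank-2 memrefs produces roughly the
// following, where #id is the 2-D identity map:
//   linalg.generic {indexing_maps = [#id, #id],
//                   iterator_types = ["parallel", "parallel"]}
//       ins(%from : memref<?x?xf32>) outs(%to : memref<?x?xf32>) {
//     ^bb0(%in: f32, %out: f32):
//       linalg.yield %in : f32
//   }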
907
908/// Specialization to build an scf "for" nest.
909template <>
910void GenerateLoopNest<scf::ForOp>::doit(
911 OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
912 ArrayRef<utils::IteratorType> iteratorTypes,
913 function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
914 ValueRange)>
915 bodyBuilderFn,
916 ArrayRef<linalg::ProcInfo> procInfo) {
917 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
918 "expected as many entries for proc info as number of loops, even if "
919 "they are null entries");
920 SmallVector<Value> iterArgInitValues;
921 if (!linalgOp.hasPureBufferSemantics())
922 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
923 SmallVector<Value, 4> lbs, ubs, steps;
924 unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
925 LoopNest loopNest = mlir::scf::buildLoopNest(
926 b, loc, lbs, ubs, steps, iterArgInitValues,
927 [&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
928 assert(iterArgs.size() == iterArgInitValues.size() &&
929 "expect the number of output tensors and iter args to match");
930 SmallVector<Value> operandValuesToUse = linalgOp->getOperands();
931 if (!iterArgs.empty()) {
932 operandValuesToUse = linalgOp.getDpsInputs();
933 operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
934 }
935 return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
936 });
937
938 if (loopNest.loops.empty() || procInfo.empty())
939 return;
940
941 // Filter out scf.for loops that were created out of parallel dimensions.
942 for (const auto &loop : llvm::enumerate(loopNest.loops)) {
943 if (procInfo[loop.index()].distributionMethod ==
944 DistributionMethod::Cyclic) {
945 mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId,
946 procInfo[loop.index()].nprocs);
947 }
948 }
949}
950
951/// Specialization to build affine "for" nest.
952template <>
953void GenerateLoopNest<AffineForOp>::doit(
954 OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
955 ArrayRef<utils::IteratorType> iteratorTypes,
956 function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
957 ValueRange)>
958 bodyBuilderFn,
959 ArrayRef<linalg::ProcInfo> /*procInfo*/) {
960 SmallVector<Value> iterArgInitValues;
961 if (!linalgOp.hasPureBufferSemantics())
962 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
963 assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
964 SmallVector<Value, 4> lbs, ubs, steps;
965 unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
966
967 // Affine loops require constant steps.
968 SmallVector<int64_t, 4> constantSteps;
969 constantSteps.reserve(steps.size());
970 for (Value v : steps) {
971 auto constVal = getConstantIntValue(v);
972 assert(constVal.has_value() && "Affine loops require constant steps");
973 constantSteps.push_back(constVal.value());
974 }
975
976 affine::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
977 [&](OpBuilder &b, Location loc, ValueRange ivs) {
978 bodyBuilderFn(b, loc, ivs,
979 linalgOp->getOperands());
980 });
981}
982
983/// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
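/// That is, for cyclic distribution: lb <- lb + procId * step and
/// step <- nprocs * step, while ub is left unchanged.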
984void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
985 Value nprocs, Value &lb, Value &ub,
986 Value &step) {
987 AffineExpr d0, d1;
988 bindDims(b.getContext(), d0, d1);
989 AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
990 lb =
991 affine::makeComposedAffineApply(b, loc, d0 + d1 * s0, {lb, procId, step});
992 step = affine::makeComposedAffineApply(b, loc, d0 * s0, {nprocs, step});
993}
994
995/// Generates a loop nest consisting of scf.parallel and scf.for, depending
996/// on the `iteratorTypes.` Consecutive parallel loops create a single
997/// scf.parallel operation; each sequential loop creates a new scf.for
998/// operation. The body of the innermost loop is populated by
999/// `bodyBuilderFn` that accepts a range of induction variables for all
1000/// loops. `ivStorage` is used to store the partial list of induction
1001/// variables.
1002// TODO: this function can be made iterative instead. However, it
1003// will have at most as many recursive calls as nested loops, which rarely
1004// exceeds 10.
1005static void generateParallelLoopNest(
1006 OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
1007 ValueRange steps, ArrayRef<utils::IteratorType> iteratorTypes,
1008 ArrayRef<linalg::ProcInfo> procInfo,
1009 function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
1010 SmallVectorImpl<Value> &ivStorage) {
1011 assert(lbs.size() == ubs.size());
1012 assert(lbs.size() == steps.size());
1013 assert(lbs.size() == iteratorTypes.size());
1014 assert(procInfo.empty() || (lbs.size() == procInfo.size()));
1015
1016 // If there are no (more) loops to be generated, generate the body and be
1017 // done with it.
1018 if (iteratorTypes.empty()) {
1019 bodyBuilderFn(b, loc, ivStorage);
1020 return;
1021 }
1022
1023 // If there are no outer parallel loops, generate one sequential loop and
1024 // recurse.
1025 if (!isParallelIterator(iteratorTypes.front())) {
1026 LoopNest singleLoop = buildLoopNest(
1027 b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
1028 [&](OpBuilder &b, Location loc, ValueRange ivs) {
1029 ivStorage.append(ivs.begin(), ivs.end());
1030 generateParallelLoopNest(
1031 b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
1032 iteratorTypes.drop_front(),
1033 procInfo.empty() ? procInfo : procInfo.drop_front(),
1034 bodyBuilderFn, ivStorage);
1035 });
1036 return;
1037 }
1038
1039 unsigned nLoops = iteratorTypes.size();
1040 unsigned numProcessed = 0;
1041 DistributionMethod distributionMethod = DistributionMethod::None;
1042 if (procInfo.empty()) {
1043 numProcessed = nLoops - iteratorTypes.drop_while(isParallelIterator).size();
1044 } else {
1045 distributionMethod = procInfo.front().distributionMethod;
1046 numProcessed =
1047 nLoops - procInfo
1048 .drop_while([&](linalg::ProcInfo p) {
1049 return p.distributionMethod == distributionMethod;
1050 })
1051 .size();
1052 }
1053
1054 auto remainderProcInfo =
1055 procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
1056 switch (distributionMethod) {
1057 case DistributionMethod::None: {
1058 // Generate a single parallel loop-nest operation for all outermost
1059 // parallel loops and recurse.
1060 scf::ParallelOp::create(
1061 b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
1062 steps.take_front(numProcessed),
1063 [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
1064 ivStorage.append(localIvs.begin(), localIvs.end());
1065 generateParallelLoopNest(
1066 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
1067 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
1068 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
1069 bodyBuilderFn, ivStorage);
1070 });
1071 return;
1072 }
1073 case DistributionMethod::Cyclic: {
1074 // Generate a single parallel loop-nest operation for all outermost
1075 // parallel loops and recurse.
1076 scf::ParallelOp::create(
1077 b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
1078 steps.take_front(numProcessed),
1079 [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
1080 ivStorage.append(localIvs.begin(), localIvs.end());
1081 generateParallelLoopNest(
1082 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
1083 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
1084 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
1085 bodyBuilderFn, ivStorage);
1086 });
1087 return;
1088 }
1089 case DistributionMethod::CyclicNumProcsGeNumIters: {
1090 // Check (for the processed loops) that the iteration is in-bounds.
1091 ArithBuilder ab(b, loc);
1092 Value cond = ab.slt(lbs[0], ubs[0]);
1093 for (unsigned i = 1; i < numProcessed; ++i)
1094 cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
1095 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
1096 scf::IfOp::create(b, loc, cond, [&](OpBuilder &b, Location loc) {
1097 generateParallelLoopNest(b, loc, lbs.drop_front(numProcessed),
1098 ubs.drop_front(numProcessed),
1099 steps.drop_front(numProcessed),
1100 iteratorTypes.drop_front(numProcessed),
1101 remainderProcInfo, bodyBuilderFn, ivStorage);
1102 scf::YieldOp::create(b, loc, ValueRange{});
1103 });
1104 return;
1105 }
1106 case DistributionMethod::CyclicNumProcsEqNumIters: {
1107 // No check/loops needed here. Set the `%iv` to be the `%lb` and proceed
1108 // with inner loop generation.
1109 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
1110 generateParallelLoopNest(
1111 b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
1112 steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
1113 remainderProcInfo, bodyBuilderFn, ivStorage);
1114 return;
1115 }
1116}
1117
1118/// Specialization for generating a mix of parallel and sequential scf loops.
1119template <>
1120void GenerateLoopNest<scf::ParallelOp>::doit(
1121 OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
1122 ArrayRef<utils::IteratorType> iteratorTypes,
1123 function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
1124 ValueRange)>
1125 bodyBuilderFn,
1126 ArrayRef<linalg::ProcInfo> procInfo) {
1127 SmallVector<Value> iterArgInitValues;
1128 if (!linalgOp.hasPureBufferSemantics())
1129 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
1130 assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
1131 // This function may be passed more iterator types than ranges.
1132 assert(iteratorTypes.size() >= loopRanges.size() &&
1133 "expected iterator type for all ranges");
1134 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
1135 "expected proc information for all loops when present");
1136 iteratorTypes = iteratorTypes.take_front(loopRanges.size());
1137 SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
1138 unsigned numLoops = iteratorTypes.size();
1139 ivs.reserve(numLoops);
1140 lbsStorage.reserve(numLoops);
1141 ubsStorage.reserve(numLoops);
1142 stepsStorage.reserve(numLoops);
1143
1144 // Get the loop lb, ub, and step.
1145 unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
1146
1147 // Modify the lb, ub, and step based on the distribution options.
1148 for (const auto &it : llvm::enumerate(procInfo)) {
1149 if (it.value().distributionMethod != linalg::DistributionMethod::None) {
1150 updateBoundsForCyclicDistribution(
1151 b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
1152 ubsStorage[it.index()], stepsStorage[it.index()]);
1153 }
1154 }
1155 ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
1156 generateParallelLoopNest(
1157 b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
1158 [&](OpBuilder &b, Location loc, ValueRange ivs) {
1159 bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
1160 },
1161 ivs);
1162
1163 assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
1164}
1165
1166static Operation *materializeTiledShape(OpBuilder &builder, Location loc,
1167 Value valueToTile,
1168 const SliceParameters &sliceParams) {
1169 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
1170 auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
1171 .Case([&](MemRefType) {
1172 return memref::SubViewOp::create(
1173 builder, loc, valueToTile, sliceParams.offsets,
1174 sliceParams.sizes, sliceParams.strides);
1175 })
1176 .Case([&](RankedTensorType) {
1177 return tensor::ExtractSliceOp::create(
1178 builder, loc, valueToTile, sliceParams.offsets,
1179 sliceParams.sizes, sliceParams.strides);
1180 })
1181 .DefaultUnreachable("Unexpected shaped type");
1182 return sliceOp;
1183}
1184
1185Operation *makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
1186 ArrayRef<OpFoldResult> tileSizes, AffineMap map,
1187 ArrayRef<OpFoldResult> lbs,
1188 ArrayRef<OpFoldResult> ubs,
1189 ArrayRef<OpFoldResult> subShapeSizes,
1190 bool omitPartialTileCheck) {
1191 SliceParameters sliceParams =
1192 computeSliceParameters(builder, loc, valueToTile, tileSizes, map, lbs,
1193 ubs, subShapeSizes, omitPartialTileCheck);
1194 return materializeTiledShape(builder, loc, valueToTile, sliceParams);
1195}
1196
1197SliceParameters
1198computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
1199 ArrayRef<OpFoldResult> tileSizes, AffineMap map,
1200 ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
1201 ArrayRef<OpFoldResult> subShapeSizes,
1202 bool omitPartialTileCheck) {
1203 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
1204 assert(shapedType && "only shaped types can be tiled");
1205 ArrayRef<int64_t> shape = shapedType.getShape();
1206 int64_t rank = shapedType.getRank();
1207
1208 // Compute offsets/sizes/strides for the tile.
1209 SliceParameters sliceParams;
1210 sliceParams.offsets.reserve(rank);
1211 sliceParams.sizes.reserve(rank);
1212 sliceParams.strides.reserve(rank);
1213 for (unsigned r = 0; r < rank; ++r) {
1214 LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
1215 if (!isTiled(map.getSubMap({r}), tileSizes)) {
1216 sliceParams.offsets.push_back(builder.getIndexAttr(0));
1217 OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
1218 sliceParams.sizes.push_back(dim);
1219 sliceParams.strides.push_back(builder.getIndexAttr(1));
1220 LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
1221 continue;
1222 }
1223 LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
1224
1225 // Tiling creates a new slice at the proper index, the slice step is 1
1226 // (i.e. the op does not subsample, stepping occurs in the loop).
1227 auto m = map.getSubMap({r});
1228 LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
1229 IRRewriter rewriter(builder);
1230 // The offset of the slice is m(lbs) - m(0).
1231 SmallVector<Attribute> zeros(lbs.size(), rewriter.getIndexAttr(0));
1232 SmallVector<Attribute> mAtZero;
1233 [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
1234 assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
1235 int64_t mAtZeroInt =
1236 cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
1237 OpFoldResult offset = makeComposedFoldedAffineApply(
1238 rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
1239 sliceParams.offsets.push_back(offset);
1240
1241 OpFoldResult closedIntSize =
1242 makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes);
1243 // Resulting size needs to be made half open interval again.
1244 AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
1245 OpFoldResult size =
1246 makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize);
1247 LLVM_DEBUG(llvm::dbgs()
1248 << "computeSliceParameters: raw size: " << size << "\n");
1249 LLVM_DEBUG(llvm::dbgs()
1250 << "computeSliceParameters: new offset: " << offset << "\n");
1251 sliceParams.strides.push_back(builder.getIndexAttr(1));
1252
1253 if (omitPartialTileCheck) {
1254 // We statically know that the partial/boundary tile condition is
1255 // unnecessary.
1256 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
1257 sliceParams.sizes.push_back(size);
1258 continue;
1259 }
1260
1261 // The size of the subview / extract_slice should be trimmed to avoid
1262 // out-of-bounds accesses, unless:
1263 // a. We statically know the subshape size divides the shape size evenly.
1264 // b. The subshape size is 1. According to the way the loops are set up,
1265 // tensors with "0" dimensions would never be constructed.
1266 int64_t shapeSize = shape[r];
1267 std::optional<int64_t> sizeCst = getConstantIntValue(size);
1268 auto hasTileSizeOne = sizeCst == 1;
1269 auto dividesEvenly = sizeCst && ShapedType::isStatic(shapeSize) &&
1270 ((shapeSize % *sizeCst) == 0);
1271 if (!hasTileSizeOne && !dividesEvenly) {
1272 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
1273 << ", size: " << size
1274 << ": make sure in bound with affine.min\n");
1275
1276 AffineExpr dim0, dim1, dim2;
1277 MLIRContext *context = builder.getContext();
1278 bindDims(context, dim0, dim1, dim2);
1279
1280 // Get the dimension size for this dimension. We need to first calculate
1281 // the max index and then plus one. This is important because for
1282 // convolution ops, we have its input window dimension's affine map of the
1283 // form `(d0 * s0 + d1)`, where `d0`/`d1 is an output/filter window
1284 // dimension and `s0` is stride. Directly use the dimension size of
1285 // output/filter window dimensions will cause incorrect calculation.
1286 AffineMap minusOneMap = AffineMap::inferFromExprList(
1287 {ArrayRef<AffineExpr>{dim0 - 1}}, context)
1288 .front();
1289 AffineMap plusOneMap = AffineMap::inferFromExprList(
1290 {ArrayRef<AffineExpr>{dim0 + 1}}, context)
1291 .front();
1292 SmallVector<OpFoldResult> maxIndices =
1293 llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
1294 return makeComposedFoldedAffineApply(rewriter, loc, minusOneMap,
1295 {ub});
1296 }));
1297 OpFoldResult maxIndex =
1298 makeComposedFoldedAffineApply(rewriter, loc, m, maxIndices);
1299 OpFoldResult d =
1300 makeComposedFoldedAffineApply(rewriter, loc, plusOneMap, {maxIndex});
1301
1302 // Compute min(dim - offset, size) to avoid out-of-bounds accesses.
1303 AffineMap minMap = AffineMap::inferFromExprList(
1304 {ArrayRef<AffineExpr>{dim1 - dim2, dim0}}, context)
1305 .front();
1306 size =
1307 makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset});
1308 }
1309 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
1310 sliceParams.sizes.push_back(size);
1311 }
1312 return sliceParams;
1313}
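// As a worked example for a convolution-style access: with the sub-map
// (d0, d1) -> (d0 * 2 + d1), loop lower bounds (%i, %j) and tile sizes
// (ts0, ts1), the slice offset becomes 2 * %i + %j and, before the affine.min
// clamp, the slice size becomes 2 * (ts0 - 1) + (ts1 - 1) + 1, i.e. the input
// window touched by the tile.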
1314
1315SmallVector<OpFoldResult> computeTileOffsets(OpBuilder &b, Location loc,
1316 ArrayRef<OpFoldResult> ivs,
1317 ArrayRef<OpFoldResult> tileSizes) {
1318 SmallVector<OpFoldResult> offsets;
1319 for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
1320 LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
1321 bool isTiled = !isZeroInteger(tileSizes[idx]);
1322 offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
1323 LLVM_DEBUG(llvm::dbgs()
1324 << "computeTileOffsets: " << offsets.back() << "\n");
1325 }
1326 return offsets;
1327}
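// For instance, with ivs = [%iv0] and tileSizes = [0, 16] the resulting
// offsets are [0, %iv0]: untiled dimensions get a constant 0 offset and tiled
// dimensions consume the next induction variable.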
1328
1329SmallVector<OpFoldResult> computeTileSizes(OpBuilder &b, Location loc,
1330 ArrayRef<OpFoldResult> tileSizes,
1331 ArrayRef<OpFoldResult> sizeBounds) {
1332 SmallVector<OpFoldResult> sizes;
1333 for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
1334 bool isTiled = !isZeroInteger(tileSizes[idx]);
1335 // Before composing, we need to make range a closed interval.
1336 OpFoldResult size = isTiled ? tileSizes[idx] : sizeBounds[idx];
1337 AffineExpr d0 = getAffineDimExpr(0, b.getContext());
1338 IRRewriter rewriter(b);
1339 sizes.push_back(makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, size));
1340 LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
1341 }
1342 return sizes;
1343}
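// For instance, with tileSizes = [0, 16] and sizeBounds = [%d0, %d1] the
// computed closed-interval sizes are [%d0 - 1, 15].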
1344
1345SmallVector<Type> getTensorOutputTypes(LinalgOp op, ValueRange operands) {
1346 if (op.hasPureBufferSemantics())
1347 return {};
1348 return llvm::to_vector(
1349 llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
1350 return operands[opOperand.getOperandNumber()].getType();
1351 }));
1352}
1353
1354SmallVector<Value> insertSlicesBack(OpBuilder &builder, Location loc,
1355 LinalgOp op, ValueRange operands,
1356 ValueRange results) {
1357 if (op.hasPureBufferSemantics())
1358 return {};
1359 SmallVector<Value> tensorResults;
1360 tensorResults.reserve(results.size());
1361 // Insert a insert_slice for each output tensor.
1362 unsigned resultIdx = 0;
1363 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
1364 // TODO: use an interface/adaptor to avoid leaking position in
1365 // `tiledOperands`.
1366 Value outputTensor = operands[opOperand.getOperandNumber()];
1367 if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
1368 Value inserted = tensor::InsertSliceOp::create(
1369 builder, loc, sliceOp.getSource().getType(), results[resultIdx],
1370 sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
1371 sliceOp.getStrides(), sliceOp.getStaticOffsets(),
1372 sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
1373 tensorResults.push_back(inserted);
1374 } else {
1375 tensorResults.push_back(results[resultIdx]);
1376 }
1377 ++resultIdx;
1378 }
1379 return tensorResults;
1380}
1381
1383computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
1384 ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
1385 ArrayRef<OpFoldResult> tileSizes,
1386 ArrayRef<OpFoldResult> sizeBounds,
1387 bool omitPartialTileCheck) {
1388 assert(ivs.size() == static_cast<size_t>(llvm::count_if(
1389 llvm::make_range(tileSizes.begin(), tileSizes.end()),
1390 [](OpFoldResult v) { return !isZeroInteger(v); })) &&
1391 "expected as many ivs as non-zero sizes");
1392
1393 // Construct (potentially temporary) mins and maxes on which to apply maps
1394 // that define tile subshapes.
1395 SmallVector<OpFoldResult> lbs =
1396 computeTileOffsets(builder, loc, ivs, tileSizes);
1397 SmallVector<OpFoldResult> subShapeSizes =
1398 computeTileSizes(builder, loc, tileSizes, sizeBounds);
1399
1400 assert(static_cast<int64_t>(valuesToTile.size()) <=
1401 linalgOp->getNumOperands() &&
1402 "more value to tile than operands.");
1403 SmallVector<std::optional<SliceParameters>> allSliceParams;
1404 allSliceParams.reserve(valuesToTile.size());
1405 for (auto [opOperand, val] :
1406 llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
1407 Value shapedOp = val;
1408 LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
1409 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
1410 // Use `opOperand` as is if it is not tiled and not an output tensor. Having
1411 // an extract/insert slice pair for all output tensors simplifies follow up
1412 // transformations such as padding and bufferization since the
1413 // extract/insert slice pairs make the accessed iteration argument
1414 // subdomains explicit.
1415
1416 Type operandType = opOperand.get().getType();
1417 if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
1418 linalgOp.isDpsInit(&opOperand))) {
1419 allSliceParams.push_back(std::nullopt);
1420 LLVM_DEBUG(llvm::dbgs()
1421 << ": not tiled: use shape: " << operandType << "\n");
1422 continue;
1423 }
1424 LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
1425
1426 allSliceParams.push_back(computeSliceParameters(
1427 builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
1428 omitPartialTileCheck));
1429 }
1430
1431 return allSliceParams;
1432}
1433
1434SmallVector<Value> makeTiledShapes(OpBuilder &builder, Location loc,
1435 LinalgOp linalgOp, ValueRange valuesToTile,
1436 ArrayRef<OpFoldResult> ivs,
1437 ArrayRef<OpFoldResult> tileSizes,
1438 ArrayRef<OpFoldResult> sizeBounds,
1439 bool omitPartialTileCheck) {
1440 SmallVector<std::optional<SliceParameters>> allSliceParameter =
1441 computeAllSliceParameters(builder, loc, linalgOp, valuesToTile, ivs,
1442 tileSizes, sizeBounds, omitPartialTileCheck);
1443 SmallVector<Value> tiledShapes;
1444 for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
1445 Value valueToTile = std::get<0>(item);
1446 std::optional<SliceParameters> sliceParams = std::get<1>(item);
1447 tiledShapes.push_back(
1448 sliceParams.has_value()
1449 ? materializeTiledShape(builder, loc, valueToTile, *sliceParams)
1450 ->getResult(0)
1451 : valueToTile);
1452 }
1453 return tiledShapes;
1454}
1455
1456void offsetIndices(OpBuilder &b, LinalgOp linalgOp,
1457 ArrayRef<OpFoldResult> offsets) {
1458 IRRewriter rewriter(b);
1459 offsetIndices(rewriter, linalgOp, offsets);
1460}
1461
1462void offsetIndices(RewriterBase &b, LinalgOp linalgOp,
1463 ArrayRef<OpFoldResult> offsets) {
1464 if (!linalgOp.hasIndexSemantics())
1465 return;
1466
1467 for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
1468 if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
1469 continue;
1470 OpBuilder::InsertionGuard guard(b);
1471 b.setInsertionPointAfter(indexOp);
1472 AffineExpr index, offset;
1473 bindDims(b.getContext(), index, offset);
1474 OpFoldResult applied = makeComposedFoldedAffineApply(
1475 b, indexOp.getLoc(), index + offset,
1476 {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
1477 Value materialized =
1478 getValueOrCreateConstantIndexOp(b, indexOp.getLoc(), applied);
1479 b.replaceUsesWithIf(indexOp, materialized, [&](OpOperand &use) {
1480 return use.getOwner() != materialized.getDefiningOp();
1481 });
1482 }
1483}
1484
1485/// Get the reassociation maps to fold the result of a extract_slice (or source
1486/// of a insert_slice) operation with given offsets, and sizes to its
1487/// rank-reduced version. This is only done for the cases where the size is 1
1488/// and offset is 0. Strictly speaking the offset 0 is not required in general,
1489/// but non-zero offsets are not handled by SPIR-V backend at this point (and
1490/// potentially cannot be handled).
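/// For example, mixedSizes = [1, 1, 5, 1, 6] produces the reassociation
/// [[0, 1, 2], [3, 4]], while an all-unit list such as [1, 1] produces an
/// empty reassociation (fold everything to a rank-0 value).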
1491std::optional<SmallVector<ReassociationIndices>>
1492getReassociationMapForFoldingUnitDims(ArrayRef<OpFoldResult> mixedSizes) {
1493 SmallVector<ReassociationIndices> reassociation;
1494 ReassociationIndices curr;
1495 for (const auto &it : llvm::enumerate(mixedSizes)) {
1496 auto dim = it.index();
1497 auto size = it.value();
1498 curr.push_back(dim);
1499 auto attr = llvm::dyn_cast_if_present<Attribute>(size);
1500 if (attr && cast<IntegerAttr>(attr).getInt() == 1)
1501 continue;
1502 reassociation.emplace_back(ReassociationIndices{});
1503 std::swap(reassociation.back(), curr);
1504 }
1505 // When the reassociations are not empty, then fold the remaining
1506 // unit-dimensions into the last dimension. If the reassociations so far
1507 // are empty, then leave it empty. This will fold everything to a rank-0 tensor.
1508 if (!curr.empty() && !reassociation.empty())
1509 reassociation.back().append(curr.begin(), curr.end());
1510 return reassociation;
1511}
1512
1513} // namespace linalg
1514} // namespace mlir
This class represents an operand of an operation.
Definition Value.h:257
This is a value defined by a result of an operation.
Definition Value.h:457
unsigned getResultNumber() const
Returns the number of this result.
Definition Value.h:469
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
Value getOperand(unsigned idx)
Definition Operation.h:350
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Block & front()
Definition Region.h:65
bool hasOneBlock()
Return true if this region has exactly one block.
Definition Region.h:68
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
bool isSignlessIntOrFloat() const
Return true of this is a signless integer or a float type.
Definition Types.cpp:108
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
Operation * getOwner() const
Return the owner of this operand.
Definition UseDefLists.h:38
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
bool isaConvolutionOpOfType< linalg::DepthwiseConv3DNdhwcDhwcmOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:531
bool isaConvolutionOpOfType< linalg::PoolingNhwcSumOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:680
bool isaConvolutionOpOfType< linalg::DepthwiseConv2DNchwChwOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:481
SmallVector< int64_t > getUnPackInverseSrcPerm(linalg::UnPackOp, PackingMetadata &metadata)
Compute inverse permutation for the source tensor (i.e.
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
Definition Utils.cpp:1434
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
Definition Utils.cpp:195
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
Definition Utils.cpp:230
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
Definition Utils.cpp:1329
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
Definition Utils.cpp:885
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
Definition Utils.cpp:1005
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
Definition Utils.cpp:1315
bool isaConvolutionOpOfType< linalg::DepthwiseConv1DNwcWcOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:441
static bool bodyMatcherForMinUnsignedPoolOps(Value yieldVal, Block *body)
Definition Utils.cpp:323
bool isaConvolutionOpOfType< linalg::PoolingNhwcMaxOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:586
static bool bodyMatcherForMaxSignedPoolOps(Value yieldVal, Block *body)
Definition Utils.cpp:304
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
Definition Utils.cpp:234
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
Definition Utils.cpp:201
static AffineExpr getAffineMapDim(ArrayAttr indexingMaps, uint32_t mapIndex, uint32_t dimIndex)
Definition Utils.cpp:332
static bool bodyMatcherForPoolOps(Value yieldVal, Block *body)
Utility to match block body for linalg.pool* ops.
Definition Utils.cpp:288
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of a extract_slice (or source of a insert_slice) operat...
Definition Utils.cpp:1492
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
Definition Utils.h:262
@ None
No Distribution.
Definition Utils.h:307
@ CyclicNumProcsGeNumIters
Cyclic distribution where the number of processors can be assumed to be more than or equal to the num...
Definition Utils.h:292
@ Cyclic
Cyclic distribution where no assumption is made about the dynamic relationship between number of proc...
Definition Utils.h:274
@ CyclicNumProcsEqNumIters
Cyclic distribution where the number of processors can be assumed to be equal to the number of iterat...
Definition Utils.h:304
static bool bodyMatcherForMaxUnsignedPoolOps(Value yieldVal, Block *body)
Definition Utils.cpp:311
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
Definition Utils.cpp:1354
bool isaConvolutionOpInterface(LinalgOp linalgOp, bool allowEmptyConvolvedDims=false)
Checks whether linalgOp conforms to ConvolutionOpInterface.
static BlockArgument getBlockArgumentWithOptionalExtOps(Value val)
Returns the BlockArgument that leads to val, if any.
Definition Utils.cpp:244
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
Definition Utils.cpp:215
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
Definition Utils.cpp:1456
static bool bodyMatcherForSumPoolOps(Value yieldVal, Block *body)
Definition Utils.cpp:328
SmallVector< int64_t > getPackInverseDestPerm(linalg::PackOp packOp, PackingMetadata &metadata)
Compute inverse permutation for the destination tensor (i.e.
bool isaConvolutionOpOfType< linalg::PoolingNhwcMinOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:633
static bool bodyMatcherForConvolutionOps(Value yieldVal, Block *body)
Utility to match block body for convolution ops.
Definition Utils.cpp:263
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParamaters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Definition Utils.cpp:1383
Operation * makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
Definition Utils.cpp:1185
static bool convLayoutMatches(ArrayRef< ArrayRef< AffineExpr > > mapListExpected, ArrayAttr indexingMaps, MLIRContext *context)
Returns true if the given indexing maps matches with the expected indexing maps.
Definition Utils.cpp:425
static bool bodyMatcherForMinSignedPoolOps(Value yieldVal, Block *body)
Definition Utils.cpp:316
static bool matchConvDimAddExprPattern(ArrayAttr indexingMaps, unsigned iDim, unsigned fDim, unsigned oDim, int64_t &dilation, int64_t &stride)
Given an array of AffineMaps indexingMaps verify the following commutatively:- indexingMaps[0]....
Definition Utils.cpp:387
bool isaConvolutionOpOfType< linalg::PoolingNhwcMinUnsignedOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:774
static Operation * materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
Definition Utils.cpp:1166
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value padding, bool nofold, ValueRange typeDynDims={})
Create a tensor::PadOp that pads source to the shape of type whose sizes are assumed to be greater th...
Definition Utils.cpp:817
static int64_t isDimTimesConstantOrDimOnly(AffineExpr expr, AffineExpr &dim)
Check if expr is either:
Definition Utils.cpp:345
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
Definition Utils.cpp:984
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
Definition Utils.cpp:1345
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
Definition Utils.cpp:1198
bool isaConvolutionOpOfType< linalg::PoolingNhwcMaxUnsignedOp >(LinalgOp op, SmallVector< int64_t > *dilations, SmallVector< int64_t > *strides)
Definition Utils.cpp:727
auto m_Val(Value v)
Definition Matchers.h:539
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
Definition SCF.cpp:837
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
Definition SCF.h:64
PadOp createPadHighOp(RankedTensorType resType, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder, ValueRange dynOutDims={})
Definition Utils.cpp:23
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition Matchers.h:490
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition AffineExpr.h:311
detail::NameOpMatcher m_Op(StringRef opName)
Matches a named operation.
Definition Matchers.h:379
@ Mul
RHS of mul is always a constant or a symbolic expression.
Definition AffineExpr.h:43
SmallVector< int64_t > computePermutationVector(int64_t permSize, ArrayRef< int64_t > positions, ArrayRef< int64_t > desiredPositions)
Return a permutation vector of size permSize that would result in moving positions into desiredPositi...
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
llvm::TypeSwitch< T, ResultT > TypeSwitch
Definition LLVM.h:144
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition Utils.cpp:111
detail::op_matcher< OpClass > m_Op()
Matches the given OpClass.
Definition Matchers.h:484
SmallVector< int64_t, 2 > ReassociationIndices
Definition Utils.h:27
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition Matchers.h:369
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
llvm::function_ref< Fn > function_ref
Definition LLVM.h:152
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Definition Utils.h:103
Value _and(Value lhs, Value rhs)
Definition Utils.cpp:311
Value slt(Value lhs, Value rhs)
Definition Utils.cpp:334
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
Definition Utils.h:376
static void doit(OpBuilder &b, Location loc, ArrayRef< Range > loopRanges, LinalgOp linalgOp, ArrayRef< utils::IteratorType > iteratorTypes, function_ref< scf::ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilderFn, ArrayRef< linalg::ProcInfo > procInfo={})
Callback function type used to get processor ID, and number of processors used for distribution for a...
Definition Utils.h:312
DistributionMethod distributionMethod
Definition Utils.h:315
static std::optional< BinaryOpKind > matchAsScalarBinaryOp(GenericOp op)
Matches the given linalg op if its body is performing binary operation on int or float scalar values ...
Definition Utils.cpp:93
A struct containg offsets-sizes-strides arguments of the tiled shape.
Definition Utils.h:158
SmallVector< OpFoldResult > strides
Definition Utils.h:161
SmallVector< OpFoldResult > sizes
Definition Utils.h:160
SmallVector< OpFoldResult > offsets
Definition Utils.h:159
LoopVector loops
Definition SCF.h:67