MLIR 22.0.0git
Utils.cpp
1//===- Utils.cpp - Utilities to support the Linalg dialect ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements utilities for the Linalg dialect.
10//
11//===----------------------------------------------------------------------===//
12
14
29#include "mlir/IR/AffineExpr.h"
31#include "mlir/IR/AffineMap.h"
32#include "mlir/IR/Matchers.h"
33#include "llvm/ADT/TypeSwitch.h"
34#include "llvm/Support/Debug.h"
35#include <optional>
36
37#define DEBUG_TYPE "linalg-utils"
38
39using namespace mlir;
40using namespace presburger;
41using namespace mlir::affine;
42using namespace mlir::linalg;
43using namespace mlir::scf;
44
45namespace {
46
47// Helper visitor to determine whether an AffineExpr is tiled.
48// This is achieved by traversing every AffineDimExpr with position `pos` and
49// checking whether the corresponding `tileSizes[pos]` is non-zero.
50// This also enforces that only positive coefficients occur in multiplications.
51//
52// Example:
53// `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0]
54//
55struct TileCheck : public AffineExprVisitor<TileCheck> {
56 TileCheck(ArrayRef<OpFoldResult> tileSizes) : tileSizes(tileSizes) {}
57
58 void visitDimExpr(AffineDimExpr expr) {
59 isTiled |= !isZeroInteger(tileSizes[expr.getPosition()]);
60 }
61 void visitAffineBinaryOpExpr(AffineBinaryOpExpr expr) {
62 visit(expr.getLHS());
63 visit(expr.getRHS());
64 if (expr.getKind() == AffineExprKind::Mul)
65 assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
66 "nonpositive multiplying coefficient");
67 }
68 bool isTiled = false;
69 ArrayRef<OpFoldResult> tileSizes;
70};
71
72} // namespace
73
74static bool isTiled(AffineExpr expr, ArrayRef<OpFoldResult> tileSizes) {
75 if (!expr)
76 return false;
77 TileCheck t(tileSizes);
78 t.visit(expr);
79 return t.isTiled;
80}
81
82// Checks whether the `map` varies with respect to a non-zero `tileSize`.
83static bool isTiled(AffineMap map, ArrayRef<OpFoldResult> tileSizes) {
84 if (!map)
85 return false;
86 for (unsigned r = 0; r < map.getNumResults(); ++r)
87 if (isTiled(map.getResult(r), tileSizes))
88 return true;
89 return false;
90}
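// Illustrative example (hypothetical values, not from this file): with
// tileSizes = [0, 4], the map (d0, d1) -> (d0, d1 * 2) counts as tiled because
// its second result uses d1 and tileSizes[1] is non-zero, whereas the map
// (d0, d1) -> (d0) does not.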
91
92std::optional<RegionMatcher::BinaryOpKind>
93RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
94 auto &region = op.getRegion();
95 if (!region.hasOneBlock())
96 return std::nullopt;
97
98 Block &block = region.front();
99 if (block.getNumArguments() != 2 ||
100 !block.getArgument(0).getType().isSignlessIntOrFloat() ||
101 !block.getArgument(1).getType().isSignlessIntOrFloat())
102 return std::nullopt;
103
104 auto &ops = block.getOperations();
105 if (!llvm::hasSingleElement(block.without_terminator()))
106 return std::nullopt;
107
109 auto a = m_Val(block.getArgument(0));
110 auto b = m_Val(block.getArgument(1));
111
112 auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
113 if (addPattern.match(&ops.back()))
114 return BinaryOpKind::IAdd;
115
116 return std::nullopt;
117}
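// Illustrative example (assumed IR, not from this file): a region of the form
//
//   ^bb0(%a: i32, %b: i32):
//     %0 = arith.addi %a, %b : i32
//     linalg.yield %0 : i32
//
// is matched as BinaryOpKind::IAdd: two scalar block arguments, a single
// non-terminator operation, and a yield of the arith.addi result.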
118
119/// Explicit instantiation of loop nest generator for different loop types.
123
124/// Given a list of subview ranges, extract individual values for lower, upper
125/// bounds and steps and put them into the corresponding vectors.
126static void unpackRanges(OpBuilder &builder, Location loc,
127 ArrayRef<Range> ranges, SmallVectorImpl<Value> &lbs,
128 SmallVectorImpl<Value> &ubs,
129 SmallVectorImpl<Value> &steps) {
130 for (Range range : ranges) {
131 lbs.emplace_back(
132 getValueOrCreateConstantIndexOp(builder, loc, range.offset));
133 ubs.emplace_back(getValueOrCreateConstantIndexOp(builder, loc, range.size));
134 steps.emplace_back(
135 getValueOrCreateConstantIndexOp(builder, loc, range.stride));
136 }
137}
138
139//===----------------------------------------------------------------------===//
140// General utilities
141//===----------------------------------------------------------------------===//
142//
143/// The permutation can be obtained from two permutations:
144/// a) Compute the permutation vector to move the last `numPackedDims` into
145/// the `innerDimsPos` of a shape of rank `rank`.
146/// b) Compute the permutation vector to move outer dims if the
147/// `outerPerm` parameter is not empty.
148/// Apply (b) permutation on (a) permutation to get the final permutation.
149static SmallVector<int64_t>
150computePackUnPackPerm(int64_t rank, ArrayRef<int64_t> &innerDimsPos,
151 ArrayRef<int64_t> &outerPerm,
152 PackingMetadata &packingMetadata) {
153 int64_t numPackedDims = innerDimsPos.size();
154 auto lastDims =
155 llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
156 packingMetadata = computePackingMetadata(rank, innerDimsPos);
157 SmallVector<int64_t> innerPositionsPerm =
158 computePermutationVector(rank, lastDims, packingMetadata.insertPositions);
159
160 SmallVector<int64_t> outerPos = packingMetadata.outerPositions;
161 if (!outerPerm.empty())
162 applyPermutationToVector(outerPos, outerPerm);
163 SmallVector<int64_t> outerPositionPerm =
164 computePermutationVector(rank, packingMetadata.outerPositions, outerPos);
165
166 SmallVector<int64_t> packInverseDestPermutation = innerPositionsPerm;
167 applyPermutationToVector(packInverseDestPermutation, outerPositionPerm);
168 return packInverseDestPermutation;
169}
170
171namespace mlir {
172namespace linalg {
173
174SmallVector<int64_t> getPackInverseDestPerm(linalg::PackOp packOp,
175 PackingMetadata &metadata) {
176
177 int64_t packedRank = packOp.getDestType().getRank();
178 ArrayRef<int64_t> innerDimPos = packOp.getInnerDimsPos();
179 ArrayRef<int64_t> outerPerm = packOp.getOuterDimsPerm();
180 SmallVector<int64_t> packInvDestPerm =
181 computePackUnPackPerm(packedRank, innerDimPos, outerPerm, metadata);
182 return packInvDestPerm;
183}
184
185SmallVector<int64_t> getUnPackInverseSrcPerm(linalg::UnPackOp unpackOp,
186 PackingMetadata &metadata) {
187 int64_t packedRank = unpackOp.getSourceType().getRank();
188 ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
189 ArrayRef<int64_t> outerPerm = unpackOp.getOuterDimsPerm();
190 SmallVector<int64_t> unpackInvSrcPerm =
191 computePackUnPackPerm(packedRank, innerDimPos, outerPerm, metadata);
192 return unpackInvSrcPerm;
193}
194
195bool allIndexingsAreProjectedPermutation(LinalgOp op) {
196 return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
197 return m.isProjectedPermutation(/*allowZeroInResults=*/true);
198 });
199}
200
201bool hasOnlyScalarElementwiseOp(Region &r) {
202 if (!r.hasOneBlock())
203 return false;
204 for (Operation &op : r.front()) {
205 if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
206 linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
207 OpTrait::hasElementwiseMappableTraits(&op)) ||
208 llvm::any_of(op.getResultTypes(),
209 [](Type type) { return !type.isIntOrIndexOrFloat(); }))
210 return false;
211 }
212 return true;
213}
214
215bool isElementwise(LinalgOp op) {
216 if (op.getNumLoops() != op.getNumParallelLoops())
217 return false;
218
219 if (!allIndexingsAreProjectedPermutation(op))
220 return false;
221
222 // TODO: relax the restrictions on indexing map.
223 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
224 if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
225 return false;
226 }
227 return hasOnlyScalarElementwiseOp(op->getRegion(0));
228}
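// For intuition (illustrative, not exhaustive): a linalg.generic performing an
// element-wise add with all-"parallel" iterators, projected-permutation
// indexing maps, permutation maps on its inits, and a body of scalar arith ops
// plus linalg.yield is accepted; a matmul is rejected because it carries a
// "reduction" iterator.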
229
230bool isParallelIterator(utils::IteratorType iteratorType) {
231 return iteratorType == utils::IteratorType::parallel;
232}
233
234bool isReductionIterator(utils::IteratorType iteratorType) {
235 return iteratorType == utils::IteratorType::reduction;
236}
237
238Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
239 Value source, Value pad, bool nofold,
240 ValueRange typeDynDims) {
241 // Exit if `source` is not defined by an ExtractSliceOp.
242 auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
243 if (!sliceOp)
244 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
245 typeDynDims);
246
247 // Search the `source` use-def chain for padded LinalgOps.
248 Value current = sliceOp.getSource();
249 while (current) {
250 auto linalgOp = current.getDefiningOp<LinalgOp>();
251 if (!linalgOp)
252 break;
253 OpResult opResult = cast<OpResult>(current);
254 current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
255 }
256 auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
257
258 // Exit if the search fails to match a tensor::PadOp at the end of the matched
259 // LinalgOp sequence.
260 if (!padOp)
261 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
262 typeDynDims);
263
264 // Exit if the padded result type does not match.
265 if (sliceOp.getSource().getType() != type)
266 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
267 typeDynDims);
268
269 // Exit if the LinalgOps are not high padded.
270 if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
271 return getConstantIntValue(ofr) != static_cast<int64_t>(0);
272 }))
273 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
274 typeDynDims);
275
276 // Exit if `padOpSliceOp`, which defines the slice used by
277 // `padOp`, is rank-reducing.
278 auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
279 if (!padOpSliceOp ||
280 sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
281 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
282 typeDynDims);
283
284 // Exit if the sizes of `sliceOp` do not match the sizes of the slice padded
285 // by `padOp`.
286 if (llvm::any_of(
287 llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
288 [](std::tuple<OpFoldResult, OpFoldResult> it) {
289 return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
290 }))
291 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
292 typeDynDims);
293
294 // Exit if the padding values do not match.
295 Attribute padOpPadAttr, padAttr;
296 Value padOpPad = padOp.getConstantPaddingValue();
297 if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
298 !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
299 return tensor::createPadHighOp(type, source, pad, nofold, loc, b,
300 typeDynDims);
301
302 // Return the padded result if the padding values and sizes match.
303 return sliceOp.getSource();
304}
305
306GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to) {
307 auto memrefTypeTo = cast<MemRefType>(to.getType());
308#ifndef NDEBUG
309 auto memrefTypeFrom = cast<MemRefType>(from.getType());
310 assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
311 "`from` and `to` memref must have the same rank");
312#endif // NDEBUG
313
314 AffineMap id =
315 AffineMap::getMultiDimIdentityMap(memrefTypeTo.getRank(), b.getContext());
316 SmallVector<utils::IteratorType> iteratorTypes(memrefTypeTo.getRank(),
317 utils::IteratorType::parallel);
318 return linalg::GenericOp::create(
319 b, loc,
320 /*inputs=*/from,
321 /*outputs=*/to,
322 /*indexingMaps=*/llvm::ArrayRef({id, id}),
323 /*iteratorTypes=*/iteratorTypes,
324 [](OpBuilder &b, Location loc, ValueRange args) {
325 linalg::YieldOp::create(b, loc, args.front());
326 });
327}
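// Rough sketch of the IR this builds for 2-D memrefs (types and names are
// illustrative only):
//
//   linalg.generic
//       {indexing_maps = [affine_map<(i, j) -> (i, j)>,
//                         affine_map<(i, j) -> (i, j)>],
//        iterator_types = ["parallel", "parallel"]}
//       ins(%from : memref<?x?xf32>) outs(%to : memref<?x?xf32>) {
//     ^bb0(%in: f32, %out: f32):
//       linalg.yield %in : f32
//   }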
328
329/// Specialization to build an scf "for" nest.
330template <>
331void GenerateLoopNest<scf::ForOp>::doit(
332 OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
333 ArrayRef<utils::IteratorType> iteratorTypes,
334 function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
335 ValueRange)>
336 bodyBuilderFn,
337 ArrayRef<linalg::ProcInfo> procInfo) {
338 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
339 "expected as many entries for proc info as number of loops, even if "
340 "they are null entries");
341 SmallVector<Value> iterArgInitValues;
342 if (!linalgOp.hasPureBufferSemantics())
343 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
344 SmallVector<Value, 4> lbs, ubs, steps;
345 unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
346 LoopNest loopNest = mlir::scf::buildLoopNest(
347 b, loc, lbs, ubs, steps, iterArgInitValues,
348 [&](OpBuilder &b, Location loc, ValueRange ivs, ValueRange iterArgs) {
349 assert(iterArgs.size() == iterArgInitValues.size() &&
350 "expect the number of output tensors and iter args to match");
351 SmallVector<Value> operandValuesToUse = linalgOp->getOperands();
352 if (!iterArgs.empty()) {
353 operandValuesToUse = linalgOp.getDpsInputs();
354 operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
355 }
356 return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
357 });
358
359 if (loopNest.loops.empty() || procInfo.empty())
360 return;
361
362 // Filter out scf.for loops that were created out of parallel dimensions.
363 for (const auto &loop : llvm::enumerate(loopNest.loops)) {
364 if (procInfo[loop.index()].distributionMethod ==
365 DistributionMethod::Cyclic) {
366 mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId,
367 procInfo[loop.index()].nprocs);
368 }
369 }
370}
371
372/// Specialization to build affine "for" nest.
373template <>
374void GenerateLoopNest<AffineForOp>::doit(
375 OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
376 ArrayRef<utils::IteratorType> iteratorTypes,
377 function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
378 ValueRange)>
379 bodyBuilderFn,
380 ArrayRef<linalg::ProcInfo> /*procInfo*/) {
381 SmallVector<Value> iterArgInitValues;
382 if (!linalgOp.hasPureBufferSemantics())
383 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
384 assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
385 SmallVector<Value, 4> lbs, ubs, steps;
386 unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
387
388 // Affine loops require constant steps.
389 SmallVector<int64_t, 4> constantSteps;
390 constantSteps.reserve(steps.size());
391 for (Value v : steps) {
392 auto constVal = getConstantIntValue(v);
393 assert(constVal.has_value() && "Affine loops require constant steps");
394 constantSteps.push_back(constVal.value());
395 }
396
397 affine::buildAffineLoopNest(b, loc, lbs, ubs, constantSteps,
398 [&](OpBuilder &b, Location loc, ValueRange ivs) {
399 bodyBuilderFn(b, loc, ivs,
400 linalgOp->getOperands());
401 });
402}
403
404/// Update the `lb`, `ub` and `step` to get per processor `lb`, `ub` and `step`.
405void updateBoundsForCyclicDistribution(OpBuilder &b, Location loc, Value procId,
406 Value nprocs, Value &lb, Value &ub,
407 Value &step) {
408 AffineExpr d0, d1;
409 bindDims(b.getContext(), d0, d1);
410 AffineExpr s0 = getAffineSymbolExpr(0, b.getContext());
411 lb =
412 affine::makeComposedAffineApply(b, loc, d0 + d1 * s0, {lb, procId, step});
413 step = affine::makeComposedAffineApply(b, loc, d0 * s0, {nprocs, step});
414}
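// Worked example (hypothetical values): with lb = 0, step = 1, procId = p and
// nprocs = 4, the bounds become lb = 0 + p * 1 = p and step = 4 * 1 = 4, so
// processor p visits iterations p, p + 4, p + 8, ... (cyclic distribution);
// `ub` is left unchanged.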
415
416/// Generates a loop nest consisting of scf.parallel and scf.for, depending
417/// on the `iteratorTypes`. Consecutive parallel loops create a single
418/// scf.parallel operation; each sequential loop creates a new scf.for
419/// operation. The body of the innermost loop is populated by
420/// `bodyBuilderFn` that accepts a range of induction variables for all
421/// loops. `ivStorage` is used to store the partial list of induction
422/// variables.
423// TODO: this function can be made iterative instead. However, it
424// will have at most as many recursive calls as nested loops, which rarely
425// exceeds 10.
426static void generateParallelLoopNest(
427 OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs,
428 ValueRange steps, ArrayRef<utils::IteratorType> iteratorTypes,
429 ArrayRef<linalg::ProcInfo> procInfo,
430 function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilderFn,
431 SmallVectorImpl<Value> &ivStorage) {
432 assert(lbs.size() == ubs.size());
433 assert(lbs.size() == steps.size());
434 assert(lbs.size() == iteratorTypes.size());
435 assert(procInfo.empty() || (lbs.size() == procInfo.size()));
436
437 // If there are no (more) loops to be generated, generate the body and be
438 // done with it.
439 if (iteratorTypes.empty()) {
440 bodyBuilderFn(b, loc, ivStorage);
441 return;
442 }
443
444 // If there are no outer parallel loops, generate one sequential loop and
445 // recurse.
446 if (!isParallelIterator(iteratorTypes.front())) {
447 LoopNest singleLoop = buildLoopNest(
448 b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
449 [&](OpBuilder &b, Location loc, ValueRange ivs) {
450 ivStorage.append(ivs.begin(), ivs.end());
451 generateParallelLoopNest(
452 b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
453 iteratorTypes.drop_front(),
454 procInfo.empty() ? procInfo : procInfo.drop_front(),
455 bodyBuilderFn, ivStorage);
456 });
457 return;
458 }
459
460 unsigned nLoops = iteratorTypes.size();
461 unsigned numProcessed = 0;
462 DistributionMethod distributionMethod = DistributionMethod::None;
463 if (procInfo.empty()) {
464 numProcessed = nLoops - iteratorTypes.drop_while(isParallelIterator).size();
465 } else {
466 distributionMethod = procInfo.front().distributionMethod;
467 numProcessed =
468 nLoops - procInfo
469 .drop_while([&](linalg::ProcInfo p) {
470 return p.distributionMethod == distributionMethod;
471 })
472 .size();
473 }
474
475 auto remainderProcInfo =
476 procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
477 switch (distributionMethod) {
478 case DistributionMethod::None: {
479 // Generate a single parallel loop-nest operation for all outermost
480 // parallel loops and recurse.
481 scf::ParallelOp::create(
482 b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
483 steps.take_front(numProcessed),
484 [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
485 ivStorage.append(localIvs.begin(), localIvs.end());
486 generateParallelLoopNest(
487 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
488 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
489 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
490 bodyBuilderFn, ivStorage);
491 });
492 return;
493 }
494 case DistributionMethod::Cyclic: {
495 // Generate a single parallel loop-nest operation for all outermost
496 // parallel loops and recurse.
497 scf::ParallelOp::create(
498 b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
499 steps.take_front(numProcessed),
500 [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange localIvs) {
501 ivStorage.append(localIvs.begin(), localIvs.end());
502 generateParallelLoopNest(
503 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
504 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
505 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
506 bodyBuilderFn, ivStorage);
507 });
508 return;
509 }
510 case DistributionMethod::CyclicNumProcsGeNumIters: {
511 // Check (for the processed loops) that the iteration is in-bounds.
512 ArithBuilder ab(b, loc);
513 Value cond = ab.slt(lbs[0], ubs[0]);
514 for (unsigned i = 1; i < numProcessed; ++i)
515 cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
516 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
517 scf::IfOp::create(b, loc, cond, [&](OpBuilder &b, Location loc) {
518 generateParallelLoopNest(b, loc, lbs.drop_front(numProcessed),
519 ubs.drop_front(numProcessed),
520 steps.drop_front(numProcessed),
521 iteratorTypes.drop_front(numProcessed),
522 remainderProcInfo, bodyBuilderFn, ivStorage);
523 scf::YieldOp::create(b, loc, ValueRange{});
524 });
525 return;
526 }
527 case DistributionMethod::CyclicNumProcsEqNumIters:
528 // No check/loops needed here. Set the `%iv` to be the `%lb` and proceed
529 // with inner loop generation.
530 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
531 generateParallelLoopNest(
532 b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
533 steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
534 remainderProcInfo, bodyBuilderFn, ivStorage);
535 return;
536 }
537}
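// For example (illustrative): given iteratorTypes = ["parallel", "parallel",
// "reduction"] and no distribution info, the two leading parallel dimensions
// are emitted as one 2-D scf.parallel and the trailing reduction dimension
// becomes an scf.for nested inside it.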
538
539/// Specialization for generating a mix of parallel and sequential scf loops.
540template <>
541void GenerateLoopNest<scf::ParallelOp>::doit(
542 OpBuilder &b, Location loc, ArrayRef<Range> loopRanges, LinalgOp linalgOp,
543 ArrayRef<utils::IteratorType> iteratorTypes,
544 function_ref<scf::ValueVector(OpBuilder &, Location, ValueRange,
545 ValueRange)>
546 bodyBuilderFn,
547 ArrayRef<linalg::ProcInfo> procInfo) {
548 SmallVector<Value> iterArgInitValues;
549 if (!linalgOp.hasPureBufferSemantics())
550 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
551 assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
552 // This function may be passed more iterator types than ranges.
553 assert(iteratorTypes.size() >= loopRanges.size() &&
554 "expected iterator type for all ranges");
555 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
556 "expected proc information for all loops when present");
557 iteratorTypes = iteratorTypes.take_front(loopRanges.size());
558 SmallVector<Value, 8> lbsStorage, ubsStorage, stepsStorage, ivs;
559 unsigned numLoops = iteratorTypes.size();
560 ivs.reserve(numLoops);
561 lbsStorage.reserve(numLoops);
562 ubsStorage.reserve(numLoops);
563 stepsStorage.reserve(numLoops);
564
565 // Get the loop lb, ub, and step.
566 unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
567
568 // Modify the lb, ub, and step based on the distribution options.
569 for (const auto &it : llvm::enumerate(procInfo)) {
570 if (it.value().distributionMethod != linalg::DistributionMethod::None) {
571 updateBoundsForCyclicDistribution(
572 b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
573 ubsStorage[it.index()], stepsStorage[it.index()]);
574 }
575 }
576 ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
577 generateParallelLoopNest(
578 b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
579 [&](OpBuilder &b, Location loc, ValueRange ivs) {
580 bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
581 },
582 ivs);
583
584 assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
585}
586
587static Operation *materializeTiledShape(OpBuilder &builder, Location loc,
588 Value valueToTile,
589 const SliceParameters &sliceParams) {
590 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
591 auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
592 .Case([&](MemRefType) {
593 return memref::SubViewOp::create(
594 builder, loc, valueToTile, sliceParams.offsets,
595 sliceParams.sizes, sliceParams.strides);
596 })
597 .Case([&](RankedTensorType) {
598 return tensor::ExtractSliceOp::create(
599 builder, loc, valueToTile, sliceParams.offsets,
600 sliceParams.sizes, sliceParams.strides);
601 })
602 .DefaultUnreachable("Unexpected shaped type");
603 return sliceOp;
604}
605
606Operation *makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
607 ArrayRef<OpFoldResult> tileSizes, AffineMap map,
608 ArrayRef<OpFoldResult> lbs,
609 ArrayRef<OpFoldResult> ubs,
610 ArrayRef<OpFoldResult> subShapeSizes,
611 bool omitPartialTileCheck) {
612 SliceParameters sliceParams =
613 computeSliceParameters(builder, loc, valueToTile, tileSizes, map, lbs,
614 ubs, subShapeSizes, omitPartialTileCheck);
615 return materializeTiledShape(builder, loc, valueToTile, sliceParams);
616}
617
618SliceParameters
619computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
620 ArrayRef<OpFoldResult> tileSizes, AffineMap map,
621 ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
622 ArrayRef<OpFoldResult> subShapeSizes,
623 bool omitPartialTileCheck) {
624 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
625 assert(shapedType && "only shaped types can be tiled");
626 ArrayRef<int64_t> shape = shapedType.getShape();
627 int64_t rank = shapedType.getRank();
628
629 // Compute offsets/sizes/strides for the tile.
630 SliceParameters sliceParams;
631 sliceParams.offsets.reserve(rank);
632 sliceParams.sizes.reserve(rank);
633 sliceParams.strides.reserve(rank);
634 for (unsigned r = 0; r < rank; ++r) {
635 LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
636 if (!isTiled(map.getSubMap({r}), tileSizes)) {
637 sliceParams.offsets.push_back(builder.getIndexAttr(0));
638 OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
639 sliceParams.sizes.push_back(dim);
640 sliceParams.strides.push_back(builder.getIndexAttr(1));
641 LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
642 continue;
643 }
644 LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
645
646 // Tiling creates a new slice at the proper index, the slice step is 1
647 // (i.e. the op does not subsample, stepping occurs in the loop).
648 auto m = map.getSubMap({r});
649 LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
650 IRRewriter rewriter(builder);
651 // The offset of the slice is m(lbs) - m(0).
652 SmallVector<Attribute> zeros(lbs.size(), rewriter.getIndexAttr(0));
653 SmallVector<Attribute> mAtZero;
654 [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
655 assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
656 int64_t mAtZeroInt =
657 cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
658 OpFoldResult offset = makeComposedFoldedAffineApply(
659 rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
660 sliceParams.offsets.push_back(offset);
661
662 OpFoldResult closedIntSize =
663 makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes);
664 // Resulting size needs to be made half open interval again.
665 AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
666 OpFoldResult size =
667 makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize);
668 LLVM_DEBUG(llvm::dbgs()
669 << "computeSliceParameters: raw size: " << size << "\n");
670 LLVM_DEBUG(llvm::dbgs()
671 << "computeSliceParameters: new offset: " << offset << "\n");
672 sliceParams.strides.push_back(builder.getIndexAttr(1));
673
674 if (omitPartialTileCheck) {
675 // We statically know that the partial/boundary tile condition is
676 // unnecessary.
677 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
678 sliceParams.sizes.push_back(size);
679 continue;
680 }
681
682 // The size of the subview / extract_slice should be trimmed to avoid
683 // out-of-bounds accesses, unless:
684 // a. We statically know the subshape size divides the shape size evenly.
685 // b. The subshape size is 1. According to the way the loops are set up,
686 // tensors with "0" dimensions would never be constructed.
687 int64_t shapeSize = shape[r];
688 std::optional<int64_t> sizeCst = getConstantIntValue(size);
689 auto hasTileSizeOne = sizeCst == 1;
690 auto dividesEvenly = sizeCst && ShapedType::isStatic(shapeSize) &&
691 ((shapeSize % *sizeCst) == 0);
692 if (!hasTileSizeOne && !dividesEvenly) {
693 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
694 << ", size: " << size
695 << ": make sure in bound with affine.min\n");
696
697 AffineExpr dim0, dim1, dim2;
698 MLIRContext *context = builder.getContext();
699 bindDims(context, dim0, dim1, dim2);
700
701 // Get the dimension size for this dimension. We need to first compute
702 // the max index and then add one. This is important because for
703 // convolution ops, the input window dimension's affine map has the
704 // form `(d0 * s0 + d1)`, where `d0`/`d1` is an output/filter window
705 // dimension and `s0` is the stride. Directly using the dimension size
706 // of the output/filter window dimensions would cause incorrect results.
707 AffineMap minusOneMap = AffineMap::inferFromExprList(
708 {ArrayRef<AffineExpr>{dim0 - 1}}, context)
709 .front();
710 AffineMap plusOneMap = AffineMap::inferFromExprList(
711 {ArrayRef<AffineExpr>{dim0 + 1}}, context)
712 .front();
713 SmallVector<OpFoldResult> maxIndices =
714 llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
715 return makeComposedFoldedAffineApply(rewriter, loc, minusOneMap,
716 {ub});
717 }));
718 OpFoldResult maxIndex =
719 makeComposedFoldedAffineApply(rewriter, loc, m, maxIndices);
720 OpFoldResult d =
721 makeComposedFoldedAffineApply(rewriter, loc, plusOneMap, {maxIndex});
722
723 // Compute min(dim - offset, size) to avoid out-of-bounds accesses.
724 AffineMap minMap = AffineMap::inferFromExprList(
725 {ArrayRef<AffineExpr>{dim1 - dim2, dim0}}, context)
726 .front();
727 size =
728 makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset});
729 }
730 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
731 sliceParams.sizes.push_back(size);
732 }
733 return sliceParams;
734}
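// Worked example (hypothetical values): for a tiled dimension whose submap
// result is `d0 * 2` (e.g. a stride-2 convolution window), with lbs = [%iv]
// and a closed-interval subShapeSize of 7 (tile size 8), the slice offset is
// 2 * %iv - 0 = 2 * %iv and the raw size is 2 * 7 + 1 = 15, which is then
// clamped with an affine.min unless the partial-tile check is statically known
// to be unnecessary.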
735
736SmallVector<OpFoldResult> computeTileOffsets(OpBuilder &b, Location loc,
737 ArrayRef<OpFoldResult> ivs,
738 ArrayRef<OpFoldResult> tileSizes) {
739 SmallVector<OpFoldResult> offsets;
740 for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
741 LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
742 bool isTiled = !isZeroInteger(tileSizes[idx]);
743 offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
744 LLVM_DEBUG(llvm::dbgs()
745 << "computeTileOffsets: " << offsets.back() << "\n");
746 }
747 return offsets;
748}
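// For example (illustrative): with ivs = [%i, %j] and tileSizes = [2, 0, 3],
// the untiled middle dimension gets a zero offset and the result is
// [%i, 0, %j]; one induction variable is consumed per tiled dimension.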
749
750SmallVector<OpFoldResult> computeTileSizes(OpBuilder &b, Location loc,
751 ArrayRef<OpFoldResult> tileSizes,
752 ArrayRef<OpFoldResult> sizeBounds) {
753 SmallVector<OpFoldResult> sizes;
754 for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
755 bool isTiled = !isZeroInteger(tileSizes[idx]);
756 // Before composing, we need to make range a closed interval.
757 OpFoldResult size = isTiled ? tileSizes[idx] : sizeBounds[idx];
758 AffineExpr d0 = getAffineDimExpr(0, b.getContext());
759 IRRewriter rewriter(b);
760 sizes.push_back(makeComposedFoldedAffineApply(rewriter, loc, d0 - 1, size));
761 LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
762 }
763 return sizes;
764}
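// For example (illustrative): with tileSizes = [5, 0] and sizeBounds =
// [%d, 42], the result is [4, 41]; each entry is the tile size (or the
// dimension bound when untiled) minus one, i.e. a closed interval.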
765
766SmallVector<Type> getTensorOutputTypes(LinalgOp op, ValueRange operands) {
767 if (op.hasPureBufferSemantics())
768 return {};
769 return llvm::to_vector(
770 llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
771 return operands[opOperand.getOperandNumber()].getType();
772 }));
773}
774
775SmallVector<Value> insertSlicesBack(OpBuilder &builder, Location loc,
776 LinalgOp op, ValueRange operands,
777 ValueRange results) {
778 if (op.hasPureBufferSemantics())
779 return {};
780 SmallVector<Value> tensorResults;
781 tensorResults.reserve(results.size());
782 // Insert an insert_slice for each output tensor.
783 unsigned resultIdx = 0;
784 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
785 // TODO: use an interface/adaptor to avoid leaking position in
786 // `tiledOperands`.
787 Value outputTensor = operands[opOperand.getOperandNumber()];
788 if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
789 Value inserted = tensor::InsertSliceOp::create(
790 builder, loc, sliceOp.getSource().getType(), results[resultIdx],
791 sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
792 sliceOp.getStrides(), sliceOp.getStaticOffsets(),
793 sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
794 tensorResults.push_back(inserted);
795 } else {
796 tensorResults.push_back(results[resultIdx]);
797 }
798 ++resultIdx;
799 }
800 return tensorResults;
801}
802
803SmallVector<std::optional<SliceParameters>>
804computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
805 ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
806 ArrayRef<OpFoldResult> tileSizes,
807 ArrayRef<OpFoldResult> sizeBounds,
808 bool omitPartialTileCheck) {
809 assert(ivs.size() == static_cast<size_t>(llvm::count_if(
810 llvm::make_range(tileSizes.begin(), tileSizes.end()),
811 [](OpFoldResult v) { return !isZeroInteger(v); })) &&
812 "expected as many ivs as non-zero sizes");
813
814 // Construct (potentially temporary) mins and maxes on which to apply maps
815 // that define tile subshapes.
816 SmallVector<OpFoldResult> lbs =
817 computeTileOffsets(builder, loc, ivs, tileSizes);
818 SmallVector<OpFoldResult> subShapeSizes =
819 computeTileSizes(builder, loc, tileSizes, sizeBounds);
820
821 assert(static_cast<int64_t>(valuesToTile.size()) <=
822 linalgOp->getNumOperands() &&
823 "more value to tile than operands.");
824 SmallVector<std::optional<SliceParameters>> allSliceParams;
825 allSliceParams.reserve(valuesToTile.size());
826 for (auto [opOperand, val] :
827 llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
828 Value shapedOp = val;
829 LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
830 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
831 // Use `opOperand` as is if it is not tiled and not an output tensor. Having
832 // an extract/insert slice pair for all output tensors simplifies follow up
833 // transformations such as padding and bufferization since the
834 // extract/insert slice pairs make the accessed iteration argument
835 // subdomains explicit.
836
837 Type operandType = opOperand.get().getType();
838 if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
839 linalgOp.isDpsInit(&opOperand))) {
840 allSliceParams.push_back(std::nullopt);
841 LLVM_DEBUG(llvm::dbgs()
842 << ": not tiled: use shape: " << operandType << "\n");
843 continue;
844 }
845 LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
846
847 allSliceParams.push_back(computeSliceParameters(
848 builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
849 omitPartialTileCheck));
850 }
851
852 return allSliceParams;
853}
854
855SmallVector<Value> makeTiledShapes(OpBuilder &builder, Location loc,
856 LinalgOp linalgOp, ValueRange valuesToTile,
857 ArrayRef<OpFoldResult> ivs,
858 ArrayRef<OpFoldResult> tileSizes,
859 ArrayRef<OpFoldResult> sizeBounds,
860 bool omitPartialTileCheck) {
861 SmallVector<std::optional<SliceParameters>> allSliceParameter =
862 computeAllSliceParameters(builder, loc, linalgOp, valuesToTile, ivs,
863 tileSizes, sizeBounds, omitPartialTileCheck);
864 SmallVector<Value> tiledShapes;
865 for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
866 Value valueToTile = std::get<0>(item);
867 std::optional<SliceParameters> sliceParams = std::get<1>(item);
868 tiledShapes.push_back(
869 sliceParams.has_value()
870 ? materializeTiledShape(builder, loc, valueToTile, *sliceParams)
871 ->getResult(0)
872 : valueToTile);
873 }
874 return tiledShapes;
875}
876
877void offsetIndices(OpBuilder &b, LinalgOp linalgOp,
878 ArrayRef<OpFoldResult> offsets) {
879 IRRewriter rewriter(b);
880 offsetIndices(rewriter, linalgOp, offsets);
881}
882
883void offsetIndices(RewriterBase &b, LinalgOp linalgOp,
884 ArrayRef<OpFoldResult> offsets) {
885 if (!linalgOp.hasIndexSemantics())
886 return;
887
888 for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
889 if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
890 continue;
891 OpBuilder::InsertionGuard guard(b);
892 b.setInsertionPointAfter(indexOp);
893 AffineExpr index, offset;
894 bindDims(b.getContext(), index, offset);
895 OpFoldResult applied = makeComposedFoldedAffineApply(
896 b, indexOp.getLoc(), index + offset,
897 {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
898 Value materialized =
899 getValueOrCreateConstantIndexOp(b, indexOp.getLoc(), applied);
900 b.replaceUsesWithIf(indexOp, materialized, [&](OpOperand &use) {
901 return use.getOwner() != materialized.getDefiningOp();
902 });
903 }
904}
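// Sketch of the rewrite (illustrative IR): given an offset %off for dimension
// 0, a `%i = linalg.index 0` in the body is replaced in all other uses by
// (roughly) `affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%i, %off)`, folded
// to a constant when both operands are constant.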
905
906/// Get the reassociation maps to fold the result of an extract_slice (or
907/// source of an insert_slice) operation with the given offsets and sizes to
908/// its rank-reduced version. This is only done for the cases where the size
909/// is 1 and the offset is 0. Strictly speaking, a zero offset is not required
910/// in general, but non-zero offsets are not handled by the SPIR-V backend at
911/// this point (and potentially cannot be handled).
912std::optional<SmallVector<ReassociationIndices>>
913getReassociationMapForFoldingUnitDims(ArrayRef<OpFoldResult> mixedSizes) {
914 SmallVector<ReassociationIndices> reassociation;
915 ReassociationIndices curr;
916 for (const auto &it : llvm::enumerate(mixedSizes)) {
917 auto dim = it.index();
918 auto size = it.value();
919 curr.push_back(dim);
920 auto attr = llvm::dyn_cast_if_present<Attribute>(size);
921 if (attr && cast<IntegerAttr>(attr).getInt() == 1)
922 continue;
923 reassociation.emplace_back(ReassociationIndices{});
924 std::swap(reassociation.back(), curr);
925 }
926 // When the reassociation list is not empty, fold the remaining unit
927 // dimensions into the last dimension. If the reassociation list so far is
928 // empty, leave it empty; this will fold everything to a rank-0 tensor.
929 if (!curr.empty() && !reassociation.empty())
930 reassociation.back().append(curr.begin(), curr.end());
931 return reassociation;
932}
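// Worked examples (illustrative): mixedSizes = [1, 4, 1, 8] yields
// [[0, 1], [2, 3]] (each unit dimension is folded into the next non-unit
// dimension) and mixedSizes = [4, 1, 1] yields [[0, 1, 2]] (trailing unit
// dimensions are folded into the last group); for all-unit sizes the result is
// empty, which folds everything to a rank-0 tensor.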
933
934} // namespace linalg
935} // namespace mlir
static SmallVector< int64_t > computePackUnPackPerm(int64_t rank, ArrayRef< int64_t > &innerDimsPos, ArrayRef< int64_t > &outerPerm, PackingMetadata &packingMetadata)
The permutation can be obtained from two permutations: a) Compute the permutation vector to move the ...
Definition Utils.cpp:150
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
Definition Utils.cpp:74
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put t...
Definition Utils.cpp:126
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
Definition PDL.cpp:62
Affine binary operation expression.
Definition AffineExpr.h:214
AffineExpr getLHS() const
AffineExpr getRHS() const
A dimensional identifier appearing in an affine expression.
Definition AffineExpr.h:223
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
Definition AffineExpr.h:68
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
Definition AffineMap.h:46
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr > > exprsList, MLIRContext *context)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the larges...
AffineExpr getResult(unsigned idx) const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Attributes are known-constant values of operations.
Definition Attributes.h:25
Block represents an ordered list of Operations.
Definition Block.h:33
BlockArgument getArgument(unsigned i)
Definition Block.h:129
unsigned getNumArguments()
Definition Block.h:128
OpListType & getOperations()
Definition Block.h:137
Operation & front()
Definition Block.h:153
iterator_range< iterator > without_terminator()
Return an iterator range over the operation within this block excluding the terminator operation at t...
Definition Block.h:212
IntegerAttr getIndexAttr(int64_t value)
Definition Builders.cpp:108
MLIRContext * getContext() const
Definition Builders.h:56
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
RAII guard to reset the insertion point of the builder when destroyed.
Definition Builders.h:348
This class helps build Operations.
Definition Builders.h:207
This class represents a single result from folding an operation.
This class represents an operand of an operation.
Definition Value.h:257
This is a value defined by a result of an operation.
Definition Value.h:457
unsigned getResultNumber() const
Returns the number of this result.
Definition Value.h:469
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26
Block & front()
Definition Region.h:65
bool hasOneBlock()
Return true if this region has exactly one block.
Definition Region.h:68
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
bool isSignlessIntOrFloat() const
Return true of this is a signless integer or a float type.
Definition Types.cpp:108
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18
Operation * getOwner() const
Return the owner of this operand.
Definition UseDefLists.h:38
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
SmallVector< int64_t > getUnPackInverseSrcPerm(linalg::UnPackOp, PackingMetadata &metadata)
Compute inverse permutation for the source tensor (i.e.
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
Definition Utils.cpp:855
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
Definition Utils.cpp:195
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
Definition Utils.cpp:230
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
Definition Utils.cpp:750
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
Definition Utils.cpp:306
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
Definition Utils.cpp:426
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
Definition Utils.cpp:736
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
Definition Utils.cpp:234
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
Definition Utils.cpp:201
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of a extract_slice (or source of a insert_slice) operat...
Definition Utils.cpp:913
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
Definition Utils.h:251
@ None
No Distribution.
Definition Utils.h:296
@ CyclicNumProcsGeNumIters
Cyclic distribution where the number of processors can be assumed to be more than or equal to the num...
Definition Utils.h:281
@ Cyclic
Cyclic distribution where no assumption is made about the dynamic relationship between number of proc...
Definition Utils.h:263
@ CyclicNumProcsEqNumIters
Cyclic distribution where the number of processors can be assumed to be equal to the number of iterat...
Definition Utils.h:293
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
Definition Utils.cpp:775
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
Definition Utils.cpp:215
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
Definition Utils.cpp:877
SmallVector< int64_t > getPackInverseDestPerm(linalg::PackOp packOp, PackingMetadata &metadata)
Compute inverse permutation for the destination tensor (i.e.
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Definition Utils.cpp:804
Operation * makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
Definition Utils.cpp:606
static Operation * materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
Definition Utils.cpp:587
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value padding, bool nofold, ValueRange typeDynDims={})
Create a tensor::PadOp that pads source to the shape of type whose sizes are assumed to be greater th...
Definition Utils.cpp:238
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
Definition Utils.cpp:405
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
Definition Utils.cpp:766
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
Definition Utils.cpp:619
auto m_Val(Value v)
Definition Matchers.h:539
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
Definition SCF.cpp:837
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
Definition SCF.h:64
PadOp createPadHighOp(RankedTensorType resType, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder, ValueRange dynOutDims={})
Definition Utils.cpp:23
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition Matchers.h:490
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
Definition AffineExpr.h:311
detail::NameOpMatcher m_Op(StringRef opName)
Matches a named operation.
Definition Matchers.h:379
@ Mul
RHS of mul is always a constant or a symbolic expression.
Definition AffineExpr.h:43
SmallVector< int64_t > computePermutationVector(int64_t permSize, ArrayRef< int64_t > positions, ArrayRef< int64_t > desiredPositions)
Return a permutation vector of size permSize that would result in moving positions into desiredPositi...
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
llvm::TypeSwitch< T, ResultT > TypeSwitch
Definition LLVM.h:144
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition Utils.cpp:111
detail::op_matcher< OpClass > m_Op()
Matches the given OpClass.
Definition Matchers.h:484
SmallVector< int64_t, 2 > ReassociationIndices
Definition Utils.h:27
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition Matchers.h:369
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
llvm::function_ref< Fn > function_ref
Definition LLVM.h:152
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Definition Utils.h:103
Value _and(Value lhs, Value rhs)
Definition Utils.cpp:311
Value slt(Value lhs, Value rhs)
Definition Utils.cpp:334
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
Definition Utils.h:365
static void doit(OpBuilder &b, Location loc, ArrayRef< Range > loopRanges, LinalgOp linalgOp, ArrayRef< utils::IteratorType > iteratorTypes, function_ref< scf::ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilderFn, ArrayRef< linalg::ProcInfo > procInfo={})
Callback function type used to get processor ID, and number of processors used for distribution for a...
Definition Utils.h:301
DistributionMethod distributionMethod
Definition Utils.h:304
static std::optional< BinaryOpKind > matchAsScalarBinaryOp(GenericOp op)
Matches the given linalg op if its body is performing binary operation on int or float scalar values ...
Definition Utils.cpp:93
A struct containing the offsets-sizes-strides arguments of the tiled shape.
Definition Utils.h:147
SmallVector< OpFoldResult > strides
Definition Utils.h:150
SmallVector< OpFoldResult > sizes
Definition Utils.h:149
SmallVector< OpFoldResult > offsets
Definition Utils.h:148
LoopVector loops
Definition SCF.h:67