#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "linalg-utils"
    assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
           "nonpositive multiplying coefficient");

  TileCheck t(tileSizes);
std::optional<RegionMatcher::BinaryOpKind>
  auto &region = op.getRegion();
  if (!region.hasOneBlock())

  if (addPattern.match(&ops.back()))

  for (Range range : ranges) {
static SmallVector<int64_t>
                      PackingMetadata &packingMetadata) {
  int64_t numPackedDims = innerDimsPos.size();
      llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
  packingMetadata = computePackingMetadata(rank, innerDimsPos);
  if (!outerPerm.empty())
  return packInverseDestPermutation;
                                            PackingMetadata &metadata) {
  int64_t packedRank = packOp.getDestType().getRank();
  return packInvDestPerm;

                                              PackingMetadata &metadata) {
  int64_t packedRank = unpackOp.getSourceType().getRank();
  return unpackInvSrcPerm;
  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
    return m.isProjectedPermutation(true);
  if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
            linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
      llvm::any_of(op.getResultTypes(),
                   [](Type type) { return !type.isIntOrIndexOrFloat(); }))
  if (op.getNumLoops() != op.getNumParallelLoops())

  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    if (!op.getMatchingIndexingMap(&opOperand).isPermutation())

  return iteratorType == utils::IteratorType::parallel;

  return iteratorType == utils::IteratorType::reduction;
  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();

  Value current = sliceOp.getSource();
    OpResult opResult = cast<OpResult>(current);
    current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;

  if (sliceOp.getSource().getType() != type)
  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
        return getConstantIntValue(ofr) != static_cast<int64_t>(0);

  auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
      sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())

          llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
          [](std::tuple<OpFoldResult, OpFoldResult> it) {
            return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));

  Value padOpPad = padOp.getConstantPaddingValue();

  return sliceOp.getSource();
  auto memrefTypeTo = cast<MemRefType>(to.getType());

  auto memrefTypeFrom = cast<MemRefType>(from.getType());
  assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
         "`from` and `to` memref must have the same rank");

      utils::IteratorType::parallel);
  return linalg::GenericOp::create(

        linalg::YieldOp::create(b, loc, args.front());
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected as many entries for proc info as number of loops, even if "
         "they are null entries");

  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());

      b, loc, lbs, ubs, steps, iterArgInitValues,
        assert(iterArgs.size() == iterArgInitValues.size() &&
               "expect the number of output tensors and iter args to match");

        if (!iterArgs.empty()) {
          operandValuesToUse = linalgOp.getDpsInputs();
          operandValuesToUse.append(iterArgs.begin(), iterArgs.end());

        return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
  if (loopNest.loops.empty() || procInfo.empty())

  for (const auto &loop : llvm::enumerate(loopNest.loops)) {
    if (procInfo[loop.index()].distributionMethod ==
      mapLoopToProcessorIds(loop.value(), procInfo[loop.index()].procId,
                            procInfo[loop.index()].nprocs);
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");

  constantSteps.reserve(steps.size());
  for (Value v : steps) {
    assert(constVal.has_value() && "Affine loops require constant steps");
    constantSteps.push_back(constVal.value());

        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
  assert(lbs.size() == ubs.size());
  assert(lbs.size() == steps.size());
  assert(lbs.size() == iteratorTypes.size());
  assert(procInfo.empty() || (lbs.size() == procInfo.size()));

  if (iteratorTypes.empty()) {
    bodyBuilderFn(b, loc, ivStorage);

        b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
          ivStorage.append(ivs.begin(), ivs.end());
          generateParallelLoopNest(
              b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
              iteratorTypes.drop_front(),
              procInfo.empty() ? procInfo : procInfo.drop_front(),
              bodyBuilderFn, ivStorage);
  unsigned nLoops = iteratorTypes.size();
  unsigned numProcessed = 0;
  if (procInfo.empty()) {
    distributionMethod = procInfo.front().distributionMethod;

  auto remainderProcInfo =
      procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
  switch (distributionMethod) {

    scf::ParallelOp::create(
        b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);

    scf::ParallelOp::create(
        b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);
    Value cond = ab.slt(lbs[0], ubs[0]);
    for (unsigned i = 1; i < numProcessed; ++i)
      cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
          ubs.drop_front(numProcessed),
          steps.drop_front(numProcessed),
          iteratorTypes.drop_front(numProcessed),
          remainderProcInfo, bodyBuilderFn, ivStorage);

    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
        b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
        steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
        remainderProcInfo, bodyBuilderFn, ivStorage);
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");

  assert(iteratorTypes.size() >= loopRanges.size() &&
         "expected iterator type for all ranges");
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected proc information for all loops when present");
  iteratorTypes = iteratorTypes.take_front(loopRanges.size());

  unsigned numLoops = iteratorTypes.size();
  ivs.reserve(numLoops);
  lbsStorage.reserve(numLoops);
  ubsStorage.reserve(numLoops);
  stepsStorage.reserve(numLoops);

  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);

  for (const auto &it : llvm::enumerate(procInfo)) {
        b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
        ubsStorage[it.index()], stepsStorage[it.index()]);

  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
      b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());

  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
      .Case([&](MemRefType) {
        return memref::SubViewOp::create(
            builder, loc, valueToTile, sliceParams.offsets,
      .Case([&](RankedTensorType) {
        return tensor::ExtractSliceOp::create(
            builder, loc, valueToTile, sliceParams.offsets,
      .DefaultUnreachable("Unexpected shaped type");
                                   bool omitPartialTileCheck) {
                               ubs, subShapeSizes, omitPartialTileCheck);

                       bool omitPartialTileCheck) {
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
  assert(shapedType && "only shaped types can be tiled");
  int64_t rank = shapedType.getRank();
  sliceParams.offsets.reserve(rank);
  sliceParams.sizes.reserve(rank);
  sliceParams.strides.reserve(rank);
  for (unsigned r = 0; r < rank; ++r) {
    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
      sliceParams.sizes.push_back(dim);
      LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");

    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");

    [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
    assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
        cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
        rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
    sliceParams.offsets.push_back(offset);

    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: raw size: " << size << "\n");
    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: new offset: " << offset << "\n");

    if (omitPartialTileCheck) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
      sliceParams.sizes.push_back(size);

    auto hasTileSizeOne = sizeCst == 1;
    auto dividesEvenly = sizeCst && ShapedType::isStatic(shapeSize) &&
                         ((shapeSize % *sizeCst) == 0);
    if (!hasTileSizeOne && !dividesEvenly) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
                              << ", size: " << size
                              << ": make sure in bound with affine.min\n");
      bindDims(context, dim0, dim1, dim2);

    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
    sliceParams.sizes.push_back(size);
  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
    offsets.push_back(isTiled ? ivs[idxIvs++] : b.getIndexAttr(0));
    LLVM_DEBUG(llvm::dbgs()
               << "computeTileOffsets: " << offsets.back() << "\n");

  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
  if (op.hasPureBufferSemantics())
  return llvm::to_vector(
      llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        return operands[opOperand.getOperandNumber()].getType();

  if (op.hasPureBufferSemantics())
  tensorResults.reserve(results.size());
  unsigned resultIdx = 0;
  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    Value outputTensor = operands[opOperand.getOperandNumber()];
    if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
          builder, loc, sliceOp.getSource().getType(), results[resultIdx],
          sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
          sliceOp.getStrides(), sliceOp.getStaticOffsets(),
          sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
      tensorResults.push_back(results[resultIdx]);
  return tensorResults;
                          bool omitPartialTileCheck) {
  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
                           llvm::make_range(tileSizes.begin(), tileSizes.end()),
         "expected as many ivs as non-zero sizes");

  assert(static_cast<int64_t>(valuesToTile.size()) <=
             linalgOp->getNumOperands() &&
         "more value to tile than operands.");
  allSliceParams.reserve(valuesToTile.size());
  for (auto [opOperand, val] :
       llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
    Value shapedOp = val;
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
    AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);

    Type operandType = opOperand.get().getType();
    if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
                                      linalgOp.isDpsInit(&opOperand))) {
      allSliceParams.push_back(std::nullopt);
      LLVM_DEBUG(llvm::dbgs()
                 << ": not tiled: use shape: " << operandType << "\n");

    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
        builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
        omitPartialTileCheck));

  return allSliceParams;
                     bool omitPartialTileCheck) {
                                 tileSizes, sizeBounds, omitPartialTileCheck);

  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
    Value valueToTile = std::get<0>(item);
    std::optional<SliceParameters> sliceParams = std::get<1>(item);
    tiledShapes.push_back(
        sliceParams.has_value()

  if (!linalgOp.hasIndexSemantics())

  for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
    if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
    b.setInsertionPointAfter(indexOp);
        b, indexOp.getLoc(), index + offset,
        {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
    b.replaceUsesWithIf(indexOp, materialized, [&](OpOperand &use) {
std::optional<SmallVector<ReassociationIndices>>
  for (const auto &it : llvm::enumerate(mixedSizes)) {
    auto dim = it.index();
    auto size = it.value();
    auto attr = llvm::dyn_cast_if_present<Attribute>(size);
    if (attr && cast<IntegerAttr>(attr).getInt() == 1)
    std::swap(reassociation.back(), curr);

  if (!curr.empty() && !reassociation.empty())
    reassociation.back().append(curr.begin(), curr.end());
  return reassociation;
static SmallVector< int64_t > computePackUnPackPerm(int64_t rank, ArrayRef< int64_t > &innerDimsPos, ArrayRef< int64_t > &outerPerm, PackingMetadata &packingMetadata)
The permutation can be obtained from two permutations: a) Compute the permutation vector to move the ...
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put t...
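A minimal sketch mirroring the calls later in this file (the builder `b`, the location `loc`, and `loopRanges` are assumed to exist in the caller):
  // Turn each Range {offset, size, stride} into lb/ub/step SSA values before
  // building the loop nest.
  SmallVector<Value> lbsStorage, ubsStorage, stepsStorage;
  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);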
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
Affine binary operation expression.
AffineExpr getLHS() const
AffineExpr getRHS() const
A dimensional identifier appearing in an affine expression.
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr > > exprsList, MLIRContext *context)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the larges...
AffineExpr getResult(unsigned idx) const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Attributes are known-constant values of operations.
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
OpListType & getOperations()
iterator_range< iterator > without_terminator()
Return an iterator range over the operation within this block excluding the terminator operation at t...
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isSignlessIntOrFloat() const
Return true of this is a signless integer or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Operation * getOwner() const
Return the owner of this operand.
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
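A hedged sketch of the constant-bounds overload above, assuming it is reached through the mlir::affine namespace; the 128x64 iteration space and unit steps are illustrative:
  affine::buildAffineLoopNest(
      b, loc, /*lbs=*/{0, 0}, /*ubs=*/{128, 64}, /*steps=*/{1, 1},
      [&](OpBuilder &nested, Location nestedLoc, ValueRange ivs) {
        // ivs[0] and ivs[1] are the induction variables of the two
        // generated affine.for loops.
      });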
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
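A small sketch, assuming an OpBuilder `b`, a Location `loc`, two OpFoldResults `a` and `sz`, and the mlir::affine namespace: folding `a + sz` into either a constant attribute or a single affine.apply:
  AffineExpr d0, d1;
  bindDims(b.getContext(), d0, d1);
  OpFoldResult upper = affine::makeComposedFoldedAffineApply(
      b, loc, AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, d0 + d1),
      {a, sz});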
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
SmallVector< int64_t > getUnPackInverseSrcPerm(linalg::UnPackOp, PackingMetadata &metadata)
Compute inverse permutation for the source tensor (i.e.
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
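A hedged usage sketch inside an already-built tile loop nest; `ivs`, `tileSizes`, and `sizeBounds` are assumed to come from the caller's tiling setup:
  SmallVector<Value> tiledOperands = linalg::makeTiledShapes(
      b, loc, linalgOp, linalgOp->getOperands(), ivs, tileSizes, sizeBounds,
      /*omitPartialTileCheck=*/false);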
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
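Illustrative use, assuming `from` and `to` are memrefs of matching rank:
  // Builds an all-parallel, identity-indexed linalg.generic whose body simply
  // yields the input element.
  linalg::GenericOp copyOp = linalg::makeMemRefCopyOp(b, loc, from, to);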
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
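A sketch combining this helper with computeTileSizes above; the names `ivs`, `tileSizes`, and `sizeBounds` are assumed from the surrounding tiling code:
  SmallVector<OpFoldResult> tileOffsets =
      linalg::computeTileOffsets(b, loc, getAsOpFoldResult(ivs), tileSizes);
  SmallVector<OpFoldResult> subShapeSizes =
      linalg::computeTileSizes(b, loc, tileSizes, sizeBounds);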
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of a extract_slice (or source of a insert_slice) operat...
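A worked example of the computation below (sizes illustrative): for mixed sizes {1, %n, 1, 16, 1}, each unit dimension is grouped with the next non-unit dimension and trailing units are folded into the last group, giving the reassociation [[0, 1], [2, 3, 4]].
  std::optional<SmallVector<ReassociationIndices>> reassoc =
      linalg::getReassociationMapForFoldingUnitDims(sliceOp.getMixedSizes());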
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
@ CyclicNumProcsGeNumIters
Cyclic distribution where the number of processors can be assumed to be more than or equal to the num...
@ Cyclic
Cyclic distribution where no assumption is made about the dynamic relationship between number of proc...
@ CyclicNumProcsEqNumIters
Cyclic distribution where the number of processors can be assumed to be equal to the number of iterat...
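A hedged sketch of how a caller might populate the per-loop distribution info consumed above; `gridId`, `gridSize`, and `numParallelLoops` are illustrative:
  linalg::ProcInfo info;
  info.procId = gridId;    // SSA value identifying this processor.
  info.nprocs = gridSize;  // SSA value for the number of processors.
  info.distributionMethod = linalg::DistributionMethod::Cyclic;
  SmallVector<linalg::ProcInfo> procInfo(numParallelLoops, info);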
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
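Sketch, assuming `tiledOperands` are the slices fed to the tiled op and `tiledOp` is the op built on them:
  SmallVector<Value> newResults = linalg::insertSlicesBack(
      b, loc, linalgOp, tiledOperands, tiledOp->getResults());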
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offsets)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
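Illustrative call after cloning a LinalgOp onto a tile extracted at `tileOffsets` (names assumed):
  // Rewrites each linalg.index result as index + tileOffsets[dim] so the body
  // observes untiled coordinates.
  linalg::offsetIndices(b, clonedLinalgOp, tileOffsets);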
SmallVector< int64_t > getPackInverseDestPerm(linalg::PackOp packOp, PackingMetadata &metadata)
Compute inverse permutation for the destination tensor (i.e.
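Minimal sketch:
  PackingMetadata metadata;
  SmallVector<int64_t> invDestPerm =
      linalg::getPackInverseDestPerm(packOp, metadata);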
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Operation * makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
static Operation * materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value padding, bool nofold, ValueRange typeDynDims={})
Create a tensor::PadOp that pads source to the shape of type whose sizes are assumed to be greater th...
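Hedged sketch padding `source` up to a static 8x16 f32 shape with a zero constant (`zero` assumed to be defined by the caller):
  auto paddedType = RankedTensorType::get({8, 16}, b.getF32Type());
  Value padded = linalg::makeComposedPadHighOp(
      b, loc, paddedType, source, /*padding=*/zero, /*nofold=*/false);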
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
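Sketch: after this call, processor `procId` of `nprocs` starts at lb + procId * step and strides by nprocs * step; lb, ub, and step are updated in place:
  linalg::updateBoundsForCyclicDistribution(b, loc, procId, nprocs, lb, ub,
                                            step);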
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
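Sketch for one operand; `operand`, `indexingMap`, and the bound vectors are assumed from the tiling driver, and the result feeds extract_slice/subview creation:
  linalg::SliceParameters params = linalg::computeSliceParameters(
      b, loc, operand, tileSizes, indexingMap, lbs, ubs, subShapeSizes,
      /*omitPartialTileCheck=*/false);
  // params.offsets / params.sizes / params.strides are the offsets-sizes-
  // strides of the slice of `operand`.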
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
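Hedged sketch mirroring its use earlier in this file; `lbs`, `ubs`, `steps`, and `iterArgInitValues` are assumed:
  scf::LoopNest loopNest = scf::buildLoopNest(
      b, loc, lbs, ubs, steps, iterArgInitValues,
      [&](OpBuilder &nb, Location nl, ValueRange ivs,
          ValueRange iterArgs) -> scf::ValueVector {
        // Build the innermost body here; the returned values become the
        // iter_args of the next iteration and, ultimately, the loop results.
        return scf::ValueVector(iterArgs.begin(), iterArgs.end());
      });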
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
PadOp createPadHighOp(RankedTensorType resType, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder, ValueRange dynOutDims={})
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
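Small example matching its use in the pad check above:
  OpFoldResult low = b.getIndexAttr(0);
  bool isZeroPad = getConstantIntValue(low) == static_cast<int64_t>(0);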
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
detail::NameOpMatcher m_Op(StringRef opName)
Matches a named operation.
@ Mul
RHS of mul is always a constant or a symbolic expression.
SmallVector< int64_t > computePermutationVector(int64_t permSize, ArrayRef< int64_t > positions, ArrayRef< int64_t > desiredPositions)
Return a permutation vector of size permSize that would result in moving positions into desiredPositi...
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
llvm::TypeSwitch< T, ResultT > TypeSwitch
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
detail::op_matcher< OpClass > m_Op()
Matches the given OpClass.
SmallVector< int64_t, 2 > ReassociationIndices
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
llvm::function_ref< Fn > function_ref
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Value _and(Value lhs, Value rhs)
Value slt(Value lhs, Value rhs)
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
static void doit(OpBuilder &b, Location loc, ArrayRef< Range > loopRanges, LinalgOp linalgOp, ArrayRef< utils::IteratorType > iteratorTypes, function_ref< scf::ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilderFn, ArrayRef< linalg::ProcInfo > procInfo={})
Callback function type used to get processor ID, and number of processors used for distribution for a...
DistributionMethod distributionMethod
static std::optional< BinaryOpKind > matchAsScalarBinaryOp(GenericOp op)
Matches the given linalg op if its body is performing binary operation on int or float scalar values ...
A struct containing offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > strides
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets