#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "linalg-utils"

using namespace presburger;

  assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
         "nonpositive multiplying coefficient");

  TileCheck t(tileSizes);

std::optional<RegionMatcher::BinaryOpKind>
RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
  auto &region = op.getRegion();
  if (!llvm::hasSingleElement(region))

  auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
  if (addPattern.match(&ops.back()))
    return BinaryOpKind::IAdd;

  for (Range range : ranges) {

                                   PackingMetadata &packingMetadata) {
    llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
  packingMetadata = computePackingMetadata(rank, innerDimsPos);
  if (!outerPerm.empty())
  return packInverseDestPermutation;

  PackingMetadata pMetadata;
  int64_t packedRank = packOp.getDestType().getRank();
  return packInvDestPerm;

  PackingMetadata metadata;
                                               PackingMetadata &metadata) {
  int64_t unpackRank = unpackOp.getSourceType().getRank();
  return unpackInvSrcPerm;

  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
    return m.isProjectedPermutation(true);

  if (!llvm::hasSingleElement(r))
  if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
            linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
        llvm::any_of(op.getResultTypes(),
                     [](Type type) { return !type.isIntOrIndexOrFloat(); }))

  if (op.getNumLoops() != op.getNumParallelLoops())
  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    if (!op.getMatchingIndexingMap(&opOperand).isPermutation())

  return iteratorType == utils::IteratorType::parallel;
  return iteratorType == utils::IteratorType::reduction;

  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
  Value current = sliceOp.getSource();
    OpResult opResult = cast<OpResult>(current);
    current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
  if (sliceOp.getSource().getType() != type)
  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
        return getConstantIntValue(ofr) != static_cast<int64_t>(0);
  auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
      sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
          llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
          [](std::tuple<OpFoldResult, OpFoldResult> it) {
            return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
  Value padOpPad = padOp.getConstantPaddingValue();
  return sliceOp.getSource();
  auto memrefTypeTo = cast<MemRefType>(to.getType());
  auto memrefTypeFrom = cast<MemRefType>(from.getType());
  assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
         "`from` and `to` memref must have the same rank");
                                  utils::IteratorType::parallel);
  return b.create<linalg::GenericOp>(
        b.create<linalg::YieldOp>(loc, args.front());

  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected as many entries for proc info as number of loops, even if "
         "they are null entries");
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
      b, loc, lbs, ubs, steps, iterArgInitValues,
        assert(iterArgs.size() == iterArgInitValues.size() &&
               "expect the number of output tensors and iter args to match");
        if (!iterArgs.empty()) {
          operandValuesToUse = linalgOp.getDpsInputs();
          operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
        return bodyBuilderFn(b, loc, ivs, operandValuesToUse);

  if (loopNest.loops.empty() || procInfo.empty())
    if (procInfo[loop.index()].distributionMethod ==
        DistributionMethod::Cyclic) {
                              procInfo[loop.index()].nprocs);

  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
  constantSteps.reserve(steps.size());
  for (Value v : steps) {
    assert(constVal.has_value() && "Affine loops require constant steps");
    constantSteps.push_back(constVal.value());
        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
  assert(lbs.size() == ubs.size());
  assert(lbs.size() == steps.size());
  assert(lbs.size() == iteratorTypes.size());
  assert(procInfo.empty() || (lbs.size() == procInfo.size()));

  if (iteratorTypes.empty()) {
    bodyBuilderFn(b, loc, ivStorage);

        b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
          ivStorage.append(ivs.begin(), ivs.end());
          generateParallelLoopNest(
              b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
              iteratorTypes.drop_front(),
              procInfo.empty() ? procInfo : procInfo.drop_front(),
              bodyBuilderFn, ivStorage);

  unsigned nLoops = iteratorTypes.size();
  unsigned numProcessed = 0;
  if (procInfo.empty()) {

    distributionMethod = procInfo.front().distributionMethod;

  auto remainderProcInfo =
      procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
  switch (distributionMethod) {
    b.create<scf::ParallelOp>(
        loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
          ivStorage.append(localIvs.begin(), localIvs.end());
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);
  case DistributionMethod::Cyclic: {
    b.create<scf::ParallelOp>(
        loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
          ivStorage.append(localIvs.begin(), localIvs.end());
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);
  case DistributionMethod::CyclicNumProcsGeNumIters: {
    Value cond = ab.slt(lbs[0], ubs[0]);
    for (unsigned i = 1; i < numProcessed; ++i)
      cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
                               ubs.drop_front(numProcessed),
                               steps.drop_front(numProcessed),
                               iteratorTypes.drop_front(numProcessed),
                               remainderProcInfo, bodyBuilderFn, ivStorage);
  case DistributionMethod::CyclicNumProcsEqNumIters:
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
        b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
        steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
        remainderProcInfo, bodyBuilderFn, ivStorage);
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
  assert(iteratorTypes.size() >= loopRanges.size() &&
         "expected iterator type for all ranges");
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected proc information for all loops when present");
  iteratorTypes = iteratorTypes.take_front(loopRanges.size());

  unsigned numLoops = iteratorTypes.size();
  ivs.reserve(numLoops);
  lbsStorage.reserve(numLoops);
  ubsStorage.reserve(numLoops);
  stepsStorage.reserve(numLoops);

  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
        b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
        ubsStorage[it.index()], stepsStorage[it.index()]);
  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
      b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
      .Case([&](MemRefType) {
        return builder.create<memref::SubViewOp>(
            loc, valueToTile, sliceParams.offsets,
      .Case([&](RankedTensorType) {
        return builder.create<tensor::ExtractSliceOp>(
            loc, valueToTile, sliceParams.offsets,
        llvm_unreachable("Unexpected shaped type");

                                 bool omitPartialTileCheck) {
                              ubs, subShapeSizes, omitPartialTileCheck);

                                        bool omitPartialTileCheck) {
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
  assert(shapedType && "only shaped types can be tiled");
  int64_t rank = shapedType.getRank();
  sliceParams.offsets.reserve(rank);
  sliceParams.sizes.reserve(rank);
  sliceParams.strides.reserve(rank);
  for (unsigned r = 0; r < rank; ++r) {
    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
      sliceParams.sizes.push_back(dim);
      LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
    [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
    assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
        cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
        rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
    sliceParams.offsets.push_back(offset);
    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: raw size: " << size << "\n");
    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: new offset: " << offset << "\n");
    if (omitPartialTileCheck) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
      sliceParams.sizes.push_back(size);
    int64_t shapeSize = shape[r];
    auto hasTileSizeOne = sizeCst && *sizeCst == 1;
    auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
                         ((shapeSize % *sizeCst) == 0);
    if (!hasTileSizeOne && !dividesEvenly) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
                              << ", size: " << size
                              << ": make sure in bound with affine.min\n");
      bindDims(context, dim0, dim1, dim2);
          llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
    sliceParams.sizes.push_back(size);
  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
    LLVM_DEBUG(llvm::dbgs()
               << "computeTileOffsets: " << offsets.back() << "\n");

  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");

  if (op.hasPureBufferSemantics())
  return llvm::to_vector(
      llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        return operands[opOperand.getOperandNumber()].getType();

  if (op.hasPureBufferSemantics())
  tensorResults.reserve(results.size());
  unsigned resultIdx = 0;
  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    Value outputTensor = operands[opOperand.getOperandNumber()];
    if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
      Value inserted = builder.create<tensor::InsertSliceOp>(
          loc, sliceOp.getSource().getType(), results[resultIdx],
          sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
          sliceOp.getStrides(), sliceOp.getStaticOffsets(),
          sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
      tensorResults.push_back(inserted);
      tensorResults.push_back(results[resultIdx]);
  return tensorResults;
                               bool omitPartialTileCheck) {
  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
             llvm::make_range(tileSizes.begin(), tileSizes.end()),
         "expected as many ivs as non-zero sizes");

  assert(static_cast<int64_t>(valuesToTile.size()) <=
             linalgOp->getNumOperands() &&
         "more value to tile than operands.");
  allSliceParams.reserve(valuesToTile.size());
  for (auto [opOperand, val] :
       llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
    Value shapedOp = val;
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
    AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
    Type operandType = opOperand.get().getType();
    if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
                                      linalgOp.isDpsInit(&opOperand))) {
      allSliceParams.push_back(std::nullopt);
      LLVM_DEBUG(llvm::dbgs()
                 << ": not tiled: use shape: " << operandType << "\n");
    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
        builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
        omitPartialTileCheck));
  return allSliceParams;

                                      bool omitPartialTileCheck) {
                                tileSizes, sizeBounds, omitPartialTileCheck);
  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
    Value valueToTile = std::get<0>(item);
    std::optional<SliceParameters> sliceParams = std::get<1>(item);
    tiledShapes.push_back(
        sliceParams.has_value()

  if (!linalgOp.hasIndexSemantics())
  for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
    if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
        b, indexOp.getLoc(), index + offset,
        {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});

std::optional<SmallVector<ReassociationIndices>>
    auto dim = it.index();
    auto size = it.value();
    auto attr = llvm::dyn_cast_if_present<Attribute>(size);
    if (attr && cast<IntegerAttr>(attr).getInt() == 1)
    std::swap(reassociation.back(), curr);
  if (!curr.empty() && !reassociation.empty())
    reassociation.back().append(curr.begin(), curr.end());
  return reassociation;
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put t...
static SmallVector< int64_t > computePackUnPackPerm(int64_t rank, ArrayRef< int64_t > &innerDimsPos, ArrayRef< int64_t > &outerPerm, PackingMetadata &packingMetadata)
The permutation can be obtained from two permutations: a) Compute the permutation vector to move the ...
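
The pack/unpack permutation helpers above reduce to ordinary composition and inversion of index permutations. The following standalone sketch (plain C++ with an illustrative invertPermutation helper, not the MLIR API) shows the inversion step that the "inverse dest/src perm" utilities rely on: given a permutation p, build q such that q[p[i]] = i.

#include <cassert>
#include <cstdint>
#include <vector>

// Invert a permutation: if p maps source position i to destination p[i],
// the inverse maps destination p[i] back to i. Illustrative helper only.
static std::vector<int64_t> invertPermutation(const std::vector<int64_t> &p) {
  std::vector<int64_t> inv(p.size());
  for (int64_t i = 0, e = p.size(); i < e; ++i)
    inv[p[i]] = i;
  return inv;
}

int main() {
  // Example: a pack-like layout change on a rank-3 value where dims are
  // reordered as (2, 0, 1). The inverse permutation restores (0, 1, 2).
  std::vector<int64_t> perm = {2, 0, 1};
  std::vector<int64_t> inv = invertPermutation(perm);
  assert(inv[2] == 0 && inv[0] == 1 && inv[1] == 2);
  return 0;
}
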
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
SmallVector< int64_t > innerDimsPos
Affine binary operation expression.
AffineExpr getLHS() const
AffineExpr getRHS() const
A dimensional identifier appearing in an affine expression.
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
AffineExpr getResult(unsigned idx) const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr >> exprsList, MLIRContext *context)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the larges...
Attributes are known-constant values of operations.
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
OpListType & getOperations()
iterator_range< iterator > without_terminator()
Return an iterator range over the operation within this block excluding the terminator operation at t...
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
void replaceUsesWithIf(Value from, Value to, function_ref< bool(OpOperand &)> functor, bool *allUsesReplaced=nullptr)
Find uses of from and replace them with to if the functor returns true.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isSignlessIntOrFloat() const
Return true if this is a signless integer or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Operation * getOwner() const
Return the owner of this operand.
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
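
The "folded" variants return a constant instead of materializing an affine.apply when every operand is a known constant. The snippet below is only a plain-C++ illustration of that arithmetic; the lambda affineExpr stands in for an affine map and is not part of the MLIR API.

#include <cassert>
#include <cstdint>

int main() {
  // Conceptual stand-in for a two-dim affine map: (d0, d1) -> (d0 + d1 * 16).
  auto affineExpr = [](int64_t d0, int64_t d1) { return d0 + d1 * 16; };

  // With all operands constant, the "composed folded apply" reduces to a
  // constant value rather than an op in the IR.
  int64_t folded = affineExpr(/*d0=*/4, /*d1=*/2);
  assert(folded == 36);
  return 0;
}
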
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
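
Together with computeTileSizes above, the convention is that a tile size of zero marks a dimension as untiled: untiled dimensions get offset 0 and keep their full extent, while tiled dimensions take their offset from the corresponding loop induction variable. A plain-C++ paraphrase (computeTileInfo is an illustrative name; the OpFoldResult folding details are omitted):

#include <cassert>
#include <cstdint>
#include <vector>

// A tile size of 0 marks a dimension as "not tiled".
struct TileInfo {
  std::vector<int64_t> offsets;
  std::vector<int64_t> sizes;
};

static TileInfo computeTileInfo(const std::vector<int64_t> &ivs,
                                const std::vector<int64_t> &tileSizes,
                                const std::vector<int64_t> &sizeBounds) {
  TileInfo info;
  size_t idxIvs = 0;
  for (size_t d = 0; d < tileSizes.size(); ++d) {
    bool tiled = tileSizes[d] != 0;
    // Tiled dims take their offset from the loop induction variable;
    // untiled dims start at 0 and cover the whole extent.
    info.offsets.push_back(tiled ? ivs[idxIvs++] : 0);
    info.sizes.push_back(tiled ? tileSizes[d] : sizeBounds[d]);
  }
  return info;
}

int main() {
  // 3-D iteration space of 128x64x32, tiling only dims 0 and 2 by 16 and 8.
  TileInfo t = computeTileInfo(/*ivs=*/{32, 24}, /*tileSizes=*/{16, 0, 8},
                               /*sizeBounds=*/{128, 64, 32});
  assert((t.offsets == std::vector<int64_t>{32, 0, 24}));
  assert((t.sizes == std::vector<int64_t>{16, 64, 8}));
  return 0;
}
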
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
static Operation * materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of an extract_slice (or source of an insert_slice) operation...
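
The grouping logic at the end of the listing above can be paraphrased in plain C++ as follows (groupUnitDims is an illustrative stand-in that only handles static sizes): consecutive unit dimensions are folded into the next non-unit dimension, and a trailing run of unit dimensions is appended to the last group.

#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<std::vector<int64_t>>
groupUnitDims(const std::vector<int64_t> &sizes) {
  std::vector<std::vector<int64_t>> reassociation;
  std::vector<int64_t> curr;
  for (int64_t dim = 0, e = sizes.size(); dim < e; ++dim) {
    curr.push_back(dim);
    if (sizes[dim] == 1)
      continue;                      // keep accumulating unit dims
    reassociation.push_back(curr);   // close the group at a non-unit dim
    curr.clear();
  }
  // Fold a trailing run of unit dims into the last group.
  if (!curr.empty() && !reassociation.empty())
    reassociation.back().insert(reassociation.back().end(), curr.begin(),
                                curr.end());
  return reassociation;
}

int main() {
  // Sizes 1x8x1x1x4x1 collapse to 8x4 with groups {0,1} and {2,3,4,5}.
  auto groups = groupUnitDims({1, 8, 1, 1, 4, 1});
  assert(groups.size() == 2);
  assert((groups[0] == std::vector<int64_t>{0, 1}));
  assert((groups[1] == std::vector<int64_t>{2, 3, 4, 5}));
  return 0;
}
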
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
SmallVector< int64_t > getPackInverseDestPerm(PackOp packOp)
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offsets)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
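
In other words, after tiling, a linalg.index op yields a tile-local index, and the helper rewrites it to the local index plus the tile offset (emitted as a composed affine.apply in the actual code). A minimal numeric sketch of that arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  int64_t tileOffset = 128; // offset of the current tile in dim 0
  int64_t localIndex = 5;   // value produced by linalg.index inside the tile
  int64_t globalIndex = localIndex + tileOffset;
  assert(globalIndex == 133);
  return 0;
}
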
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Operation * makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value pad, bool nofold)
Create a tensor::PadOp that pads source to the size of the statically sized type whose static sizes a...
SmallVector< int64_t > getUnPackInverseSrcPerm(UnPackOp unpackOp, PackingMetadata &metadata)
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
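
The rewrite is plain strided arithmetic: processor procId of nprocs starts at lb + procId * step and advances by nprocs * step. The sketch below is illustrative C++ (cyclicBounds is not the MLIR helper), showing which iterations one processor ends up executing.

#include <cassert>
#include <cstdint>
#include <vector>

struct Bounds {
  int64_t lb, ub, step;
};

// Per-processor bounds for cyclic distribution.
static Bounds cyclicBounds(Bounds b, int64_t procId, int64_t nprocs) {
  return {b.lb + procId * b.step, b.ub, b.step * nprocs};
}

int main() {
  // Loop 0..100 step 10 split over 4 processors: processor 1 gets 10, 50, 90.
  Bounds b = cyclicBounds({/*lb=*/0, /*ub=*/100, /*step=*/10}, /*procId=*/1,
                          /*nprocs=*/4);
  std::vector<int64_t> ivs;
  for (int64_t iv = b.lb; iv < b.ub; iv += b.step)
    ivs.push_back(iv);
  assert((ivs == std::vector<int64_t>{10, 50, 90}));
  return 0;
}

As the CyclicNumProcsGeNumIters and CyclicNumProcsEqNumIters cases in the listing show, when there are at least as many processors as iterations the per-processor loop degenerates to a single iteration at the adjusted lower bound, guarded by lb < ub in the first case and unguarded in the second.
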
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
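
Stripped of the indexing-map composition and OpFoldResult plumbing, the per-dimension result is an offset, a size clamped so the last partial tile stays in bounds (the role of the affine.min in the listing above), and a unit stride. A standalone sketch with an illustrative sliceForDim helper:

#include <algorithm>
#include <cassert>
#include <cstdint>

struct DimSlice {
  int64_t offset, size, stride;
};

static DimSlice sliceForDim(int64_t iv, int64_t tileSize, int64_t dimSize) {
  int64_t offset = iv;                                 // where the tile starts
  int64_t size = std::min(tileSize, dimSize - offset); // clamp the last tile
  return {offset, size, /*stride=*/1};
}

int main() {
  // Dim of extent 100 tiled by 32: the tile at iv = 96 is clamped to size 4.
  DimSlice full = sliceForDim(/*iv=*/64, /*tileSize=*/32, /*dimSize=*/100);
  DimSlice last = sliceForDim(/*iv=*/96, /*tileSize=*/32, /*dimSize=*/100);
  assert(full.size == 32 && last.size == 4 && last.offset == 96);
  return 0;
}
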
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
PadOp createPadHighOp(RankedTensorType resType, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder, SmallVector< Value > dynOutDim={})
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
bool isZeroIndex(OpFoldResult v)
Return true if v is an IntegerAttr with value 0 or a ConstantIndexOp with attribute value 0.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
AffineExprKind::Mul
RHS of mul is always a constant or a symbolic expression.
SmallVector< int64_t > computePermutationVector(int64_t permSize, ArrayRef< int64_t > positions, ArrayRef< int64_t > desiredPositions)
Return a permutation vector of size permSize that would result in moving positions into desiredPositi...
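
As a concrete illustration, moving positions {2, 3} of a size-4 identity ordering to desired positions {0, 1} while keeping the remaining elements in order gives {2, 3, 0, 1}. The helper below (movePositions) is an ad-hoc stand-in written for this example under that convention; it is not the MLIR implementation, and the library's exact output convention may differ.

#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<int64_t>
movePositions(int64_t permSize, const std::vector<int64_t> &positions,
              const std::vector<int64_t> &desiredPositions) {
  std::vector<int64_t> perm(permSize, -1);
  std::vector<bool> used(permSize, false);
  // Place the requested elements at their desired slots.
  for (size_t i = 0; i < positions.size(); ++i) {
    perm[desiredPositions[i]] = positions[i];
    used[positions[i]] = true;
  }
  // Fill the remaining slots with the leftover elements in increasing order.
  int64_t next = 0;
  for (int64_t &slot : perm) {
    if (slot != -1)
      continue;
    while (used[next])
      ++next;
    slot = next;
    used[next] = true;
  }
  return perm;
}

int main() {
  auto perm =
      movePositions(4, /*positions=*/{2, 3}, /*desiredPositions=*/{0, 1});
  assert((perm == std::vector<int64_t>{2, 3, 0, 1}));
  return 0;
}
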
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Value _and(Value lhs, Value rhs)
Value slt(Value lhs, Value rhs)
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
Callback function type used to get processor ID, and number of processors used for distribution for a...
DistributionMethod distributionMethod
A struct containing the offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > strides
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets