#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "linalg-utils"

using namespace presburger;

    assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
           "nonpositive multiplying coefficient");

  TileCheck t(tileSizes);
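
// Hedged reading of the TileCheck visitor excerpted above (not upstream
// commentary): a loop dimension counts as "tiled" iff its entry in
// `tileSizes` is non-zero. For example, with tileSizes = {4, 0, 8}, an
// indexing expression using d0 or d2 is tiled; one using only d1 is not.
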
std::optional<RegionMatcher::BinaryOpKind>
RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
  auto &region = op.getRegion();
  if (!region.hasOneBlock())

  auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
  if (addPattern.match(&ops.back()))
    return BinaryOpKind::IAdd;
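
// Illustrative sketch, not part of Utils.cpp: the same m_Op/m_Val matcher
// composition used above can recognize a scalar integer multiply body, i.e. a
// region that just yields arith.muli(%arg0, %arg1). The helper name
// `matchesScalarIMul` is made up for this example.
static bool matchesScalarIMul(Block &block) {
  auto &ops = block.getOperations();
  auto a = mlir::matchers::m_Val(block.getArgument(0));
  auto b = mlir::matchers::m_Val(block.getArgument(1));
  auto mulPattern = m_Op<linalg::YieldOp>(m_Op<arith::MulIOp>(a, b));
  return mulPattern.match(&ops.back());
}
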
  for (Range range : ranges) {
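
// Hedged sketch of the elided loop body (an assumption based on the
// declarations of unpackRanges, Range, and getValueOrCreateConstantIndexOp
// listed below): each Range is unpacked by materializing its offset, size,
// and stride as index-typed values.
//
//   lbs.push_back(getValueOrCreateConstantIndexOp(builder, loc, range.offset));
//   ubs.push_back(getValueOrCreateConstantIndexOp(builder, loc, range.size));
//   steps.push_back(getValueOrCreateConstantIndexOp(builder, loc, range.stride));
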
                                                  PackingMetadata &packingMetadata) {
      llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
  packingMetadata = computePackingMetadata(rank, innerDimsPos);
  if (!outerPerm.empty())
  return packInverseDestPermutation;

  PackingMetadata pMetadata;
  int64_t packedRank = packOp.getDestType().getRank();
  return packInvDestPerm;

  PackingMetadata metadata;

                                                 PackingMetadata &metadata) {
  int64_t unpackRank = unpackOp.getSourceType().getRank();
  return unpackInvSrcPerm;
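
// Hedged usage sketch, not from Utils.cpp: callers typically apply the
// inverse-destination permutation to shape-like vectors of the pack
// destination. `permutedDestShapeSketch` is a made-up name, and PackOp is
// assumed to resolve through the file's using-directives.
static SmallVector<int64_t> permutedDestShapeSketch(PackOp packOp) {
  SmallVector<int64_t> perm = getPackInverseDestPerm(packOp);
  auto shape = llvm::to_vector(packOp.getDestType().getShape());
  applyPermutationToVector(shape, perm);
  return shape;
}
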
  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
    return m.isProjectedPermutation(true);

    if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
              linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
        llvm::any_of(op.getResultTypes(),
                     [](Type type) { return !type.isIntOrIndexOrFloat(); }))
  if (op.getNumLoops() != op.getNumParallelLoops())

  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    if (!op.getMatchingIndexingMap(&opOperand).isPermutation())

  return iteratorType == utils::IteratorType::parallel;

  return iteratorType == utils::IteratorType::reduction;
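
// Hedged usage sketch (`countReductionLoops` is a made-up helper): the two
// predicates above are typically used to classify a LinalgOp's iteration
// space, e.g. counting its reduction loops.
static int64_t countReductionLoops(LinalgOp linalgOp) {
  return llvm::count_if(
      linalgOp.getIteratorTypesArray(),
      [](utils::IteratorType t) { return isReductionIterator(t); });
}
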
  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();

  Value current = sliceOp.getSource();

    OpResult opResult = cast<OpResult>(current);
    current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();

  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;

  if (sliceOp.getSource().getType() != type)

  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
        return getConstantIntValue(ofr) != static_cast<int64_t>(0);

  auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
      sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())

          llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
          [](std::tuple<OpFoldResult, OpFoldResult> it) {
            return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));

  Value padOpPad = padOp.getConstantPaddingValue();

  return sliceOp.getSource();
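
// Hedged usage sketch of makeComposedPadHighOp (declared below); the helper
// name `padToTypeSketch` and the `zero` padding value are placeholders: pad
// `source` up to the static shape of `resultType` without the nofold flag.
static Value padToTypeSketch(OpBuilder &b, Location loc,
                             RankedTensorType resultType, Value source,
                             Value zero) {
  return makeComposedPadHighOp(b, loc, resultType, source, /*padding=*/zero,
                               /*nofold=*/false);
}
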
  auto memrefTypeTo = cast<MemRefType>(to.getType());
  auto memrefTypeFrom = cast<MemRefType>(from.getType());
  assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
         "`from` and `to` memref must have the same rank");
                                                 utils::IteratorType::parallel);
  return linalg::GenericOp::create(
        linalg::YieldOp::create(b, loc, args.front());
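
// Hedged usage sketch (`emitCopySketch` is a made-up name): emit a
// rank-polymorphic element-wise copy between two memrefs of matching rank via
// the generic-op builder above.
static linalg::GenericOp emitCopySketch(OpBuilder &b, Location loc, Value src,
                                        Value dst) {
  return makeMemRefCopyOp(b, loc, /*from=*/src, /*to=*/dst);
}
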
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected as many entries for proc info as number of loops, even if "
         "they are null entries");
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
      b, loc, lbs, ubs, steps, iterArgInitValues,
        assert(iterArgs.size() == iterArgInitValues.size() &&
               "expect the number of output tensors and iter args to match");
        if (!iterArgs.empty()) {
          operandValuesToUse = linalgOp.getDpsInputs();
          operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
        return bodyBuilderFn(b, loc, ivs, operandValuesToUse);

  if (loopNest.loops.empty() || procInfo.empty())

    if (procInfo[loop.index()].distributionMethod ==
        DistributionMethod::Cyclic) {
                             procInfo[loop.index()].nprocs);
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");

  constantSteps.reserve(steps.size());
  for (Value v : steps) {
    assert(constVal.has_value() && "Affine loops require constant steps");
    constantSteps.push_back(constVal.value());

        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
  assert(lbs.size() == ubs.size());
  assert(lbs.size() == steps.size());
  assert(lbs.size() == iteratorTypes.size());
  assert(procInfo.empty() || (lbs.size() == procInfo.size()));

  if (iteratorTypes.empty()) {
    bodyBuilderFn(b, loc, ivStorage);

        b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
          ivStorage.append(ivs.begin(), ivs.end());
          generateParallelLoopNest(
              b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
              iteratorTypes.drop_front(),
              procInfo.empty() ? procInfo : procInfo.drop_front(),
              bodyBuilderFn, ivStorage);
  unsigned nLoops = iteratorTypes.size();
  unsigned numProcessed = 0;
  if (procInfo.empty()) {

    distributionMethod = procInfo.front().distributionMethod;

  auto remainderProcInfo =
      procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
  switch (distributionMethod) {

    scf::ParallelOp::create(
        b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);

  case DistributionMethod::Cyclic: {
    scf::ParallelOp::create(
        b, loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),
          ivStorage.append(localIvs.begin(), localIvs.end());
          generateParallelLoopNest(
              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);

  case DistributionMethod::CyclicNumProcsGeNumIters: {
    Value cond = ab.slt(lbs[0], ubs[0]);
    for (unsigned i = 1; i < numProcessed; ++i)
      cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
                             ubs.drop_front(numProcessed),
                             steps.drop_front(numProcessed),
                             iteratorTypes.drop_front(numProcessed),
                             remainderProcInfo, bodyBuilderFn, ivStorage);

  case DistributionMethod::CyclicNumProcsEqNumIters:
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
        b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
        steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
        remainderProcInfo, bodyBuilderFn, ivStorage);
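
// Hedged note on the Cyclic cases above: updateBoundsForCyclicDistribution
// (declared below) rewrites the per-processor bounds so iterations are dealt
// out round-robin, roughly
//   lb'   = lb + procId * step
//   step' = step * nprocs
// CyclicNumProcsGeNumIters then only needs the `cond` predicate built with
// ab.slt above to guard the single iteration each processor may own.
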
  if (!linalgOp.hasPureBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");

  assert(iteratorTypes.size() >= loopRanges.size() &&
         "expected iterator type for all ranges");
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected proc information for all loops when present");
  iteratorTypes = iteratorTypes.take_front(loopRanges.size());

  unsigned numLoops = iteratorTypes.size();
  ivs.reserve(numLoops);
  lbsStorage.reserve(numLoops);
  ubsStorage.reserve(numLoops);
  stepsStorage.reserve(numLoops);

  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);

        b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
        ubsStorage[it.index()], stepsStorage[it.index()]);

  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
      b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());

  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
      .Case([&](MemRefType) {
        return memref::SubViewOp::create(
            builder, loc, valueToTile, sliceParams.offsets,
      .Case([&](RankedTensorType) {
        return tensor::ExtractSliceOp::create(
            builder, loc, valueToTile, sliceParams.offsets,
        llvm_unreachable("Unexpected shaped type");

                               bool omitPartialTileCheck) {
                                ubs, subShapeSizes, omitPartialTileCheck);
                                        bool omitPartialTileCheck) {
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
  assert(shapedType && "only shaped types can be tiled");
  int64_t rank = shapedType.getRank();

  sliceParams.offsets.reserve(rank);
  sliceParams.sizes.reserve(rank);
  sliceParams.strides.reserve(rank);
  for (unsigned r = 0; r < rank; ++r) {
    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
      sliceParams.sizes.push_back(dim);
      LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");

    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");

    [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
    assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
        cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
        rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
    sliceParams.offsets.push_back(offset);

    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: raw size: " << size << "\n");
    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: new offset: " << offset << "\n");

    if (omitPartialTileCheck) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
      sliceParams.sizes.push_back(size);

    int64_t shapeSize = shape[r];
    auto hasTileSizeOne = sizeCst == 1;
    auto dividesEvenly = sizeCst && ShapedType::isStatic(shapeSize) &&
                         ((shapeSize % *sizeCst) == 0);
    if (!hasTileSizeOne && !dividesEvenly) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
                              << ", size: " << size
                              << ": make sure in bound with affine.min\n");

      bindDims(context, dim0, dim1, dim2);
          llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {

    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
    sliceParams.sizes.push_back(size);
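
// Worked example, hedged reading of the affine.min construction above: for a
// dimension of extent 10 tiled with size 4, the slice size at offset iv is
// min(4, 10 - iv). The full tiles at iv = 0 and iv = 4 get 4 elements; the
// partial tile at iv = 8 gets min(4, 10 - 8) = 2, keeping accesses in bounds.
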
  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
    LLVM_DEBUG(llvm::dbgs()
               << "computeTileOffsets: " << offsets.back() << "\n");

  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
  if (op.hasPureBufferSemantics())
  return llvm::to_vector(
      llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        return operands[opOperand.getOperandNumber()].getType();
  if (op.hasPureBufferSemantics())
  tensorResults.reserve(results.size());

  unsigned resultIdx = 0;
  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    Value outputTensor = operands[opOperand.getOperandNumber()];
    if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
      Value inserted = tensor::InsertSliceOp::create(
          builder, loc, sliceOp.getSource().getType(), results[resultIdx],
          sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
          sliceOp.getStrides(), sliceOp.getStaticOffsets(),
          sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
      tensorResults.push_back(inserted);
      tensorResults.push_back(results[resultIdx]);
  return tensorResults;
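
// Hedged usage sketch (`stitchResultsSketch`, `tiledOp`, and `tiledOperands`
// are placeholders): after building the tiled LinalgOp on extracted slices,
// its tensor results are inserted back into the enclosing tensors.
static SmallVector<Value> stitchResultsSketch(OpBuilder &b, Location loc,
                                              LinalgOp tiledOp,
                                              ValueRange tiledOperands) {
  return insertSlicesBack(b, loc, tiledOp, tiledOperands,
                          tiledOp->getResults());
}
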
                          bool omitPartialTileCheck) {
  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
                           llvm::make_range(tileSizes.begin(), tileSizes.end()),
         "expected as many ivs as non-zero sizes");

  assert(static_cast<int64_t>(valuesToTile.size()) <=
             linalgOp->getNumOperands() &&
         "more value to tile than operands.");

  allSliceParams.reserve(valuesToTile.size());
  for (auto [opOperand, val] :
       llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
    Value shapedOp = val;
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
    AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);

    Type operandType = opOperand.get().getType();
    if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
                                      linalgOp.isDpsInit(&opOperand))) {
      allSliceParams.push_back(std::nullopt);
      LLVM_DEBUG(llvm::dbgs()
                 << ": not tiled: use shape: " << operandType << "\n");

    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
        builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
        omitPartialTileCheck));

  return allSliceParams;
                                     bool omitPartialTileCheck) {
                                tileSizes, sizeBounds, omitPartialTileCheck);

  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
    Value valueToTile = std::get<0>(item);
    std::optional<SliceParameters> sliceParams = std::get<1>(item);
    tiledShapes.push_back(
        sliceParams.has_value()
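
// Hedged usage sketch (`tileOperandsSketch` is a made-up wrapper):
// makeTiledShapes (declared below) wraps the slice computation above and
// returns the tiled SSA values directly, falling back to the untiled operand
// where no slice parameters were computed.
static SmallVector<Value> tileOperandsSketch(OpBuilder &b, Location loc,
                                             LinalgOp linalgOp,
                                             ValueRange operands,
                                             ArrayRef<OpFoldResult> ivs,
                                             ArrayRef<OpFoldResult> tileSizes,
                                             ArrayRef<OpFoldResult> sizeBounds) {
  return makeTiledShapes(b, loc, linalgOp, operands, ivs, tileSizes, sizeBounds,
                         /*omitPartialTileCheck=*/false);
}
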
  if (!linalgOp.hasIndexSemantics())

  for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
    if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
        b, indexOp.getLoc(), index + offset,
        {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
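
// Hedged usage sketch (`tiledOp` and `offsets` are placeholders): after
// cloning an op onto a tile, its linalg.index results are shifted by the tile
// offsets so that indices stay relative to the original iteration space.
//
//   offsetIndices(b, cast<LinalgOp>(tiledOp), offsets);
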
std::optional<SmallVector<ReassociationIndices>>
    auto dim = it.index();
    auto size = it.value();

    auto attr = llvm::dyn_cast_if_present<Attribute>(size);
    if (attr && cast<IntegerAttr>(attr).getInt() == 1)

    std::swap(reassociation.back(), curr);

  if (!curr.empty() && !reassociation.empty())
    reassociation.back().append(curr.begin(), curr.end());
  return reassociation;
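
// Worked example, hedged reading of the loop above: each unit dimension is
// grouped with the next non-unit dimension, and trailing unit dimensions fold
// into the last group. For mixedSizes = [1, 4, 1, 1, 8, 1] this yields the
// reassociation [[0, 1], [2, 3, 4, 5]].
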
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put t...
static SmallVector< int64_t > computePackUnPackPerm(int64_t rank, ArrayRef< int64_t > &innerDimsPos, ArrayRef< int64_t > &outerPerm, PackingMetadata &packingMetadata)
The permutation can be obtained from two permutations: a) Compute the permutation vector to move the ...
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
SmallVector< int64_t > innerDimsPos
Affine binary operation expression.
AffineExpr getLHS() const
AffineExpr getRHS() const
A dimensional identifier appearing in an affine expression.
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
AffineExpr getResult(unsigned idx) const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr >> exprsList, MLIRContext *context)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the larges...
Attributes are known-constant values of operations.
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
OpListType & getOperations()
iterator_range< iterator > without_terminator()
Return an iterator range over the operation within this block excluding the terminator operation at t...
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
void replaceUsesWithIf(Value from, Value to, function_ref< bool(OpOperand &)> functor, bool *allUsesReplaced=nullptr)
Find uses of from and replace them with to if the functor returns true.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isSignlessIntOrFloat() const
Return true of this is a signless integer or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Operation * getOwner() const
Return the owner of this operand.
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
static Operation * materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of an extract_slice (or source of an insert_slice) operat...
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
SmallVector< int64_t > getPackInverseDestPerm(PackOp packOp)
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Operation * makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
SmallVector< int64_t > getUnPackInverseSrcPerm(UnPackOp unpackOp, PackingMetadata &metadata)
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value padding, bool nofold, ValueRange typeDynDims={})
Create a tensor::PadOp that pads source to the shape of type whose sizes are assumed to be greater th...
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
PadOp createPadHighOp(RankedTensorType resType, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder, ValueRange dynOutDims={})
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
@ Mul
RHS of mul is always a constant or a symbolic expression.
SmallVector< int64_t > computePermutationVector(int64_t permSize, ArrayRef< int64_t > positions, ArrayRef< int64_t > desiredPositions)
Return a permutation vector of size permSize that would result in moving positions into desiredPositi...
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Value _and(Value lhs, Value rhs)
Value slt(Value lhs, Value rhs)
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
Callback function type used to get processor ID, and number of processors used for distribution for a...
DistributionMethod distributionMethod
A struct containing offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > strides
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets