36 #include "llvm/ADT/TypeSwitch.h"
37 #include "llvm/Support/Debug.h"
40 #define DEBUG_TYPE "linalg-utils"
43 using namespace presburger;
68 assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
69 "nonpositive multiplying coefficient");
80 TileCheck t(tileSizes);
95 std::optional<RegionMatcher::BinaryOpKind>
96 RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
97 auto ®ion = op.getRegion();
98 if (!llvm::hasSingleElement(region))
115 auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
116 if (addPattern.match(&ops.back()))
117 return BinaryOpKind::IAdd;
133 for (Range range : ranges) {
155 PackingMetadata &packingMetadata) {
158 llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
159 packingMetadata = computePackingMetadata(rank, innerDimsPos);
164 if (!outerPerm.empty())
171 return packInverseDestPermutation;
179 PackingMetadata pMetadata;
180 int64_t packedRank = packOp.getDestType().getRank();
185 return packInvDestPerm;
189 PackingMetadata metadata;
194 PackingMetadata &metadata) {
195 int64_t unpackRank = unpackOp.getSourceType().getRank();
200 return unpackInvSrcPerm;
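// Editor's sketch (not part of Utils.cpp): one hedged way the inverse
// permutations above are consumed. `packOp` is an assumed handle to a PackOp;
// applyPermutationToVector comes from MLIR's indexing utilities.
SmallVector<int64_t> destShape = llvm::to_vector(packOp.getDestType().getShape());
SmallVector<int64_t> invPerm = getPackInverseDestPerm(packOp);
// Reorder the per-dimension vector according to the inverse destination permutation.
applyPermutationToVector(destShape, invPerm);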
204 return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
205 return m.isProjectedPermutation(true);
210 if (!llvm::hasSingleElement(r))
213 if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
214 linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
216 llvm::any_of(op.getResultTypes(),
217 [](Type type) { return !type.isIntOrIndexOrFloat(); }))
224 if (op.getNumLoops() != op.getNumParallelLoops())
231 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
232 if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
239 return iteratorType == utils::IteratorType::parallel;
243 return iteratorType == utils::IteratorType::reduction;
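// Editor's sketch (assumption, not from Utils.cpp): the two predicates above
// are typically applied to the iterator types reported by a LinalgOp, e.g. to
// detect whether `linalgOp` (an assumed LinalgOp value) carries a reduction.
bool hasReduction =
    llvm::any_of(linalgOp.getIteratorTypesArray(), isReductionIterator);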
250 auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
256 Value current = sliceOp.getSource();
261 OpResult opResult = cast<OpResult>(current);
262 current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
264 auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
273 if (sliceOp.getSource().getType() != type)
278 if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
279 return getConstantIntValue(ofr) != static_cast<int64_t>(0);
286 auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
288 sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
295 llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
296 [](std::tuple<OpFoldResult, OpFoldResult> it) {
297 return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
304 Value padOpPad = padOp.getConstantPaddingValue();
311 return sliceOp.getSource();
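// Editor's sketch of a hypothetical call site for makeComposedPadHighOp,
// assuming `b`, `loc`, `paddedType`, `source`, and `paddingValue` are provided
// by the caller. When the checks above succeed, the helper reuses the already
// padded producer instead of creating a fresh tensor.pad.
Value padded = makeComposedPadHighOp(b, loc, paddedType, source, paddingValue,
                                     /*nofold=*/false);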
315 auto memrefTypeTo = cast<MemRefType>(to.getType());
317 auto memrefTypeFrom = cast<MemRefType>(from.getType());
318 assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
319 "`from` and `to` memref must have the same rank");
325 utils::IteratorType::parallel);
326 return b.create<linalg::GenericOp>(
333 b.create<linalg::YieldOp>(loc, args.front());
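// Editor's sketch (hypothetical call site): makeMemRefCopyOp builds a
// rank-polymorphic copy between two memrefs of the same rank and element type
// as a linalg.generic. `builder`, `loc`, `from`, and `to` are assumed here.
linalg::GenericOp copy = makeMemRefCopyOp(builder, loc, from, to);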
346 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
347 "expected as many entries for proc info as number of loops, even if "
348 "they are null entries");
350 if (!linalgOp.hasPureBufferSemantics())
351 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
355 b, loc, lbs, ubs, steps, iterArgInitValues,
357 assert(iterArgs.size() == iterArgInitValues.size() &&
358 "expect the number of output tensors and iter args to match");
360 if (!iterArgs.empty()) {
361 operandValuesToUse = linalgOp.getDpsInputs();
362 operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
364 return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
367 if (loopNest.loops.empty() || procInfo.empty())
372 if (procInfo[loop.index()].distributionMethod ==
373 DistributionMethod::Cyclic) {
375 procInfo[loop.index()].nprocs);
390 if (!linalgOp.hasPureBufferSemantics())
391 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
392 assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
398 constantSteps.reserve(steps.size());
399 for (Value v : steps) {
401 assert(constVal.has_value() && "Affine loops require constant steps");
402 constantSteps.push_back(constVal.value());
407 bodyBuilderFn(b, loc, ivs,
408 linalgOp->getOperands());
440 assert(lbs.size() == ubs.size());
441 assert(lbs.size() == steps.size());
442 assert(lbs.size() == iteratorTypes.size());
443 assert(procInfo.empty() || (lbs.size() == procInfo.size()));
447 if (iteratorTypes.empty()) {
448 bodyBuilderFn(b, loc, ivStorage);
456 b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
458 ivStorage.append(ivs.begin(), ivs.end());
459 generateParallelLoopNest(
460 b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
461 iteratorTypes.drop_front(),
462 procInfo.empty() ? procInfo : procInfo.drop_front(),
463 bodyBuilderFn, ivStorage);
468 unsigned nLoops = iteratorTypes.size();
469 unsigned numProcessed = 0;
471 if (procInfo.empty()) {
474 distributionMethod = procInfo.front().distributionMethod;
483 auto remainderProcInfo =
484 procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
485 switch (distributionMethod) {
489 b.create<scf::ParallelOp>(
490 loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
491 steps.take_front(numProcessed),
493 ivStorage.append(localIvs.begin(), localIvs.end());
495 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
496 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
497 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
498 bodyBuilderFn, ivStorage);
502 case DistributionMethod::Cyclic: {
505 b.create<scf::ParallelOp>(
506 loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
507 steps.take_front(numProcessed),
509 ivStorage.append(localIvs.begin(), localIvs.end());
511 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
512 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
513 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
514 bodyBuilderFn, ivStorage);
518 case DistributionMethod::CyclicNumProcsGeNumIters: {
521 Value cond = ab.slt(lbs[0], ubs[0]);
522 for (unsigned i = 1; i < numProcessed; ++i)
523 cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
524 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
527 ubs.drop_front(numProcessed),
528 steps.drop_front(numProcessed),
529 iteratorTypes.drop_front(numProcessed),
530 remainderProcInfo, bodyBuilderFn, ivStorage);
535 case DistributionMethod::CyclicNumProcsEqNumIters:
538 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
540 b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
541 steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
542 remainderProcInfo, bodyBuilderFn, ivStorage);
557 if (!linalgOp.hasPureBufferSemantics())
558 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
559 assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
561 assert(iteratorTypes.size() >= loopRanges.size() &&
562 "expected iterator type for all ranges");
563 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
564 "expected proc information for all loops when present");
565 iteratorTypes = iteratorTypes.take_front(loopRanges.size());
567 unsigned numLoops = iteratorTypes.size();
568 ivs.reserve(numLoops);
569 lbsStorage.reserve(numLoops);
570 ubsStorage.reserve(numLoops);
571 stepsStorage.reserve(numLoops);
574 unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
580 b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
581 ubsStorage[it.index()], stepsStorage[it.index()]);
584 ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
586 b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
588 bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
592 assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
598 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
600 .Case([&](MemRefType) {
601 return builder.create<memref::SubViewOp>(
602 loc, valueToTile, sliceParams.offsets,
605 .Case([&](RankedTensorType) {
606 return builder.create<tensor::ExtractSliceOp>(
607 loc, valueToTile, sliceParams.offsets,
611 llvm_unreachable("Unexpected shaped type");
621 bool omitPartialTileCheck) {
624 ubs, subShapeSizes, omitPartialTileCheck);
633 bool omitPartialTileCheck) {
634 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
635 assert(shapedType && "only shaped types can be tiled");
637 int64_t rank = shapedType.getRank();
641 sliceParams.offsets.reserve(rank);
642 sliceParams.sizes.reserve(rank);
643 sliceParams.strides.reserve(rank);
644 for (unsigned r = 0; r < rank; ++r) {
645 LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
649 sliceParams.sizes.push_back(dim);
651 LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
654 LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
659 LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
664 [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
665 assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
667 cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
669 rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
670 sliceParams.offsets.push_back(offset);
678 LLVM_DEBUG(llvm::dbgs()
679 << "computeSliceParameters: raw size: " << size << "\n");
680 LLVM_DEBUG(llvm::dbgs()
681 << "computeSliceParameters: new offset: " << offset << "\n");
684 if (omitPartialTileCheck) {
687 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
688 sliceParams.sizes.push_back(size);
697 int64_t shapeSize = shape[r];
699 auto hasTileSizeOne = sizeCst == 1;
700 auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
701 ((shapeSize % *sizeCst) == 0);
702 if (!hasTileSizeOne && !dividesEvenly) {
703 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
704 << ", size: " << size
705 << ": make sure in bound with affine.min\n");
709 bindDims(context, dim0, dim1, dim2);
724 llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
740 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
741 sliceParams.sizes.push_back(size);
750 for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
751 LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
754 LLVM_DEBUG(llvm::dbgs()
755 << "computeTileOffsets: " << offsets.back() << "\n");
764 for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
771 LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
777 if (op.hasPureBufferSemantics())
779 return llvm::to_vector(
780 llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
781 return operands[opOperand.getOperandNumber()].getType();
788 if (op.hasPureBufferSemantics())
791 tensorResults.reserve(results.size());
793 unsigned resultIdx = 0;
794 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
797 Value outputTensor = operands[opOperand.getOperandNumber()];
798 if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
799 Value inserted = builder.create<tensor::InsertSliceOp>(
800 loc, sliceOp.getSource().getType(), results[resultIdx],
801 sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
802 sliceOp.getStrides(), sliceOp.getStaticOffsets(),
803 sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
804 tensorResults.push_back(inserted);
806 tensorResults.push_back(results[resultIdx]);
810 return tensorResults;
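// Editor's sketch of the intended pairing, under the assumption of a
// surrounding tiling driver that owns `b`, `loc`, `linalgOp`, `ivs`,
// `tileSizes`, and `sizeBounds` (none of this is code from Utils.cpp):
SmallVector<Value> tiledOperands =
    makeTiledShapes(b, loc, linalgOp, linalgOp->getOperands(), ivs, tileSizes,
                    sizeBounds, /*omitPartialTileCheck=*/false);
SmallVector<Type> resultTypes = getTensorOutputTypes(linalgOp, tiledOperands);
// clone(...) here refers to the generic helper that clones an op with new
// result types and operands.
Operation *tiledOp = clone(b, linalgOp, resultTypes, tiledOperands);
SmallVector<Value> newResults =
    insertSlicesBack(b, loc, linalgOp, tiledOperands, tiledOp->getResults());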
818 bool omitPartialTileCheck) {
819 assert(ivs.size() == static_cast<size_t>(llvm::count_if(
820 llvm::make_range(tileSizes.begin(), tileSizes.end()),
822 "expected as many ivs as non-zero sizes");
831 assert(static_cast<int64_t>(valuesToTile.size()) <=
832 linalgOp->getNumOperands() &&
833 "more value to tile than operands.");
835 allSliceParams.reserve(valuesToTile.size());
836 for (auto [opOperand, val] :
837 llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
838 Value shapedOp = val;
839 LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
840 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
847 Type operandType = opOperand.get().getType();
848 if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
849 linalgOp.isDpsInit(&opOperand))) {
850 allSliceParams.push_back(std::nullopt);
851 LLVM_DEBUG(llvm::dbgs()
852 << ": not tiled: use shape: " << operandType << "\n");
855 LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
858 builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
859 omitPartialTileCheck));
862 return allSliceParams;
870 bool omitPartialTileCheck) {
873 tileSizes, sizeBounds, omitPartialTileCheck);
875 for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
876 Value valueToTile = std::get<0>(item);
877 std::optional<SliceParameters> sliceParams = std::get<1>(item);
878 tiledShapes.push_back(
879 sliceParams.has_value()
895 if (!linalgOp.hasIndexSemantics())
898 for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
899 if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
906 b, indexOp.getLoc(), index + offset,
907 {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
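// Editor's sketch (hypothetical, not from Utils.cpp): after cloning a LinalgOp
// onto a tile, its linalg.index results are shifted by the tile offsets.
// `tiledOp` and `tileOffsets` are assumed to come from a tiling driver.
offsetIndices(b, cast<LinalgOp>(tiledOp), tileOffsets);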
922 std::optional<SmallVector<ReassociationIndices>>
927 auto dim = it.index();
928 auto size = it.value();
930 auto attr = llvm::dyn_cast_if_present<Attribute>(size);
931 if (attr && cast<IntegerAttr>(attr).getInt() == 1)
934 std::swap(reassociation.back(), curr);
939 if (!curr.empty() && !reassociation.empty())
940 reassociation.back().append(curr.begin(), curr.end());
941 return reassociation;
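// Editor's sketch of a worked example for the helper above, with illustrative
// all-static sizes. Unit dimensions are grouped with the next non-unit
// dimension (trailing unit dims join the last group), so [1, 128, 1, 32]
// should fold with reassociation {{0, 1}, {2, 3}}. `b` is an assumed Builder.
SmallVector<OpFoldResult> mixedSizes = {b.getIndexAttr(1), b.getIndexAttr(128),
                                        b.getIndexAttr(1), b.getIndexAttr(32)};
std::optional<SmallVector<ReassociationIndices>> reassoc =
    getReassociationMapForFoldingUnitDims(mixedSizes);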