36 #include "llvm/ADT/TypeSwitch.h"
37 #include "llvm/Support/Debug.h"
40 #define DEBUG_TYPE "linalg-utils"
43 using namespace presburger;
68 assert(cast<AffineConstantExpr>(expr.
getRHS()).getValue() > 0 &&
69 "nonpositive multiplying coefficient");
80 TileCheck t(tileSizes);
95 std::optional<RegionMatcher::BinaryOpKind>
96 RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
98 if (!llvm::hasSingleElement(region))
115 auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
116 if (addPattern.match(&ops.back()))
117 return BinaryOpKind::IAdd;
133 for (
Range range : ranges) {
150 return llvm::all_of(op.getIndexingMapsArray(), [](
AffineMap m) {
151 return m.isProjectedPermutation(true);
156 if (!llvm::hasSingleElement(r))
159 if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
160 linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
163 [](
Type type) { return !type.isIntOrIndexOrFloat(); }))
170 if (op.getNumLoops() != op.getNumParallelLoops())
177 for (
OpOperand &opOperand : op.getDpsInitsMutable()) {
178 if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
185 return iteratorType == utils::IteratorType::parallel;
189 return iteratorType == utils::IteratorType::reduction;
195 auto sliceOp = source.
getDefiningOp<tensor::ExtractSliceOp>();
200 Value current = sliceOp.getSource();
205 OpResult opResult = cast<OpResult>(current);
206 current = linalgOp.getDpsInitOperand(opResult.
getResultNumber())->get();
208 auto padOp = current ? current.
getDefiningOp<tensor::PadOp>() :
nullptr;
216 if (sliceOp.getSource().getType() != type)
220 if (llvm::any_of(padOp.getMixedLowPad(), [](
OpFoldResult ofr) {
221 return getConstantIntValue(ofr) != static_cast<int64_t>(0);
227 auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
229 sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
235 llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
236 [](std::tuple<OpFoldResult, OpFoldResult> it) {
237 return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
243 Value padOpPad = padOp.getConstantPaddingValue();
249 return sliceOp.getSource();
255 auto resultTensorType = cast<RankedTensorType>(outputTensor.
getType());
256 Type elementType = resultTensorType.getElementType();
259 "expect transpose vector to be a permutation");
260 assert(transposeVector.size() ==
261 static_cast<size_t>(resultTensorType.getRank()) &&
262 "expect transpose vector size to match result tensor rank");
272 utils::IteratorType::parallel);
276 b.
create<GenericOp>(loc, resultTensorType, inputTensor, outputTensor,
277 indexingMaps, iteratorTypes);
281 Region &body = transposeOp.getRegion();
283 {elementType, elementType}, {loc, loc});
289 auto memrefTypeTo = cast<MemRefType>(to.
getType());
291 auto memrefTypeFrom = cast<MemRefType>(from.
getType());
292 assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
293 "`from` and `to` memref must have the same rank");
299 utils::IteratorType::parallel);
300 return b.
create<linalg::GenericOp>(
307 b.
create<linalg::YieldOp>(loc, args.front());
320 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
321 "expected as many entries for proc info as number of loops, even if "
322 "they are null entries");
324 if (!linalgOp.hasPureBufferSemantics())
325 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
329 b, loc, lbs, ubs, steps, iterArgInitValues,
331 assert(iterArgs.size() == iterArgInitValues.size() &&
332 "expect the number of output tensors and iter args to match");
334 if (!iterArgs.empty()) {
335 operandValuesToUse = linalgOp.getDpsInputs();
336 operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
338 return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
341 if (loopNest.
loops.empty() || procInfo.empty())
346 if (procInfo[loop.index()].distributionMethod ==
347 DistributionMethod::Cyclic) {
349 procInfo[loop.index()].nprocs);
364 if (!linalgOp.hasPureBufferSemantics())
365 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
366 assert(iterArgInitValues.empty() &&
"unexpected AffineForOp init values");
372 constantSteps.reserve(steps.size());
373 for (
Value v : steps) {
375 assert(constVal.has_value() &&
"Affine loops require constant steps");
376 constantSteps.push_back(constVal.value());
381 bodyBuilderFn(b, loc, ivs,
382 linalgOp->getOperands());
414 assert(lbs.size() == ubs.size());
415 assert(lbs.size() == steps.size());
416 assert(lbs.size() == iteratorTypes.size());
417 assert(procInfo.empty() || (lbs.size() == procInfo.size()));
421 if (iteratorTypes.empty()) {
422 bodyBuilderFn(b, loc, ivStorage);
430 b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
432 ivStorage.append(ivs.begin(), ivs.end());
433 generateParallelLoopNest(
434 b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
435 iteratorTypes.drop_front(),
436 procInfo.empty() ? procInfo : procInfo.drop_front(),
437 bodyBuilderFn, ivStorage);
442 unsigned nLoops = iteratorTypes.size();
443 unsigned numProcessed = 0;
445 if (procInfo.empty()) {
448 distributionMethod = procInfo.front().distributionMethod;
457 auto remainderProcInfo =
458 procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
459 switch (distributionMethod) {
463 b.
create<scf::ParallelOp>(
464 loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
465 steps.take_front(numProcessed),
467 ivStorage.append(localIvs.begin(), localIvs.end());
469 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
470 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
471 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
472 bodyBuilderFn, ivStorage);
476 case DistributionMethod::Cyclic: {
479 b.
create<scf::ParallelOp>(
480 loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
481 steps.take_front(numProcessed),
483 ivStorage.append(localIvs.begin(), localIvs.end());
485 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
486 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
487 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
488 bodyBuilderFn, ivStorage);
492 case DistributionMethod::CyclicNumProcsGeNumIters: {
495 Value cond = ab.
slt(lbs[0], ubs[0]);
496 for (
unsigned i = 1; i < numProcessed; ++i)
497 cond = ab.
_and(cond, ab.
slt(lbs[i], ubs[i]));
498 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
501 ubs.drop_front(numProcessed),
502 steps.drop_front(numProcessed),
503 iteratorTypes.drop_front(numProcessed),
504 remainderProcInfo, bodyBuilderFn, ivStorage);
509 case DistributionMethod::CyclicNumProcsEqNumIters:
512 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
514 b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
515 steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
516 remainderProcInfo, bodyBuilderFn, ivStorage);
531 if (!linalgOp.hasPureBufferSemantics())
532 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
533 assert(iterArgInitValues.empty() &&
"unexpected ParallelOp init values");
535 assert(iteratorTypes.size() >= loopRanges.size() &&
536 "expected iterator type for all ranges");
537 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
538 "expected proc information for all loops when present");
539 iteratorTypes = iteratorTypes.take_front(loopRanges.size());
541 unsigned numLoops = iteratorTypes.size();
542 ivs.reserve(numLoops);
543 lbsStorage.reserve(numLoops);
544 ubsStorage.reserve(numLoops);
545 stepsStorage.reserve(numLoops);
548 unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
554 b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
555 ubsStorage[it.index()], stepsStorage[it.index()]);
558 ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
560 b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
562 bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
566 assert(ivs.size() == iteratorTypes.size() &&
"did not generate enough loops");
572 auto shapedType = dyn_cast<ShapedType>(valueToTile.
getType());
574 .Case([&](MemRefType) {
575 return builder.
create<memref::SubViewOp>(
576 loc, valueToTile, sliceParams.
offsets,
579 .Case([&](RankedTensorType) {
580 return builder.
create<tensor::ExtractSliceOp>(
581 loc, valueToTile, sliceParams.
offsets,
585 llvm_unreachable(
"Unexpected shaped type");
587 return sliceOp->getResult(0);
594 bool omitPartialTileCheck) {
597 ubs, subShapeSizes, omitPartialTileCheck);
606 bool omitPartialTileCheck) {
607 auto shapedType = dyn_cast<ShapedType>(valueToTile.
getType());
608 assert(shapedType &&
"only shaped types can be tiled");
610 int64_t rank = shapedType.getRank();
614 sliceParams.
offsets.reserve(rank);
615 sliceParams.
sizes.reserve(rank);
616 sliceParams.
strides.reserve(rank);
617 for (
unsigned r = 0; r < rank; ++r) {
618 LLVM_DEBUG(llvm::dbgs() <<
"computeSliceParameters: for dim#" << r);
622 sliceParams.
sizes.push_back(dim);
624 LLVM_DEBUG(llvm::dbgs() <<
": not tiled: use size: " << dim <<
"\n");
627 LLVM_DEBUG(llvm::dbgs() <<
": tiled: figure out subsize...\n");
632 LLVM_DEBUG(llvm::dbgs() <<
"computeSliceParameters: submap: " << m <<
"\n");
635 sliceParams.
offsets.push_back(offset);
642 LLVM_DEBUG(llvm::dbgs()
643 <<
"computeSliceParameters: raw size: " << size <<
"\n");
644 LLVM_DEBUG(llvm::dbgs()
645 <<
"computeSliceParameters: new offset: " << offset <<
"\n");
648 if (omitPartialTileCheck) {
651 LLVM_DEBUG(llvm::dbgs() <<
"makeTiledShape: new size: " << size <<
"\n");
652 sliceParams.
sizes.push_back(size);
661 int64_t shapeSize = shape[r];
663 auto hasTileSizeOne = sizeCst && *sizeCst == 1;
664 auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
665 ((shapeSize % *sizeCst) == 0);
666 if (!hasTileSizeOne && !dividesEvenly) {
667 LLVM_DEBUG(llvm::dbgs() <<
"makeTiledShape: shapeSize=" << shapeSize
668 <<
", size: " << size
669 <<
": make sure in bound with affine.min\n");
673 bindDims(context, dim0, dim1, dim2);
688 llvm::to_vector(llvm::map_range(ubs, [&](
OpFoldResult ub) {
704 LLVM_DEBUG(llvm::dbgs() <<
"makeTiledShape: new size: " << size <<
"\n");
705 sliceParams.
sizes.push_back(size);
714 for (
unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
715 LLVM_DEBUG(llvm::dbgs() <<
"makeTiledShapes: for loop#" << idx <<
"\n");
718 LLVM_DEBUG(llvm::dbgs()
719 <<
"computeTileOffsets: " << offsets.back() <<
"\n");
728 for (
unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
735 LLVM_DEBUG(llvm::dbgs() <<
"computeTileSizes: " << sizes.back() <<
"\n");
741 if (op.hasPureBufferSemantics())
743 return llvm::to_vector(
744 llvm::map_range(op.getDpsInitsMutable(), [&](
OpOperand &opOperand) {
745 return operands[opOperand.getOperandNumber()].getType();
752 if (op.hasPureBufferSemantics())
755 tensorResults.reserve(results.size());
757 unsigned resultIdx = 0;
758 for (
OpOperand &opOperand : op.getDpsInitsMutable()) {
761 Value outputTensor = operands[opOperand.getOperandNumber()];
762 if (
auto sliceOp = outputTensor.
getDefiningOp<tensor::ExtractSliceOp>()) {
763 Value inserted = builder.
create<tensor::InsertSliceOp>(
764 loc, sliceOp.getSource().getType(), results[resultIdx],
765 sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
766 sliceOp.getStrides(), sliceOp.getStaticOffsets(),
767 sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
768 tensorResults.push_back(inserted);
770 tensorResults.push_back(results[resultIdx]);
774 return tensorResults;
782 bool omitPartialTileCheck) {
783 assert(ivs.size() ==
static_cast<size_t>(llvm::count_if(
784 llvm::make_range(tileSizes.begin(), tileSizes.end()),
786 "expected as many ivs as non-zero sizes");
795 assert(
static_cast<int64_t
>(valuesToTile.size()) <=
796 linalgOp->getNumOperands() &&
797 "more value to tile than operands.");
799 allSliceParams.reserve(valuesToTile.size());
800 for (
auto [opOperand, val] :
801 llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
802 Value shapedOp = val;
803 LLVM_DEBUG(llvm::dbgs() <<
"makeTiledShapes: for operand " << shapedOp);
804 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
811 Type operandType = opOperand.get().getType();
812 if (!
isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
813 linalgOp.isDpsInit(&opOperand))) {
814 allSliceParams.push_back(std::nullopt);
815 LLVM_DEBUG(llvm::dbgs()
816 <<
": not tiled: use shape: " << operandType <<
"\n");
819 LLVM_DEBUG(llvm::dbgs() <<
": tiled: figure out subshape...\n");
822 builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
823 omitPartialTileCheck));
826 return allSliceParams;
834 bool omitPartialTileCheck) {
837 tileSizes, sizeBounds, omitPartialTileCheck);
839 for (
auto item : llvm::zip(valuesToTile, allSliceParameter)) {
840 Value valueToTile = std::get<0>(item);
841 std::optional<SliceParameters> sliceParams = std::get<1>(item);
842 tiledShapes.push_back(
843 sliceParams.has_value()
858 if (!linalgOp.hasIndexSemantics())
861 for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
862 if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
869 b, indexOp.getLoc(), index + offset,
870 {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
885 std::optional<SmallVector<ReassociationIndices>>
890 auto dim = it.index();
891 auto size = it.value();
893 auto attr = llvm::dyn_cast_if_present<Attribute>(size);
894 if (attr && cast<IntegerAttr>(attr).getInt() == 1)
897 std::swap(reassociation.back(), curr);
902 if (!curr.empty() && !reassociation.empty())
903 reassociation.back().append(curr.begin(), curr.end());
904 return reassociation;
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put t...
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
Affine binary operation expression.
AffineExpr getLHS() const
AffineExpr getRHS() const
A dimensional identifier appearing in an affine expression.
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
AffineExpr getResult(unsigned idx) const
static AffineMap getPermutationMap(ArrayRef< unsigned > permutation, MLIRContext *context)
Returns an AffineMap representing a permutation.
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr >> exprsList, MLIRContext *context)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the larges...
Attributes are known-constant values of operations.
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
OpListType & getOperations()
iterator_range< iterator > without_terminator()
Return an iterator range over the operation within this block excluding the terminator operation at t...
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes=std::nullopt, ArrayRef< Location > locs=std::nullopt)
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
result_type_range getResultTypes()
This class contains a list of basic blocks and a link to the parent operation it is attached to.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
void replaceUsesWithIf(Value from, Value to, function_ref< bool(OpOperand &)> functor, bool *allUsesReplaced=nullptr)
Find uses of from and replace them with to if the functor returns true.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isSignlessIntOrFloat() const
Return true if this is a signless integer or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Operation * getOwner() const
Return the owner of this operand.
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor, Value outputTensor, ArrayRef< int64_t > transposeVector)
Returns a GenericOp that transposes inputTensor into outputTensor using transposeVector to permute th...
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of an extract_slice (or source of an insert_slice) operat...
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value pad, bool nofold)
Create a tensor::PadOp that pads source to the size of the statically sized type whose static sizes a...
static Value materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
PadOp createPadHighOp(RankedTensorType type, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder)
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
bool isZeroIndex(OpFoldResult v)
Return true if v is an IntegerAttr with value 0 or a ConstantIndexOp with attribute with value 0.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .. sizeof...(exprs)].
AffineMap inversePermutation(AffineMap map)
Returns a map of codomain to domain dimensions such that the first codomain dimension for a particula...
@ Mul
RHS of mul is always a constant or a symbolic expression.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
bool isPermutationVector(ArrayRef< int64_t > interchange)
Method to check if an interchange vector is a permutation.
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Value _and(Value lhs, Value rhs)
Value slt(Value lhs, Value rhs)
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
Callback function type used to get processor ID, and number of processors used for distribution for a...
DistributionMethod distributionMethod
A struct containing offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > strides
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets