#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "linalg-utils"

using namespace presburger;

         "nonpositive multiplying coefficient");

  TileCheck t(tileSizes);

std::optional<RegionMatcher::BinaryOpKind>
RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {

  if (!llvm::hasSingleElement(region))

  auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
  if (addPattern.match(&ops.back()))
    return BinaryOpKind::IAdd;
  for (Range range : ranges) {

  return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
    return m.isProjectedPermutation(true);

  if (!llvm::hasSingleElement(r))

  if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
            linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||

        [](Type type) { return !type.isIntOrIndexOrFloat(); }))

  if (op.getNumLoops() != op.getNumParallelLoops())

  for (OpOperand &opOperand : op.getDpsInitsMutable()) {
    if (!op.getMatchingIndexingMap(&opOperand).isPermutation())

  return iteratorType == utils::IteratorType::parallel;

  return iteratorType == utils::IteratorType::reduction;
  auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();

  Value current = sliceOp.getSource();

    OpResult opResult = cast<OpResult>(current);
    current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();

  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;

  if (sliceOp.getSource().getType() != type)

  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
        return getConstantIntValue(ofr) != static_cast<int64_t>(0);

  auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();

      sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())

          llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
          [](std::tuple<OpFoldResult, OpFoldResult> it) {
            return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));

  Value padOpPad = padOp.getConstantPaddingValue();

  return sliceOp.getSource();
  auto resultTensorType = cast<RankedTensorType>(outputTensor.getType());
  Type elementType = resultTensorType.getElementType();

         "expect transpose vector to be a permutation");
  assert(transposeVector.size() ==
             static_cast<size_t>(resultTensorType.getRank()) &&
         "expect transpose vector size to match result tensor rank");

                                              utils::IteratorType::parallel);

      b.create<GenericOp>(loc, resultTensorType, inputTensor, outputTensor,
                          indexingMaps, iteratorTypes);
  Region &body = transposeOp.getRegion();

  body.front().addArguments({elementType, elementType}, {loc, loc});

  b.create<YieldOp>(loc, transposeOp.getRegion().front().getArgument(0));
  auto memrefTypeTo = cast<MemRefType>(to.getType());

  auto memrefTypeFrom = cast<MemRefType>(from.getType());
  assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
         "`from` and `to` memref must have the same rank");

                                              utils::IteratorType::parallel);
  return b.create<linalg::GenericOp>(

        b.create<linalg::YieldOp>(loc, args.front());
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected as many entries for proc info as number of loops, even if "
         "they are null entries");

  if (!linalgOp.hasBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());

      b, loc, lbs, ubs, steps, iterArgInitValues,

        assert(iterArgs.size() == iterArgInitValues.size() &&
               "expect the number of output tensors and iter args to match");

        if (!iterArgs.empty()) {
          operandValuesToUse = linalgOp.getDpsInputs();
          operandValuesToUse.append(iterArgs.begin(), iterArgs.end());

        return bodyBuilderFn(b, loc, ivs, operandValuesToUse);

  if (loopNest.loops.empty() || procInfo.empty())

    if (procInfo[loop.index()].distributionMethod ==
        DistributionMethod::Cyclic) {

                               procInfo[loop.index()].nprocs);
  if (!linalgOp.hasBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");

  constantSteps.reserve(steps.size());
  for (Value v : steps) {

    assert(constVal.has_value() && "Affine loops require constant steps");
    constantSteps.push_back(constVal.value());

        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
  assert(lbs.size() == ubs.size());
  assert(lbs.size() == steps.size());
  assert(lbs.size() == iteratorTypes.size());
  assert(procInfo.empty() || (lbs.size() == procInfo.size()));

  if (iteratorTypes.empty()) {
    bodyBuilderFn(b, loc, ivStorage);

        b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),

          ivStorage.append(ivs.begin(), ivs.end());
          generateParallelLoopNest(
              b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
              iteratorTypes.drop_front(),
              procInfo.empty() ? procInfo : procInfo.drop_front(),
              bodyBuilderFn, ivStorage);
  unsigned nLoops = iteratorTypes.size();
  unsigned numProcessed = 0;

  if (procInfo.empty()) {

    distributionMethod = procInfo.front().distributionMethod;

  auto remainderProcInfo =
      procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
  switch (distributionMethod) {

    b.create<scf::ParallelOp>(
        loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),

          ivStorage.append(localIvs.begin(), localIvs.end());

              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);

  case DistributionMethod::Cyclic: {

    b.create<scf::ParallelOp>(
        loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
        steps.take_front(numProcessed),

          ivStorage.append(localIvs.begin(), localIvs.end());

              nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
              ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
              iteratorTypes.drop_front(numProcessed), remainderProcInfo,
              bodyBuilderFn, ivStorage);

  case DistributionMethod::CyclicNumProcsGeNumIters: {

    Value cond = ab.slt(lbs[0], ubs[0]);
    for (unsigned i = 1; i < numProcessed; ++i)
      cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));

                               ubs.drop_front(numProcessed),
                               steps.drop_front(numProcessed),
                               iteratorTypes.drop_front(numProcessed),
                               remainderProcInfo, bodyBuilderFn, ivStorage);

  case DistributionMethod::CyclicNumProcsEqNumIters:

    ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));

        b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
        steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
        remainderProcInfo, bodyBuilderFn, ivStorage);
  if (!linalgOp.hasBufferSemantics())
    llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
  assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");

  assert(iteratorTypes.size() >= loopRanges.size() &&
         "expected iterator type for all ranges");
  assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
         "expected proc information for all loops when present");
  iteratorTypes = iteratorTypes.take_front(loopRanges.size());

  unsigned numLoops = iteratorTypes.size();
  ivs.reserve(numLoops);
  lbsStorage.reserve(numLoops);
  ubsStorage.reserve(numLoops);
  stepsStorage.reserve(numLoops);

  unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);

        b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
        ubsStorage[it.index()], stepsStorage[it.index()]);

  ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);

      b, loc, lbs, ubs, steps, iteratorTypes, procInfo,

        bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());

  assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());

          .Case([&](MemRefType) {
            return builder.create<memref::SubViewOp>(
                loc, valueToTile, sliceParams.offsets,

          .Case([&](RankedTensorType) {
            return builder.create<tensor::ExtractSliceOp>(
                loc, valueToTile, sliceParams.offsets,

            llvm_unreachable("Unexpected shaped type");

  return sliceOp->getResult(0);

                          bool omitPartialTileCheck) {

                               ubs, subShapeSizes, omitPartialTileCheck);
                       bool omitPartialTileCheck) {
  auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
  assert(shapedType && "only shaped types can be tiled");

  int64_t rank = shapedType.getRank();

  sliceParams.offsets.reserve(rank);
  sliceParams.sizes.reserve(rank);
  sliceParams.strides.reserve(rank);
  for (unsigned r = 0; r < rank; ++r) {
    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);

      sliceParams.sizes.push_back(dim);

      LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");

    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");

    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");

    sliceParams.offsets.push_back(offset);

    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: raw size: " << size << "\n");
    LLVM_DEBUG(llvm::dbgs()
               << "computeSliceParameters: new offset: " << offset << "\n");

    if (omitPartialTileCheck) {

      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
      sliceParams.sizes.push_back(size);

    int64_t shapeSize = shape[r];

    auto hasTileSizeOne = sizeCst && *sizeCst == 1;
    auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
                         ((shapeSize % *sizeCst) == 0);
    if (!hasTileSizeOne && !dividesEvenly) {
      LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
                              << ", size: " << size
                              << ": make sure in bound with affine.min\n");

          llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {

    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
    sliceParams.sizes.push_back(size);
  for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");

    LLVM_DEBUG(llvm::dbgs()
               << "computeTileOffsets: " << offsets.back() << "\n");

  for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {

    LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
  if (op.hasBufferSemantics())

  return llvm::to_vector(
      llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
        return operands[opOperand.getOperandNumber()].getType();

  if (op.hasBufferSemantics())

  tensorResults.reserve(results.size());

  unsigned resultIdx = 0;
  for (OpOperand &opOperand : op.getDpsInitsMutable()) {

    Value outputTensor = operands[opOperand.getOperandNumber()];
    if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
      Value inserted = builder.create<tensor::InsertSliceOp>(
          loc, sliceOp.getSource().getType(), results[resultIdx],
          sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
          sliceOp.getStrides(), sliceOp.getStaticOffsets(),
          sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
      tensorResults.push_back(inserted);

      tensorResults.push_back(results[resultIdx]);

  return tensorResults;
                     bool omitPartialTileCheck) {
  assert(ivs.size() == static_cast<size_t>(llvm::count_if(
                           llvm::make_range(tileSizes.begin(), tileSizes.end()),

         "expected as many ivs as non-zero sizes");

  assert(static_cast<int64_t>(valuesToTile.size()) <=
             linalgOp->getNumOperands() &&
         "more value to tile than operands.");

  allSliceParams.reserve(valuesToTile.size());
  for (auto [opOperand, val] :
       llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
    Value shapedOp = val;
    LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
    AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);

    Type operandType = opOperand.get().getType();
    if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
                                      linalgOp.isDpsInit(&opOperand))) {
      allSliceParams.push_back(std::nullopt);
      LLVM_DEBUG(llvm::dbgs()
                 << ": not tiled: use shape: " << operandType << "\n");

    LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");

        builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
        omitPartialTileCheck));

  return allSliceParams;

                       bool omitPartialTileCheck) {

                                 tileSizes, sizeBounds, omitPartialTileCheck);

  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
    Value valueToTile = std::get<0>(item);
    std::optional<SliceParameters> sliceParams = std::get<1>(item);
    tiledShapes.push_back(
        sliceParams.has_value()
  if (!linalgOp.hasIndexSemantics())

  for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
    if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])

        b, indexOp.getLoc(), index + offset,
        {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});

std::optional<SmallVector<ReassociationIndices>>

    auto dim = it.index();
    auto size = it.value();

    auto attr = llvm::dyn_cast_if_present<Attribute>(size);
    if (attr && cast<IntegerAttr>(attr).getInt() == 1)

      std::swap(reassociation.back(), curr);

  if (!curr.empty() && !reassociation.empty())
    reassociation.back().append(curr.begin(), curr.end());
  return reassociation;
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put t...
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
Affine binary operation expression.
AffineExpr getLHS() const
AffineExpr getRHS() const
An integer constant appearing in affine expression.
A dimensional identifier appearing in an affine expression.
unsigned getPosition() const
Base class for AffineExpr visitors/walkers.
Base type for affine expression.
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr >> exprsList)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the larges...
AffineExpr getResult(unsigned idx) const
static AffineMap getPermutationMap(ArrayRef< unsigned > permutation, MLIRContext *context)
Returns an AffineMap representing a permutation.
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
Attributes are known-constant values of operations.
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
OpListType & getOperations()
iterator_range< iterator > without_terminator()
Return an iterator range over the operation within this block excluding the terminator operation at t...
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
Region & getRegion(unsigned index)
Returns the region held by this operation at position 'index'.
result_type_range getResultTypes()
This class contains a list of basic blocks and a link to the parent operation it is attached to.
void push_back(Block *block)
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void replaceOpWithIf(Operation *op, ValueRange newValues, bool *allUsesReplaced, llvm::unique_function< bool(OpOperand &) const > functor)
This method replaces the uses of the results of op with the values in newValues when the provided fun...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isSignlessIntOrFloat() const
Return true if this is a signless integer or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Operation * getOwner() const
Return the owner of this operand.
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
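A minimal usage sketch; the bounds and steps are illustrative, `b` and `loc` are an in-scope OpBuilder and Location, and depending on the MLIR version the function may need an affine:: qualifier:

  // Build a 2-D affine.for nest over [0, 128) x [0, 64) with unit steps.
  buildAffineLoopNest(
      b, loc, /*lbs=*/{0, 0}, /*ubs=*/{128, 64}, /*steps=*/{1, 1},
      [](OpBuilder &nested, Location nestedLoc, ValueRange ivs) {
        // ivs[0] and ivs[1] are the induction variables of the two loops.
      });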
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
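A minimal sketch, assuming an OpBuilder `b`, a Location `loc`, and a hypothetical OpFoldResult `iv`; when all operands are constant the result folds to an Attribute, so no affine.apply is materialized (namespace qualification depends on the MLIR version):

  // Compute iv * 4 as a folded OpFoldResult.
  AffineExpr d0 = getAffineDimExpr(0, b.getContext());
  OpFoldResult scaled = makeComposedFoldedAffineApply(
      b, loc, AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, d0 * 4), {iv});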
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
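A sketch of the typical call sequence in a hand-written tiling driver; `linalgOp`, `ivs`, `tileSizes`, and `sizeBounds` are assumed to come from the surrounding loop construction and all names are illustrative:

  // Derive per-tile offsets and sizes, then materialize tiled operands.
  SmallVector<OpFoldResult> offsets = computeTileOffsets(b, loc, ivs, tileSizes);
  SmallVector<OpFoldResult> sizes = computeTileSizes(b, loc, tileSizes, sizeBounds);
  SmallVector<Value> tiledOperands =
      makeTiledShapes(b, loc, linalgOp, linalgOp->getOperands(), ivs, tileSizes,
                      sizeBounds, /*omitPartialTileCheck=*/false);
  // `offsets` is also what offsetIndices() would use to shift linalg.index ops
  // in the tiled clone of the op.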
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
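A minimal sketch, assuming `src` and `dst` are memref-typed Values of the same rank and element type:

  // Emit a rank-polymorphic elementwise copy from src into dst.
  GenericOp copyOp = makeMemRefCopyOp(b, loc, /*from=*/src, /*to=*/dst);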
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor, Value outputTensor, ArrayRef< int64_t > transposeVector)
Returns a GenericOp that transposes inputTensor into outputTensor using transposeVector to permute th...
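A minimal sketch, assuming `input` and `init` are tensor-typed Values whose shapes agree with the {1, 0} permutation:

  // Swap the two dimensions of a 2-D tensor into the pre-created init tensor.
  GenericOp transposeOp =
      makeTransposeOp(b, loc, /*inputTensor=*/input, /*outputTensor=*/init,
                      /*transposeVector=*/{1, 0});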
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of an extract_slice (or source of an insert_slice) operat...
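A minimal sketch with illustrative sizes (`dynamicSize` is a hypothetical index-typed Value); each unit dimension is grouped with the next non-unit dimension, yielding a reassociation such as {{0, 1}, {2}}:

  SmallVector<OpFoldResult> mixedSizes = {b.getIndexAttr(1), dynamicSize,
                                          b.getIndexAttr(8)};
  std::optional<SmallVector<ReassociationIndices>> reassoc =
      getReassociationMapForFoldingUnitDims(mixedSizes);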
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
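Continuing the tiling sketch above, assuming `tiledOp` is the clone of `linalgOp` that operates on `tiledOperands`; names are illustrative:

  // Insert each tile result back into the tensor it was extracted from.
  SmallVector<Value> fullResults =
      insertSlicesBack(b, loc, linalgOp, tiledOperands, tiledOp->getResults());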
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value pad, bool nofold)
Create a tensor::PadOp that pads source to the size of the statically sized type whose static sizes a...
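A minimal sketch, assuming `slice` is a 2-D tensor-typed Value (possibly with dynamic sizes) and `zero` is a matching scalar constant used as the padding value:

  // Pad the slice up to a static 4x8 shape; the utility reuses an existing pad
  // of the source when it can prove the padding matches.
  auto paddedType = RankedTensorType::get({4, 8}, b.getF32Type());
  Value padded =
      makeComposedPadHighOp(b, loc, paddedType, slice, zero, /*nofold=*/false);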
static Value materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
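A minimal sketch, assuming index-typed Values `procId`, `nprocs`, `lb`, `ub`, and `step` are in scope; the bounds are rewritten in place:

  // After the call, processor procId iterates lb + procId*step,
  // lb + (procId + nprocs)*step, ... i.e. a cyclic slice of the iterations.
  updateBoundsForCyclicDistribution(b, loc, procId, nprocs, lb, ub, step);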
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
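A minimal sketch using the overload without iter_args (the declaration above shows the variant that also threads iterArgs through the nest); `lb`, `ub`, and `step` are hypothetical index-typed Values:

  scf::LoopNest nest = scf::buildLoopNest(
      b, loc, /*lbs=*/{lb}, /*ubs=*/{ub}, /*steps=*/{step},
      [](OpBuilder &nested, Location nestedLoc, ValueRange ivs) {
        // Emit the body for induction variable ivs[0] here.
      });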
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
PadOp createPadHighOp(RankedTensorType type, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder)
This header declares functions that assist transformations in the MemRef dialect.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
bool isZeroIndex(OpFoldResult v)
Return true if v is an IntegerAttr with value 0 or a ConstantIndexOp with attribute value 0.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
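A small sketch, with `sizeOfr` standing for any OpFoldResult produced above:

  if (std::optional<int64_t> cst = getConstantIntValue(sizeOfr)) {
    // *cst is the statically known value, e.g. a tile size of 1 that lets
    // computeSliceParameters skip the partial-tile affine.min.
  }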
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
AffineMap inversePermutation(AffineMap map)
Returns a map of codomain to domain dimensions such that the first codomain dimension for a particula...
@ Mul
RHS of mul is always a constant or a symbolic expression.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
bool isPermutationVector(ArrayRef< int64_t > interchange)
Method to check if an interchange vector is a permutation.
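A trivial sketch of the check behind makeTransposeOp's "expect transpose vector to be a permutation" assertion:

  assert(isPermutationVector({1, 2, 0}));   // a permutation of 0..2
  assert(!isPermutationVector({0, 0, 2}));  // repeated index: not a permutation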
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Value _and(Value lhs, Value rhs)
Value slt(Value lhs, Value rhs)
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
Callback function type used to get processor ID, and number of processors used for distribution for a...
DistributionMethod distributionMethod
A struct containing offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > strides
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets