36 #include "llvm/ADT/TypeSwitch.h"
37 #include "llvm/Support/Debug.h"
40 #define DEBUG_TYPE "linalg-utils"
43 using namespace presburger;
68 assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
69        "nonpositive multiplying coefficient");
80 TileCheck t(tileSizes);
95 std::optional<RegionMatcher::BinaryOpKind>
96 RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
97 auto &region = op.getRegion();
98 if (!llvm::hasSingleElement(region))
115 auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
116 if (addPattern.match(&ops.back()))
117 return BinaryOpKind::IAdd;
133 for (Range range : ranges) {
150 return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
151   return m.isProjectedPermutation(true);
156 if (!llvm::hasSingleElement(r))
159 if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
160 linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
162 llvm::any_of(op.getResultTypes(),
163 [](Type type) { return !type.isIntOrIndexOrFloat(); }))
170 if (op.getNumLoops() != op.getNumParallelLoops())
177 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
178 if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
185 return iteratorType == utils::IteratorType::parallel;
189 return iteratorType == utils::IteratorType::reduction;
195 auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
200 Value current = sliceOp.getSource();
205 OpResult opResult = cast<OpResult>(current);
206 current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
208 auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
216 if (sliceOp.getSource().getType() != type)
220 if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
221 return getConstantIntValue(ofr) != static_cast<int64_t>(0);
227 auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
229 sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
235 llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
236 [](std::tuple<OpFoldResult, OpFoldResult> it) {
237 return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
243 Value padOpPad = padOp.getConstantPaddingValue();
249 return sliceOp.getSource();
253 auto memrefTypeTo = cast<MemRefType>(to.getType());
255 auto memrefTypeFrom = cast<MemRefType>(from.getType());
256 assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
257 "`from` and `to` memref must have the same rank");
263 utils::IteratorType::parallel);
264 return b.create<linalg::GenericOp>(
271   b.create<linalg::YieldOp>(loc, args.front());
284 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
285 "expected as many entries for proc info as number of loops, even if "
286 "they are null entries");
288 if (!linalgOp.hasPureBufferSemantics())
289 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
293 b, loc, lbs, ubs, steps, iterArgInitValues,
295 assert(iterArgs.size() == iterArgInitValues.size() &&
296 "expect the number of output tensors and iter args to match");
298 if (!iterArgs.empty()) {
299 operandValuesToUse = linalgOp.getDpsInputs();
300 operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
302 return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
305 if (loopNest.loops.empty() || procInfo.empty())
310 if (procInfo[loop.index()].distributionMethod ==
311 DistributionMethod::Cyclic) {
313 procInfo[loop.index()].nprocs);
328 if (!linalgOp.hasPureBufferSemantics())
329 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
330 assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
336 constantSteps.reserve(steps.size());
337 for (Value v : steps) {
339   assert(constVal.has_value() && "Affine loops require constant steps");
340 constantSteps.push_back(constVal.value());
345 bodyBuilderFn(b, loc, ivs,
346 linalgOp->getOperands());
378 assert(lbs.size() == ubs.size());
379 assert(lbs.size() == steps.size());
380 assert(lbs.size() == iteratorTypes.size());
381 assert(procInfo.empty() || (lbs.size() == procInfo.size()));
385 if (iteratorTypes.empty()) {
386 bodyBuilderFn(b, loc, ivStorage);
394 b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
396 ivStorage.append(ivs.begin(), ivs.end());
397 generateParallelLoopNest(
398 b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
399 iteratorTypes.drop_front(),
400 procInfo.empty() ? procInfo : procInfo.drop_front(),
401 bodyBuilderFn, ivStorage);
406 unsigned nLoops = iteratorTypes.size();
407 unsigned numProcessed = 0;
409 if (procInfo.empty()) {
412 distributionMethod = procInfo.front().distributionMethod;
421 auto remainderProcInfo =
422 procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
423 switch (distributionMethod) {
427 b.create<scf::ParallelOp>(
428 loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
429 steps.take_front(numProcessed),
431 ivStorage.append(localIvs.begin(), localIvs.end());
433 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
434 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
435 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
436 bodyBuilderFn, ivStorage);
440 case DistributionMethod::Cyclic: {
443 b.create<scf::ParallelOp>(
444 loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
445 steps.take_front(numProcessed),
447 ivStorage.append(localIvs.begin(), localIvs.end());
449 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
450 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
451 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
452 bodyBuilderFn, ivStorage);
456 case DistributionMethod::CyclicNumProcsGeNumIters: {
459 Value cond = ab.slt(lbs[0], ubs[0]);
460 for (unsigned i = 1; i < numProcessed; ++i)
461   cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
462 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
465 ubs.drop_front(numProcessed),
466 steps.drop_front(numProcessed),
467 iteratorTypes.drop_front(numProcessed),
468 remainderProcInfo, bodyBuilderFn, ivStorage);
473 case DistributionMethod::CyclicNumProcsEqNumIters:
476 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
478 b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
479 steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
480 remainderProcInfo, bodyBuilderFn, ivStorage);
495 if (!linalgOp.hasPureBufferSemantics())
496 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
497 assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
499 assert(iteratorTypes.size() >= loopRanges.size() &&
500 "expected iterator type for all ranges");
501 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
502 "expected proc information for all loops when present");
503 iteratorTypes = iteratorTypes.take_front(loopRanges.size());
505 unsigned numLoops = iteratorTypes.size();
506 ivs.reserve(numLoops);
507 lbsStorage.reserve(numLoops);
508 ubsStorage.reserve(numLoops);
509 stepsStorage.reserve(numLoops);
512 unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
518 b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
519 ubsStorage[it.index()], stepsStorage[it.index()]);
522 ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
524 b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
526 bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
530 assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
536 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
538     .Case([&](MemRefType) {
539       return builder.create<memref::SubViewOp>(
540           loc, valueToTile, sliceParams.offsets,
543     .Case([&](RankedTensorType) {
544       return builder.create<tensor::ExtractSliceOp>(
545           loc, valueToTile, sliceParams.offsets,
549 llvm_unreachable("Unexpected shaped type");
559 bool omitPartialTileCheck) {
562 ubs, subShapeSizes, omitPartialTileCheck);
571 bool omitPartialTileCheck) {
572 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
573 assert(shapedType && "only shaped types can be tiled");
575 int64_t rank = shapedType.getRank();
579 sliceParams.offsets.reserve(rank);
580 sliceParams.sizes.reserve(rank);
581 sliceParams.strides.reserve(rank);
582 for (unsigned r = 0; r < rank; ++r) {
583   LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
587   sliceParams.sizes.push_back(dim);
589   LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
592   LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
597   LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
600   sliceParams.offsets.push_back(offset);
607   LLVM_DEBUG(llvm::dbgs()
608              << "computeSliceParameters: raw size: " << size << "\n");
609   LLVM_DEBUG(llvm::dbgs()
610              << "computeSliceParameters: new offset: " << offset << "\n");
613 if (omitPartialTileCheck) {
616   LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
617   sliceParams.sizes.push_back(size);
626 int64_t shapeSize = shape[r];
628 auto hasTileSizeOne = sizeCst && *sizeCst == 1;
629 auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
630 ((shapeSize % *sizeCst) == 0);
631 if (!hasTileSizeOne && !dividesEvenly) {
632   LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
633                           << ", size: " << size
634                           << ": make sure in bound with affine.min\n");
638 bindDims(context, dim0, dim1, dim2);
653   llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
669   LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
670   sliceParams.sizes.push_back(size);
679 for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
680   LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
683   LLVM_DEBUG(llvm::dbgs()
684              << "computeTileOffsets: " << offsets.back() << "\n");
693 for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
700   LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
706 if (op.hasPureBufferSemantics())
708 return llvm::to_vector(
709     llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
710 return operands[opOperand.getOperandNumber()].getType();
717 if (op.hasPureBufferSemantics())
720 tensorResults.reserve(results.size());
722 unsigned resultIdx = 0;
723 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
726 Value outputTensor = operands[opOperand.getOperandNumber()];
727   if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
728     Value inserted = builder.create<tensor::InsertSliceOp>(
729 loc, sliceOp.getSource().getType(), results[resultIdx],
730 sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
731 sliceOp.getStrides(), sliceOp.getStaticOffsets(),
732 sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
733 tensorResults.push_back(inserted);
735 tensorResults.push_back(results[resultIdx]);
739 return tensorResults;
747 bool omitPartialTileCheck) {
748 assert(ivs.size() == static_cast<size_t>(llvm::count_if(
749 llvm::make_range(tileSizes.begin(), tileSizes.end()),
751 "expected as many ivs as non-zero sizes");
760 assert(static_cast<int64_t>(valuesToTile.size()) <=
761 linalgOp->getNumOperands() &&
762 "more value to tile than operands.");
764 allSliceParams.reserve(valuesToTile.size());
765 for (auto [opOperand, val] :
766 llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
767 Value shapedOp = val;
768   LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
769 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
776 Type operandType = opOperand.get().getType();
777   if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
778 linalgOp.isDpsInit(&opOperand))) {
779 allSliceParams.push_back(std::nullopt);
780     LLVM_DEBUG(llvm::dbgs()
781                << ": not tiled: use shape: " << operandType << "\n");
784   LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
787 builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
788 omitPartialTileCheck));
791 return allSliceParams;
799 bool omitPartialTileCheck) {
802 tileSizes, sizeBounds, omitPartialTileCheck);
804 for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
805 Value valueToTile = std::get<0>(item);
806 std::optional<SliceParameters> sliceParams = std::get<1>(item);
807 tiledShapes.push_back(
808 sliceParams.has_value()
824 if (!linalgOp.hasIndexSemantics())
827 for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
828 if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
835 b, indexOp.getLoc(), index + offset,
836 {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
851 std::optional<SmallVector<ReassociationIndices>>
856 auto dim = it.index();
857 auto size = it.value();
859 auto attr = llvm::dyn_cast_if_present<Attribute>(size);
860 if (attr && cast<IntegerAttr>(attr).getInt() == 1)
863 std::swap(reassociation.back(), curr);
868 if (!curr.empty() && !reassociation.empty())
869 reassociation.back().append(curr.begin(), curr.end());
870 return reassociation;
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put them into the corresponding vectors.
Affine binary operation expression.
AffineExpr getLHS() const
AffineExpr getRHS() const
A dimensional identifier appearing in an affine expression.
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map. Affine maps are immutable, like Types, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
AffineExpr getResult(unsigned idx) const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr >> exprsList, MLIRContext *context)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the largest dim in exprs and as many symbols as the largest symbol in exprs.
Attributes are known-constant values of operations.
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
OpListType & getOperations()
iterator_range< iterator > without_terminator()
Return an iterator range over the operations within this block excluding the terminator operation at the end.
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent insertions to go right after it.
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to track mutations and create new operations.
void replaceUsesWithIf(Value from, Value to, function_ref< bool(OpOperand &)> functor, bool *allUsesReplaced=nullptr)
Find uses of from and replace them with to if the functor returns true.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable component.
bool isSignlessIntOrFloat() const
Return true of this is a signless integer or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Operation * getOwner() const
Return the owner of this operand.
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only another loop and a terminator operation.
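As an illustration, here is a minimal sketch of calling this builder to emit a 2-d affine loop nest; the constant bounds, the builder b, and the location loc are assumptions rather than values taken from Utils.cpp, and the snippet presumes the same headers and namespaces as the file above.
// Sketch: a 2-d nest of affine.for loops over hypothetical constant bounds.
affine::buildAffineLoopNest(
    b, loc, /*lbs=*/{0, 0}, /*ubs=*/{128, 64}, /*steps=*/{1, 1},
    [](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange ivs) {
      // Per-iteration body would be emitted here using `ivs`.
    });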
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying those operands.
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
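The folded index arithmetic used throughout this file (offsets, tile sizes) can be expressed through this helper. A sketch, where iv and tileSize are hypothetical OpFoldResult values and b/loc are assumed to come from the caller:
// Sketch: fold "iv * tileSize" into a single OpFoldResult, composing with any
// affine.apply ops already defining the operands (iv/tileSize are assumed).
AffineExpr d0, d1;
bindDims(b.getContext(), d0, d1);
OpFoldResult scaled = affine::makeComposedFoldedAffineApply(
    b, loc, AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, d0 * d1),
    {iv, tileSize});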
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
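A sketch of how a tiling driver might invoke it once the loop induction variables and bounds are known; the wrapper function and its parameter names are hypothetical, not part of the upstream API:
// Sketch: hypothetical helper that tiles all operands of `linalgOp` for the
// current loop iteration (ivs, tileSizes, sizeBounds supplied by the caller).
static SmallVector<Value> tileAllOperands(OpBuilder &b, Location loc,
                                          linalg::LinalgOp linalgOp,
                                          ArrayRef<OpFoldResult> ivs,
                                          ArrayRef<OpFoldResult> tileSizes,
                                          ArrayRef<OpFoldResult> sizeBounds) {
  return linalg::makeTiledShapes(b, loc, linalgOp, linalgOp->getOperands(),
                                 ivs, tileSizes, sizeBounds,
                                 /*omitPartialTileCheck=*/false);
}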
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
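A sketch of using it to copy between two memrefs of matching rank; from, to, b, and loc are assumed to be in scope:
// Sketch: emit a rank-polymorphic elementwise copy as a linalg.generic
// (`from` and `to` are assumed memref values of the same rank).
linalg::GenericOp copy = linalg::makeMemRefCopyOp(b, loc, from, to);
(void)copy; // Buffer semantics: the op writes into `to`, no results to use.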
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
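Combined with computeTileSizes above, the typical call pattern inside a tiling loop body looks roughly as follows (a sketch; ivs, tileSizes, and sizeBounds are assumed to have been computed by the caller):
// Sketch: derive per-iteration offsets and sizes from the loop ivs
// (ivs/tileSizes/sizeBounds are assumptions of this snippet).
SmallVector<OpFoldResult> tileOffsets =
    linalg::computeTileOffsets(b, loc, ivs, tileSizes);
SmallVector<OpFoldResult> tileDimSizes =
    linalg::computeTileSizes(b, loc, tileSizes, sizeBounds);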
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
static Operation * materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of a extract_slice (or source of a insert_slice) operat...
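A sketch of the intended use: if a reassociation exists, the unit dimensions of a slice can be folded away with a collapse_shape. The tensor::CollapseShapeOp here is an illustration, and mixedSizes and source are assumed to be in scope:
// Sketch: drop unit dims when a valid reassociation is found
// (`mixedSizes`/`source` are assumed by this snippet).
std::optional<SmallVector<ReassociationIndices>> reassociation =
    linalg::getReassociationMapForFoldingUnitDims(mixedSizes);
if (reassociation)
  source = b.create<tensor::CollapseShapeOp>(loc, source, *reassociation);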
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted from.
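In the tensor case this is the counterpart of makeTiledShapes: after the tiled computation, its results are written back into the full-sized destinations. A sketch, where tiledOp is assumed to be a clone of linalgOp running on the tiled operands:
// Sketch: stitch tiled results back into the original destination tensors
// (`tiledOperands` and `tiledOp` are assumptions of this snippet).
SmallVector<Value> fullSizedResults = linalg::insertSlicesBack(
    b, loc, linalgOp, tiledOperands, tiledOp->getResults());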
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
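A sketch of applying it after cloning the op onto a tile, so that linalg.index values reflect global rather than tile-local coordinates; tiledLinalgOp and tileOffsets are assumed names:
// Sketch: shift every linalg.index result by the tile's offset.
linalg::offsetIndices(b, tiledLinalgOp, tileOffsets);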
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Operation * makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value pad, bool nofold)
Create a tensor::PadOp that pads source to the size of the statically sized type whose static sizes are assumed to be greater than the dynamic source size.
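A sketch of padding a dynamically shaped source tensor up to a hypothetical static 8x16 f32 type; paddingValue and source are assumptions of the snippet:
// Sketch: pad `source` on the high side up to a static target shape
// (`source` and `paddingValue` are assumed values).
auto staticType = RankedTensorType::get({8, 16}, b.getF32Type());
Value padded = linalg::makeComposedPadHighOp(b, loc, staticType, source,
                                             paddingValue, /*nofold=*/false);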
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to the given operands.
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
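A sketch of computing the parameters for a single operand and then materializing the slice by hand (makeTiledShape above wraps both steps); all lowercase variables are assumed to be provided by the surrounding tiling logic:
// Sketch: compute offsets/sizes/strides for one operand, then extract it
// (`valueToTile`, `indexingMap`, `lbs`, `ubs`, `subShapeSizes` are assumed).
linalg::SliceParameters params = linalg::computeSliceParameters(
    b, loc, valueToTile, tileSizes, indexingMap, lbs, ubs, subShapeSizes,
    /*omitPartialTileCheck=*/false);
Value slice = b.create<tensor::ExtractSliceOp>(
    loc, valueToTile, params.offsets, params.sizes, params.strides);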
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
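A sketch of calling it directly with tensor iteration arguments threaded through the nest; the bounds, steps, and init values are hypothetical:
// Sketch: a sequential loop nest whose body yields the updated iter args
// (`lbs`, `ubs`, `steps`, `initTensors` are assumptions of this snippet).
scf::LoopNest nest = scf::buildLoopNest(
    b, loc, lbs, ubs, steps, /*iterArgs=*/initTensors,
    [&](OpBuilder &nested, Location nestedLoc, ValueRange ivs,
        ValueRange iterArgs) -> scf::ValueVector {
      // The tiled computation would be emitted here.
      return scf::ValueVector(iterArgs.begin(), iterArgs.end());
    });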
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
PadOp createPadHighOp(RankedTensorType resType, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder, SmallVector< Value > dynOutDim={})
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
bool isZeroIndex(OpFoldResult v)
Return true if v is an IntegerAttr with value 0 or a ConstantIndexOp whose value attribute is 0.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .. N].
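The in-bound clamping around source line 638 above binds dim exprs in the same way before emitting an affine.min. A minimal sketch of the pattern; the exact map and the use of makeComposedFoldedAffineMin are assumptions about usage, not a quote of the file:
// Sketch: min(tileSize, dimSize - offset) expressed with bound dim exprs
// (`tileSize`, `dimSize`, `offset` are assumed OpFoldResults).
AffineExpr dim0, dim1, dim2;
bindDims(b.getContext(), dim0, dim1, dim2);
AffineMap minMap = AffineMap::inferFromExprList(
    {ArrayRef<AffineExpr>{dim0, dim1 - dim2}}, b.getContext()).front();
OpFoldResult clamped = affine::makeComposedFoldedAffineMin(
    b, loc, minMap, {tileSize, dimSize, offset});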
Mul
RHS of mul is always a constant or a symbolic expression.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Value _and(Value lhs, Value rhs)
Value slt(Value lhs, Value rhs)
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or static.
Utility class used to generate nested loops with ranges described by loopRanges and loop type described by iteratorTypes.
Callback function type used to get processor ID, and number of processors used for distribution for all parallel loops generated.
DistributionMethod distributionMethod
A struct containing offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > strides
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets