doxygen/TensorTilingInterfaceImpl_8cpp_source.html

 //===- TensorTilingInterfaceImpl.cpp - Tiling Interface models --*- C++ -*-===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"

 #include "mlir/Dialect/Affine/IR/AffineOps.h"

 #include "mlir/Dialect/Affine/Utils.h"

 #include "mlir/Dialect/Arith/Utils/Utils.h"

 #include "mlir/Dialect/Linalg/Utils/Utils.h"

 #include "mlir/Dialect/SCF/IR/SCF.h"

 #include "mlir/Dialect/Tensor/IR/Tensor.h"

 #include "mlir/Interfaces/InferTypeOpInterface.h"

 #include "mlir/Interfaces/TilingInterface.h"


 using namespace mlir;

 using namespace mlir::tensor;


 namespace {


 /// External model attaching `TilingInterface` to `tensor::PadOp`.
 ///
 /// The iteration space used here is the index space of the pad result: each
 /// result dimension is one parallel loop, and result tiles map one-to-one to
 /// iteration-domain tiles. Tiled code is produced by
 /// `tensor::bubbleUpPadSlice`.
 struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {

   /// Returns one `parallel` iterator type per dimension of the pad result.
   SmallVector<utils::IteratorType> getLoopIteratorTypes(Operation *op) const {
     const int64_t resultRank = cast<PadOp>(op).getResultType().getRank();
     return SmallVector<utils::IteratorType>(resultRank,
                                             utils::IteratorType::parallel);
   }

   /// Returns the range `[0, dimSize)` with stride 1 for every dimension of
   /// the first reified result shape of `op`.
   SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
     ReifiedRankedShapedTypeDims resultShapes;
     // The status returned by the reification is intentionally discarded.
     (void)reifyResultShapes(b, op, resultShapes);

     const OpFoldResult lowerBound = b.getIndexAttr(0);
     const OpFoldResult unitStride = b.getIndexAttr(1);

     // Build every range fully formed; only the size differs per dimension.
     SmallVector<Range> domain;
     domain.reserve(resultShapes[0].size());
     for (const OpFoldResult &extent : resultShapes[0])
       domain.push_back(Range{lowerBound, extent, unitStride});
     return domain;
   }

   /// Produces the tile described by `offsets`/`sizes` by forwarding to
   /// `tensor::bubbleUpPadSlice`; its failure is propagated unchanged.
   FailureOr<TilingResult>
   getTiledImplementation(Operation *op, OpBuilder &b,
                          ArrayRef<OpFoldResult> offsets,
                          ArrayRef<OpFoldResult> sizes) const {
     return tensor::bubbleUpPadSlice(b, cast<PadOp>(op), offsets, sizes);
   }

   /// Copies the iteration-domain tile (`offsets`, `sizes`) verbatim into
   /// `resultOffsets`/`resultSizes`. Always succeeds.
   LogicalResult
   getResultTilePosition(Operation *op, OpBuilder &b, unsigned resultNumber,
                         ArrayRef<OpFoldResult> offsets,
                         ArrayRef<OpFoldResult> sizes,
                         SmallVector<OpFoldResult> &resultOffsets,
                         SmallVector<OpFoldResult> &resultSizes) const {
     resultOffsets.clear();
     resultOffsets.append(offsets.begin(), offsets.end());
     resultSizes.clear();
     resultSizes.append(sizes.begin(), sizes.end());
     return success();
   }

   /// Copies the result tile (`offsets`, `sizes`) verbatim into
   /// `iterDomainOffsets`/`iterDomainSizes`. Always succeeds.
   LogicalResult getIterationDomainTileFromResultTile(
       Operation *op, OpBuilder &b, unsigned resultNumber,
       ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
       SmallVectorImpl<OpFoldResult> &iterDomainOffsets,
       SmallVectorImpl<OpFoldResult> &iterDomainSizes) const {
     iterDomainOffsets.clear();
     iterDomainOffsets.append(offsets.begin(), offsets.end());
     iterDomainSizes.clear();
     iterDomainSizes.append(sizes.begin(), sizes.end());
     return success();
   }

   /// Generates the value of a result tile. `resultNumber` is not consulted;
   /// the request is served by `getTiledImplementation`.
   FailureOr<TilingResult>
   generateResultTileValue(Operation *op, OpBuilder &b, unsigned resultNumber,
                           ArrayRef<OpFoldResult> offsets,
                           ArrayRef<OpFoldResult> sizes) const {
     return getTiledImplementation(op, b, offsets, sizes);
   }
 };


 } // namespace


 /// Computes the slice of `padOp`'s result described by `offsets`/`sizes` by
 /// slicing the pad source first and padding the slice afterwards, i.e. it
 /// builds pad(extract_slice(x)) in place of extract_slice(pad(x)).
 ///
 /// \param b        Builder used to create all new IR.
 /// \param padOp    The pad operation whose result is being sliced.
 /// \param offsets  Per-dimension start of the slice in the padded result.
 /// \param sizes    Per-dimension size of the slice; also determines the
 ///                 result type.
 /// \param generateZeroSliceGuard  If true and a slice length is only known
 ///                 at runtime, wrap the rewrite in an `scf.if` that emits a
 ///                 `tensor.generate` when the source slice is empty.
 ///
 /// \returns failure if `padOp` does not have a constant padding value.
 /// Otherwise a `TilingResult` holding the created op, the (possibly cast)
 /// result value and the generated `tensor.extract_slice` (none when the
 /// source is statically known to be unused). Only unit strides are produced.
 FailureOr<TilingResult> tensor::bubbleUpPadSlice(OpBuilder &b,
                                                  tensor::PadOp padOp,
                                                  ArrayRef<OpFoldResult> offsets,
                                                  ArrayRef<OpFoldResult> sizes,
                                                  bool generateZeroSliceGuard) {
   // Only constant padding value supported.
   Value padValue = padOp.getConstantPaddingValue();
   if (!padValue)
     return failure();

   // Helper variables and functions for various arithmetic operations. These
   // are used extensively for computing new offset/length and padding values.
   Location loc = padOp->getLoc();
   AffineExpr dim0, dim1;
   bindDims(b.getContext(), dim0, dim1);
   // Subtract two integers.
   auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
   auto sub = [&](OpFoldResult v1, OpFoldResult v2) {
     return affine::makeComposedFoldedAffineApply(b, loc, subMap, {v1, v2});
   };
   // Take the minimum of two integers.
   auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
   auto min = [&](OpFoldResult v1, OpFoldResult v2) {
     return affine::makeComposedFoldedAffineMin(b, loc, idMap, {v1, v2});
   };
   // Take the maximum of two integers.
   auto max = [&](OpFoldResult v1, OpFoldResult v2) {
     return affine::makeComposedFoldedAffineMax(b, loc, idMap, {v1, v2});
   };
   // Zero index-typed integer.
   OpFoldResult zero = b.getIndexAttr(0);

   // Compute new offsets, lengths, low padding, high padding.
   SmallVector<OpFoldResult> newOffsets, newLengths;
   SmallVector<OpFoldResult> newLows, newHighs;
   // Set to true if the original data source is not read at all.
   bool hasZeroLen = false;
   // Same as hasZeroLen, but for dynamic dimension sizes. This condition
   // is true if the original data source turns out to be unused at runtime.
   Value dynHasZeroLenCond;

   int64_t rank = padOp.getSourceType().getRank();
   // Only unit stride supported.
   SmallVector<OpFoldResult> newStrides(rank, b.getIndexAttr(1));
   // The mixed static/dynamic padding amounts do not change per dimension;
   // materialize them once instead of rebuilding both vectors per iteration.
   const SmallVector<OpFoldResult> mixedLowPad = padOp.getMixedLowPad();
   const SmallVector<OpFoldResult> mixedHighPad = padOp.getMixedHighPad();
   for (int64_t dim = 0; dim < rank; ++dim) {
     OpFoldResult low = mixedLowPad[dim];
     bool hasLowPad = !isZeroInteger(low);
     OpFoldResult high = mixedHighPad[dim];
     bool hasHighPad = !isZeroInteger(high);
     OpFoldResult offset = offsets[dim];
     OpFoldResult length = sizes[dim];
     // If the dim has no padding, we don't need to calculate new values for
     // that dim as the existing ones are correct even after the pattern.
     if (!hasLowPad && !hasHighPad) {
       newOffsets.push_back(offset);
       newLengths.push_back(length);
       newLows.push_back(low);
       newHighs.push_back(high);
       continue;
     }

     auto srcSize = tensor::getMixedSize(b, loc, padOp.getSource(), dim);

     // The new amount of low padding is `low - offset`. Except for the case
     // where none of the low padding is read. In that case, the new amount of
     // low padding is zero.
     //
     // Optimization: If low = 0, then newLow = 0.
     OpFoldResult newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
     newLows.push_back(newLow);

     // Start reading the data from position `offset - low`. Since the original
     // read may have started in the low padding zone, this value could be
     // negative. Therefore, start reading from:
     //
     // max(offset - low, 0)
     //
     // The original read could also have started in the high padding zone.
     // In that case, set the offset to the end of source tensor. The new
     // ExtractSliceOp length will be zero in that case. (Effectively reading
     // no data from the source.)
     //
     // Optimization: If low = 0, then the formula can be simplified.
     OpFoldResult newOffset = hasLowPad
                                  ? min(max(sub(offset, low), zero), srcSize)
                                  : min(offset, srcSize);
     newOffsets.push_back(newOffset);

     // The original ExtractSliceOp was reading until position `offset +
     // length`. Therefore, the corresponding position within the source tensor
     // is:
     //
     // offset + length - low
     //
     // In case the original ExtractSliceOp stopped reading within the low
     // padding zone, this value can be negative. In that case, the end
     // position of the read should be zero. (Similar to newOffset.)
     //
     // The original read could also have stopped in the high padding zone.
     // In that case, the end position of the read should be the end of
     // the source tensor. (Similar to newOffset.)
     // srcSize - newOffset represents how much length we have available
     // and length - newLow represents how much length we want at most.
     // Note that there are many ways to order this indexing math to compute
     // newLength, but we want to make sure that the final affine.min ops in the
     // sequence are bounding the index to as small a value as possible. If
     // ValueBoundsOpInterface is used, this calculation will get upper bounds
     // from the affine.min ops, so we want to use the smallest known value to
     // set the bound at the end of the computation sequence. In this case, the
     // index will be upper bounded by length - newLow.
     OpFoldResult newLength = min(sub(srcSize, newOffset), sub(length, newLow));
     // Optimization: If low = 0, then newLow = 0, and then newLength >= 0
     // assuming length >= 0.
     if (hasLowPad)
       newLength = max(newLength, zero);
     newLengths.push_back(newLength);

     // Check if newLength is zero. In that case, no ExtractSliceOp should be
     // executed.
     if (isZeroInteger(newLength)) {
       hasZeroLen = true;
     } else if (!hasZeroLen) {
       // Not statically zero: accumulate a runtime "any length == 0" flag.
       Value check = arith::CmpIOp::create(
           b, loc, arith::CmpIPredicate::eq,
           getValueOrCreateConstantIndexOp(b, loc, newLength),
           getValueOrCreateConstantIndexOp(b, loc, zero));
       dynHasZeroLenCond =
           dynHasZeroLenCond
               ? arith::OrIOp::create(b, loc, check, dynHasZeroLenCond)
               : check;
     }

     // The amount of high padding is simply the number of elements remaining,
     // so that the result has the same length as the original ExtractSliceOp.
     // As an optimization, if the original high padding is zero, then the new
     // high padding must also be zero.
     OpFoldResult newHigh =
         hasHighPad ? sub(sub(length, newLength), newLow) : zero;
     newHighs.push_back(newHigh);
   }

   // The shape of the result can be obtained from the sizes passed in.
   SmallVector<Value> dynDims;
   SmallVector<int64_t> shape;
   dispatchIndexOpFoldResults(sizes, dynDims, shape);
   RankedTensorType resultType =
       RankedTensorType::get(shape, padOp.getResultType().getElementType());

   // Insert cast to ensure that types match. (May be folded away.)
   auto castResult = [&](Value val) -> Value {
     if (resultType == val.getType())
       return val;
     return tensor::CastOp::create(b, loc, resultType, val);
   };

   // In cases where the original data source is unused: Emit a GenerateOp and
   // do not generate a SliceOp. (The result shape of the SliceOp would
   // have a dimension of size 0, the semantics of which is unclear.)
   auto createGenerateOp = [&]() {
     // Create a GenerateOp whose body yields the padding value everywhere.
     auto generateOp = tensor::GenerateOp::create(
         b, loc, resultType, dynDims,
         [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
           tensor::YieldOp::create(builder, gLoc, padValue);
         });
     return generateOp;
   };

   // Emit a SliceOp and a PadOp. Should not be used in cases where
   // the result shape of the new SliceOp has a zero dimension.
   auto createPadOfExtractSlice = [&]() {
     // Create pad(extract_slice(x)).
     auto newSliceOp = tensor::ExtractSliceOp::create(
         b, loc, padOp.getSource(), newOffsets, newLengths, newStrides);
     auto newPadOp = PadOp::create(
         b, loc, Type(), newSliceOp, newLows, newHighs,
         /*nofold=*/padOp.getNofold(),
         getPrunedAttributeList(padOp, PadOp::getAttributeNames()));

     // Copy region to new PadOp.
     IRMapping bvm;
     padOp.getRegion().cloneInto(&newPadOp.getRegion(), bvm);

     // Return both new ops; casting the pad result is left to the caller.
     return std::make_tuple(newPadOp, newSliceOp);
   };

   // Rewrite extract_slice(pad(x)) into a GenerateOp if it is statically known
   // that the original data source x is not used.
   if (hasZeroLen) {
     Operation *generateOp = createGenerateOp();
     return TilingResult{{generateOp},
                         {castResult(generateOp->getResult(0))},
                         /*generatedSlices=*/{}};
   }

   // If there are dynamic dimensions: Generate an scf.if check to avoid
   // creating SliceOps with result dimensions of size 0 at runtime.
   if (generateZeroSliceGuard && dynHasZeroLenCond) {
     Operation *thenOp = nullptr;
     Operation *elseOp = nullptr;
     Operation *sliceOp = nullptr;
     // NOTE(review): createGenerateOp, createPadOfExtractSlice and castResult
     // build through the captured outer builder `b`, not through the builder
     // handed to the region callbacks; this presumes both refer to the same
     // builder while the regions are populated -- confirm against scf::IfOp.
     auto result = scf::IfOp::create(
         b, loc, dynHasZeroLenCond,
         /*thenBuilder=*/
         [&](OpBuilder &nestedBuilder, Location nestedLoc) {
           thenOp = createGenerateOp();
           scf::YieldOp::create(nestedBuilder, nestedLoc,
                                castResult(thenOp->getResult(0)));
         },
         /*elseBuilder=*/
         [&](OpBuilder &nestedBuilder, Location nestedLoc) {
           std::tie(elseOp, sliceOp) = createPadOfExtractSlice();
           scf::YieldOp::create(nestedBuilder, nestedLoc,
                                castResult(elseOp->getResult(0)));
         });
     return TilingResult{
         {elseOp}, SmallVector<Value>(result->getResults()), {sliceOp}};
   }

   auto [newPadOp, sliceOp] = createPadOfExtractSlice();
   return TilingResult{
       {newPadOp}, {castResult(newPadOp->getResult(0))}, {sliceOp}};
 }


 /// Adds an extension to `registry` that attaches the `PadOpTiling` external
 /// model to `tensor::PadOp` for the context the extension is invoked with.
 void mlir::tensor::registerTilingInterfaceExternalModels(
     DialectRegistry &registry) {
   // The unary `+` turns the capture-less lambda into a plain function pointer.
   registry.addExtension(
       +[](MLIRContext *context, TensorDialect *tensorDialect) {
         PadOp::attachInterface<PadOpTiling>(*context);
       });
 }

AffineOps.h

Utils.h

Utils.h

Utils.h

InferTypeOpInterface.h

max
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Definition: PolynomialApproximation.cpp:212

min
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Definition: PolynomialApproximation.cpp:204

TensorTilingInterfaceImpl.h

getResultTilePosition
static LogicalResult getResultTilePosition(RewriterBase &rewriter, ReductionTilingStrategy reductionStrategy, int64_t index, Value tiledResult, TilingInterface op, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, ValueRange ivs, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > givenTileSizes, const SetVector< unsigned > &reductionDims, SmallVector< OpFoldResult > &resultOffset, SmallVector< OpFoldResult > &resultSize)
Definition: TileUsingInterface.cpp:848

getTiledImplementation
static FailureOr< TilingResult > getTiledImplementation(RewriterBase &rewriter, TilingInterface op, ReductionTilingStrategy reductionStrategy, ValueRange regionIterArg, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, ValueRange ivs, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > givenTileSizes, const SetVector< unsigned > &reductionDims)
Definition: TileUsingInterface.cpp:821

TilingInterface.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::SmallVectorImpl
Definition: LLVM.h:74

llvm::SmallVector
Definition: LLVM.h:72

mlir::AffineExpr
Base type for affine expression.
Definition: AffineExpr.h:68

mlir::AffineMap::getMultiDimIdentityMap
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
Definition: AffineMap.cpp:330

mlir::AffineMap::get
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
Definition: MLIRContext.cpp:1224

mlir::Builder::getIndexAttr
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:108

mlir::Builder::getContext
MLIRContext * getContext() const
Definition: Builders.h:56

mlir::DialectRegistry
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
Definition: DialectRegistry.h:139

mlir::DialectRegistry::addExtension
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
Definition: DialectRegistry.h:215

mlir::IRMapping
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63

mlir::OpBuilder
This class helps build Operations.
Definition: Builders.h:207

mlir::OpFoldResult
This class represents a single result from folding an operation.
Definition: OpDefinition.h:272

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Operation::getResult
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::ValueRange
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

SCF.h

Tensor.h

mlir::affine::makeComposedFoldedAffineMax
OpFoldResult makeComposedFoldedAffineMax(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMaxOp that computes a maximum across the results of applying map to operands,...
Definition: AffineOps.cpp:1579

mlir::affine::makeComposedFoldedAffineApply
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
Definition: AffineOps.cpp:1469

mlir::affine::makeComposedFoldedAffineMin
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
Definition: AffineOps.cpp:1572

mlir::detail::enumerate
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344

mlir::remark::failed
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition: Remarks.h:561

mlir::tensor
Definition: BufferizationTransformOps.h:19

mlir::tensor::bubbleUpPadSlice
FailureOr< TilingResult > bubbleUpPadSlice(OpBuilder &b, tensor::PadOp padOp, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, bool generateZeroSliceGuard=true)
Bubbles up a slice of this pad by taking the slice first and then performing the padding.
Definition: TensorTilingInterfaceImpl.cpp:88

mlir::tensor::registerTilingInterfaceExternalModels
void registerTilingInterfaceExternalModels(mlir::DialectRegistry &registry)
Registers external models for Tiling interface for tensor ops.
Definition: TensorTilingInterfaceImpl.cpp:311

mlir::tensor::getMixedSize
OpFoldResult getMixedSize(OpBuilder &builder, Location loc, Value value, int64_t dim)
Return the dimension of the given tensor value.
Definition: TensorOps.cpp:57

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::reifyResultShapes
LogicalResult reifyResultShapes(OpBuilder &b, Operation *op, ReifiedRankedShapedTypeDims &reifiedReturnShapes)
Reify the shape of the result of an operation (typically in terms of the shape of its operands).
Definition: InferTypeOpInterface.cpp:27

mlir::bindDims
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .. sizeof...(exprs)].
Definition: AffineExpr.h:311

mlir::isZeroInteger
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
Definition: StaticValueUtils.cpp:20

mlir::dispatchIndexOpFoldResults
void dispatchIndexOpFoldResults(ArrayRef< OpFoldResult > ofrs, SmallVectorImpl< Value > &dynamicVec, SmallVectorImpl< int64_t > &staticVec)
Helper function to dispatch multiple OpFoldResults according to the behavior of dispatchIndexOpFoldRe...
Definition: StaticValueUtils.cpp:72

mlir::getValueOrCreateConstantIndexOp
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:111

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition: BytecodeImplementation.h:509

mlir::getPrunedAttributeList
SmallVector< NamedAttribute > getPrunedAttributeList(Operation *op, ArrayRef< StringRef > elidedAttrs)
Definition: StructuredOpsUtils.cpp:220

mlir::TilingResult
Container for result values of tiling.
Definition: TilingInterface.h:33