doxygen/TosaFolders_8cpp_source.html

 //===- TosaFolders.cpp ----------------------------------------------------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // Fold TOSA operations

 //

 //===----------------------------------------------------------------------===//


 #include <functional>

 #include <numeric>


 #include "mlir/Dialect/Tosa/IR/TosaOps.h"

 #include "mlir/Dialect/Tosa/Transforms/Passes.h"

 #include "mlir/Dialect/Utils/IndexingUtils.h"

 #include "mlir/IR/BuiltinAttributes.h"

 #include "mlir/IR/BuiltinTypes.h"

 #include "mlir/IR/DialectResourceBlobManager.h"

 #include "mlir/IR/Matchers.h"

 #include "mlir/Pass/Pass.h"

 #include "llvm/ADT/APFloat.h"

 #include "llvm/ADT/FloatingPointMode.h"

 #include "llvm/ADT/SmallVector.h"


 using namespace mlir;

 using namespace mlir::tosa;


 namespace {


 /// Maps \p toApply over every element of the constant tensor \p toTransform
 /// and packs the results into a new dense attribute.
 ///
 /// The elements of \p toTransform are read as \p SrcValType; each call to
 /// \p toApply produces one \p TargetValType.
 ///
 /// \returns A dense attribute whose type is the type of \p toTransform cloned
 /// with \p targetType as its element type, holding the mapped values in the
 /// iteration order of \p toTransform.
 template <class SrcValType, class TargetValType, class TargetType>
 DenseElementsAttr applyElementWise(
     const DenseElementsAttr &toTransform,
     const std::function<TargetValType(const SrcValType &)> &toApply,
     TargetType targetType) {
   // The number of results is known up front, so allocate once.
   SmallVector<TargetValType> results;
   results.reserve(toTransform.getNumElements());
   for (const auto &element : toTransform.getValues<SrcValType>())
     results.push_back(toApply(element));

   // Derive the result type from the input type, replacing only the element
   // type (no new shape is supplied to cloneWith).
   auto resultType = toTransform.getType().cloneWith({}, targetType);
   return DenseElementsAttr::get(resultType, results);
 }


 // Explicit instantiation of applyElementWise for the case where APFloat
 // elements are mapped to APFloat elements of a FloatType tensor.
 template DenseElementsAttr applyElementWise<APFloat, APFloat, FloatType>(

     const DenseElementsAttr &toTransform,

     const std::function<APFloat(const APFloat &)> &toApply,

     FloatType targetType);


 /// Verifies that the element type of \p toCheck is a floating-point type.
 ///
 /// \returns success() for float element types. Otherwise a match failure is
 /// reported for \p location through \p rewriter and its result is returned.
 LogicalResult notifyIfNotFloat(TypedValue<TensorType> toCheck, TosaOp location,
                                PatternRewriter &rewriter) {
   auto elementType = toCheck.getType().getElementType();
   if (!isa<FloatType>(elementType))
     return rewriter.notifyMatchFailure(location,
                                        "Unexpected input tensor type: the "
                                        "TOSA spec only allows floats");
   return success();
 }


 /// Verifies that \p toCheck is produced by a TOSA constant operation that
 /// holds dense element data.
 ///
 /// \returns success() if both conditions hold. Otherwise a match failure is
 /// reported for \p location through \p rewriter and its result is returned.
 LogicalResult notifyIfNoTosaDenseConstantTensor(TypedValue<TensorType> toCheck,
                                                 TosaOp location,
                                                 PatternRewriter &rewriter) {
   // TODO: The bound attribute is never read; it only exists because binding
   // to a DenseElementsAttr is what restricts the match to dense constants. A
   // plain check without the binding would be nicer.
   DenseElementsAttr boundValue;
   const bool isDenseConstant = matchPattern(toCheck, m_Constant(&boundValue));
   if (!isDenseConstant)
     return rewriter.notifyMatchFailure(location,
                                        "Non-const or non-dense input tensor");

   // m_Constant also accepts constants that are not TOSA constants, so the
   // defining operation has to be checked explicitly.
   if (!isa<ConstOp>(toCheck.getDefiningOp()))
     return rewriter.notifyMatchFailure(location,
                                        "The reciprocal can only be folded if "
                                        "it operates on a TOSA constant");

   return success();
 }


 /// Verifies that \p toCheck is a dense TOSA constant tensor with a float
 /// element type.
 ///
 /// The element type is checked first; the constant check only runs if the
 /// element type check succeeded. Failures are reported for \p location
 /// through \p rewriter.
 LogicalResult notifyIfNotConstantFloatTosaTensor(TypedValue<TensorType> toCheck,
                                                  TosaOp location,
                                                  PatternRewriter &rewriter) {
   if (LogicalResult isFloat = notifyIfNotFloat(toCheck, location, rewriter);
       failed(isFloat))
     return isFloat;
   return notifyIfNoTosaDenseConstantTensor(toCheck, location, rewriter);
 }


 /// Heuristic that decides whether a unary operation on a constant should be
 /// replaced by its folded value.
 ///
 /// Folding inserts a new constant. If the original constant has to stay
 /// alive for other users, memory usage grows. Folding is therefore only
 /// suggested when the extra memory is negligible: either \p values is a
 /// splat, or the operand of \p unaryOp has exactly one use.
 ///
 /// \param unaryOp The operation to fold; it must have exactly one operand.
 /// \param values The constant data feeding \p unaryOp.
 /// \returns Whether folding should be applied.
 bool constantUnaryOpShouldBeFolded(TosaOp unaryOp, DenseElementsAttr values) {
   assert(unaryOp->getNumOperands() == 1);
   auto input = unaryOp->getOperand(0);

   // For a splat the number of users does not matter; otherwise the input
   // may only be folded when this operation is its sole use.
   return isa<SplatElementsAttr>(values) || input.hasOneUse();
 }


 /// Transposes the elements in \p data according to \p permValues and returns
 /// them as a dense attribute of type \p outputType.
 ///
 /// \param data Range over the source elements in linear order; its element
 ///        type determines the storage type of the result.
 /// \param inputType Shaped type describing the layout of \p data.
 /// \param outputType Shaped type of the result; it must have the same
 ///        element type as \p inputType.
 /// \param permValues The permutation; its inverse selects, per source
 ///        dimension, the output stride to apply.
 template <typename RangeType>
 DenseElementsAttr transposeType(const RangeType &data, ShapedType inputType,
                                 ShapedType outputType,
                                 llvm::ArrayRef<int64_t> permValues) {
   using ElementType = std::decay_t<decltype(*std::begin(data))>;

   assert(inputType.getElementType() == outputType.getElementType());

   // An empty input has no first element to seed the result with.
   const auto numElements = inputType.getNumElements();
   if (numElements == 0)
     return DenseElementsAttr::get(outputType, llvm::ArrayRef<ElementType>{});

   auto srcShape = inputType.getShape();
   auto dstStrides = computeStrides(outputType.getShape());
   auto invertedPerm = invertPermutationVector(permValues);

   // Translates a linear source index into the matching linear destination
   // index. The source index is decomposed dimension by dimension, starting
   // with the last one, and each per-dimension index is weighted with the
   // output stride selected through the inverted permutation.
   auto toDstLinearIndex = [&](auto srcLinearIndex) {
     uint64_t dstLinearIndex = 0;
     for (int64_t dim = srcShape.size() - 1; dim >= 0; --dim) {
       auto indexInDim = srcLinearIndex % srcShape[dim];
       srcLinearIndex /= srcShape[dim];
       dstLinearIndex += dstStrides[invertedPerm[dim]] * indexInDim;
     }
     return dstLinearIndex;
   };

   // Every slot is overwritten below; the first element only serves as a
   // fill value so that no default constructor is required.
   SmallVector<ElementType> transposed(numElements, *std::begin(data));
   for (const auto &indexedElement : llvm::enumerate(data))
     transposed[toDstLinearIndex(indexedElement.index())] =
         indexedElement.value();

   return DenseElementsAttr::get(outputType,
                                 llvm::ArrayRef<ElementType>(transposed));
 }


 // Tries to view the data of a DenseResourceElementsAttr as an array of T.
 // Returns std::nullopt if `attr` is not a DenseResourceElementsAttr, if its
 // resource blob is missing, or if the blob is not a valid raw buffer for the
 // shaped type of `attr`.
 template <typename T>
 std::optional<ArrayRef<T>> tryGetDenseResourceValues(ElementsAttr attr) {
   auto denseResource = dyn_cast<DenseResourceElementsAttr>(attr);
   if (!denseResource)
     return std::nullopt;

   // The handle may not have a memory blob behind it.
   AsmResourceBlob *blob = denseResource.getRawHandle().getBlob();
   if (!blob)
     return std::nullopt;

   // Reject data that is not in a valid form for this type. The splat flag is
   // a required out-parameter and is not used afterwards.
   bool isSplat = false;
   const bool isValid = DenseElementsAttr::isValidRawBuffer(
       attr.getShapedType(), blob->getData(), isSplat);
   if (!isValid)
     return std::nullopt;

   return blob->template getDataAs<T>();
 }


 // Transposes an ElementsAttr, dispatching on the type its values can be
 // read as.
 // Where possible the data is processed in its raw representation, which
 // avoids allocating a large number of Attribute objects.
 // Returns a null attribute if no supported access type matches.
 DenseElementsAttr transpose(ElementsAttr attr, ShapedType inputType,
                             ShapedType outputType,
                             llvm::ArrayRef<int64_t> permValues) {
   // All branches below differ only in the range they hand over.
   auto transposeRange = [&](const auto &range) {
     return transposeType(range, inputType, outputType, permValues);
   };

   // Generic ElementsAttr: use the first value type the attribute can offer.
   if (auto data = attr.tryGetValues<bool>())
     return transposeRange(*data);
   if (auto data = attr.tryGetValues<int8_t>())
     return transposeRange(*data);
   if (auto data = attr.tryGetValues<int16_t>())
     return transposeRange(*data);
   if (auto data = attr.tryGetValues<int32_t>())
     return transposeRange(*data);
   if (auto data = attr.tryGetValues<int64_t>())
     return transposeRange(*data);
   if (auto data = attr.tryGetValues<float>())
     return transposeRange(*data);
   if (auto data = attr.tryGetValues<APFloat>())
     return transposeRange(*data);

   // Only DenseResourceElementsAttr is left to try.
   if (!isa<DenseResourceElementsAttr>(attr))
     return nullptr;

   // The raw blob carries no type information, so the element type decides
   // which interpretation of the data is taken.
   auto elementTy = attr.getElementType();

   if (auto data = tryGetDenseResourceValues<bool>(attr);
       data && elementTy.isInteger(1))
     return transposeRange(*data);
   if (auto data = tryGetDenseResourceValues<int8_t>(attr);
       data && elementTy.isInteger(8))
     return transposeRange(*data);
   if (auto data = tryGetDenseResourceValues<int16_t>(attr);
       data && elementTy.isInteger(16))
     return transposeRange(*data);
   if (auto data = tryGetDenseResourceValues<int32_t>(attr);
       data && elementTy.isInteger(32))
     return transposeRange(*data);
   if (auto data = tryGetDenseResourceValues<int64_t>(attr);
       data && elementTy.isInteger(64))
     return transposeRange(*data);
   if (auto data = tryGetDenseResourceValues<float>(attr);
       data && elementTy.isF32())
     return transposeRange(*data);

   return nullptr;
 }


 /// Rewrite pattern that folds a tosa::TransposeOp applied to a constant into
 /// a tosa::ConstOp holding the transposed data.
 ///
 /// The pattern only applies when the result element type is an integer,
 /// index or float type, and when the operation defining the input has a
 /// single user.
 struct TosaFoldConstantTranspose : public OpRewritePattern<tosa::TransposeOp> {
   using OpRewritePattern::OpRewritePattern;

   LogicalResult matchAndRewrite(tosa::TransposeOp op,
                                 PatternRewriter &rewriter) const override {
     auto resultType = cast<ShapedType>(op.getType());
     // TOSA supports quantized types.
     if (!resultType.getElementType().isIntOrIndexOrFloat())
       return failure();

     // The input has to be a constant, and the operation defining it may only
     // have a single user.
     auto input = op.getInput1();
     ElementsAttr constantValues;
     const bool isConstant = matchPattern(input, m_Constant(&constantValues));
     if (!isConstant ||
         !llvm::hasSingleElement(input.getDefiningOp()->getUsers()))
       return failure();

     // The transposition helper expects a 64-bit permutation.
     auto permutation = llvm::map_to_vector(
         op.getPerms(), [](const int32_t perm) -> int64_t { return perm; });

     auto sourceType = cast<ShapedType>(input.getType());
     auto transposed =
         transpose(constantValues, sourceType, resultType, permutation);
     if (!transposed)
       return rewriter.notifyMatchFailure(
           op, "unsupported attribute or element type");

     rewriter.replaceOpWithNewOp<tosa::ConstOp>(op, resultType, transposed);
     return success();
   }
 };


 /// Rewrite pattern that folds a ReciprocalOp applied to a dense TOSA float
 /// constant into a ConstOp holding the element-wise results.
 ///
 /// Whether folding is applied is decided by constantUnaryOpShouldBeFolded.
 struct TosaFoldConstantReciprocal : public OpRewritePattern<ReciprocalOp> {
   using OpRewritePattern::OpRewritePattern;

   LogicalResult matchAndRewrite(ReciprocalOp recip,
                                 PatternRewriter &rewriter) const override {
     auto operand = recip.getInput1();

     // Folding requires a dense TOSA constant with float elements.
     if (LogicalResult foldable =
             notifyIfNotConstantFloatTosaTensor(operand, recip, rewriter);
         failed(foldable))
       return foldable;

     // The precondition check already established that this match succeeds;
     // it is repeated here only to bind the constant data.
     DenseElementsAttr constantValues;
     matchPattern(operand, m_Constant(&constantValues));

     // Apply the heuristic that decides whether folding is worthwhile.
     if (!constantUnaryOpShouldBeFolded(recip, constantValues))
       return rewriter.notifyMatchFailure(
           recip, "Currently, reciprocals will only be folded if the input "
                  "tensor has a single user");

     // Compute every element with ReciprocalOp::calcOneElement, keeping the
     // element type of the input.
     auto elementType = cast<FloatType>(constantValues.getElementType());
     auto folded = applyElementWise<APFloat, APFloat, FloatType>(
         constantValues, &ReciprocalOp::calcOneElement, elementType);

     // Swap the reciprocal for a constant carrying the folded data.
     rewriter.replaceOpWithNewOp<ConstOp>(recip, folded.getType(), folded);
     return success();
   }
 };


 /// Converts the linear \p index of an element into its per-axis position
 /// within a tensor of shape \p tensorShape. The last axis varies fastest.
 ///
 /// \returns One coordinate per entry of \p tensorShape.
 llvm::SmallVector<int64_t>
 getPositionFromIndex(int64_t index, llvm::ArrayRef<int64_t> tensorShape) {
   llvm::SmallVector<int64_t> coordinates(tensorShape.size(), 0);
   // Peel the coordinates off the index, starting with the last axis.
   for (size_t axis = tensorShape.size(); axis-- > 0;) {
     coordinates[axis] = index % tensorShape[axis];
     index /= tensorShape[axis];
   }
   return coordinates;
 }


 /// Converts the per-axis \p position of an element into its linear index
 /// within a tensor of shape \p tensorShape. The last axis varies fastest.
 ///
 /// \returns The linear index that corresponds to \p position.
 int64_t getIndexFromPosition(llvm::ArrayRef<int64_t> position,
                              llvm::ArrayRef<int64_t> tensorShape) {
   int64_t linearIndex = 0;
   // Number of elements spanned by one step along the current axis.
   int64_t axisStride = 1;
   for (size_t axis = position.size(); axis-- > 0;) {
     linearIndex += position[axis] * axisStride;
     axisStride *= tensorShape[axis];
   }
   return linearIndex;
 }


 /// Computes one element of the result of reducing a constant tensor along a
 /// single axis.
 ///
 /// \param oldTensorAttr The constant input data; it is read as APInt values.
 /// \param oldShape The shape of the input tensor.
 /// \param reductionAxis The axis along which elements are combined.
 /// \param reductionIndex Linear index of the requested element in the
 ///        reduced tensor, i.e. in a tensor of shape \p oldShape with the
 ///        extent of \p reductionAxis replaced by 1.
 /// \returns The elements along \p reductionAxis combined pairwise, in order
 ///          of increasing position, by OperationType::calcOneElement.
 template <typename OperationType>
 llvm::APInt calculateReducedValue(const mlir::ElementsAttr &oldTensorAttr,
                                   llvm::ArrayRef<int64_t> oldShape,
                                   int64_t reductionAxis,
                                   int64_t reductionIndex) {

   llvm::SmallVector<int64_t> newShape(oldShape);
   newShape[reductionAxis] = 1;
   /// Let's calculate the position of the index
   llvm::SmallVector<int64_t> position =
       getPositionFromIndex(reductionIndex, newShape);
   auto oldTensor = oldTensorAttr.getValues<llvm::APInt>();
   /// Starting from the first position along the reduction axis
   position[reductionAxis] = 0;
   int64_t indexAtOldTensor = getIndexFromPosition(position, oldShape);
   llvm::APInt reducedValue = oldTensor[indexAtOldTensor];

   // Distance in the input between two neighbours along the reduction axis:
   // the product of all extents after that axis. It does not depend on the
   // loop variable, so it is computed once. The product is accumulated in
   // int64_t; an `int` accumulator would truncate it for large shapes.
   const int64_t stride =
       std::accumulate(oldShape.begin() + reductionAxis + 1, oldShape.end(),
                       int64_t{1}, std::multiplies<int64_t>());

   // The element at position 0 seeds the result; fold in the remaining ones.
   for (int64_t reductionAxisVal = 1; reductionAxisVal < oldShape[reductionAxis];
        ++reductionAxisVal) {
     int64_t index = indexAtOldTensor + stride * reductionAxisVal;
     reducedValue =
         OperationType::calcOneElement(reducedValue, oldTensor[index]);
   }
   return reducedValue;
 }


 /// Rewrite pattern that folds a reduction operation applied to a
 /// tosa::ConstOp into a tosa::ConstOp holding the reduced values.
 ///
 /// Folding is applied only if the input is defined by a tosa::ConstOp, the
 /// result type has a static shape and the constant has an integer element
 /// type. Unless \c aggressiveReduceConstant is set, the input must also have
 /// exactly one use.
 template <typename OperationType>
 struct ReduceConstantOptimization : public OpRewritePattern<OperationType> {

   ReduceConstantOptimization(MLIRContext *context,
                              bool aggressiveReduceConstant)
       : OpRewritePattern<OperationType>(context),
         aggressiveReduceConstant(aggressiveReduceConstant) {}

   using OpRewritePattern<OperationType>::OpRewritePattern;

   LogicalResult matchAndRewrite(OperationType op,
                                 PatternRewriter &rewriter) const override {
     Value inputOp = op.getInput();
     auto constOp = inputOp.getDefiningOp<tosa::ConstOp>();

     if (!constOp)
       return rewriter.notifyMatchFailure(
           op, "reduce input must be const operation");

     if (!inputOp.hasOneUse() && !this->aggressiveReduceConstant)
       return rewriter.notifyMatchFailure(
           op, "input operation has more than one user");

     auto resultType = cast<ShapedType>(op.getOutput().getType());

     if (!resultType.hasStaticShape())
       return rewriter.notifyMatchFailure(op, "result type shape is not static");

     auto reductionAxis = op.getAxis();
     const auto denseElementsAttr = constOp.getValues();
     const auto shapedOldElementsValues =
         cast<ShapedType>(denseElementsAttr.getType());

     if (!llvm::isa<IntegerType>(shapedOldElementsValues.getElementType()))
       return rewriter.notifyMatchFailure(
           op, "reduce input currently supported with integer type");

     auto oldShape = shapedOldElementsValues.getShape();
     auto newShape = resultType.getShape();

     // calculateReducedValue seeds every result with the first element along
     // the reduction axis. With a zero extent there is no such element, so
     // the operation is left untouched instead of reading out of bounds.
     if (oldShape[reductionAxis] == 0)
       return rewriter.notifyMatchFailure(
           op, "reduction axis of the input has zero extent");

     // Accumulate the element count in int64_t; an `int` accumulator would
     // truncate the product for large result tensors.
     const int64_t newNumOfElements =
         std::accumulate(newShape.begin(), newShape.end(), int64_t{1},
                         std::multiplies<int64_t>());
     llvm::SmallVector<APInt> newReducedTensor(newNumOfElements);

     for (int64_t reductionIndex = 0; reductionIndex < newNumOfElements;
          ++reductionIndex) {

       /// Let's reduce all the elements along this reduction axis
       newReducedTensor[reductionIndex] = calculateReducedValue<OperationType>(
           denseElementsAttr, oldShape, reductionAxis, reductionIndex);
     }

     auto rankedTensorType = cast<RankedTensorType>(resultType);
     auto denseAttr =
         mlir::DenseElementsAttr::get(rankedTensorType, newReducedTensor);
     rewriter.replaceOpWithNewOp<tosa::ConstOp>(op, rankedTensorType, denseAttr);
     return success();
   }

   /// When set, constants with more than one use are folded as well.
   const bool aggressiveReduceConstant;
 };


 } // namespace


 /// Adds the constant-folding patterns for the TOSA reduction operations
 /// (all, any, max, min, product and sum) to \p patterns.
 ///
 /// \p ctx and \p aggressiveReduceConstant are forwarded to the constructor
 /// of every pattern.
 void mlir::tosa::populateTosaConstantReduction(MLIRContext *ctx,
                                                RewritePatternSet &patterns,
                                                bool aggressiveReduceConstant) {
   // All patterns take the same constructor arguments, so a single call
   // registers them in the listed order.
   patterns.add<ReduceConstantOptimization<ReduceAllOp>,
                ReduceConstantOptimization<ReduceAnyOp>,
                ReduceConstantOptimization<ReduceMaxOp>,
                ReduceConstantOptimization<ReduceMinOp>,
                ReduceConstantOptimization<ReduceProductOp>,
                ReduceConstantOptimization<ReduceSumOp>>(
       ctx, aggressiveReduceConstant);
 }


 /// Adds the TosaFoldConstantTranspose pattern, constructed with \p ctx, to
 /// \p patterns.
 void mlir::tosa::populateTosaFoldConstantTransposePatterns(

     MLIRContext *ctx, RewritePatternSet &patterns) {

   patterns.add<TosaFoldConstantTranspose>(ctx);

 }


 /// Adds the TosaFoldConstantReciprocal pattern, constructed with \p ctx, to
 /// \p patterns.
 void mlir::tosa::populateTosaFoldConstantReciprocalPatterns(

     MLIRContext *ctx, RewritePatternSet &patterns) {

   patterns.add<TosaFoldConstantReciprocal>(ctx);

 }

DialectResourceBlobManager.h

Passes.h

IndexingUtils.h

Matchers.h

TosaOps.h

llvm::ArrayRef< int64_t >

llvm::SmallVector
Definition: LLVM.h:72

mlir::AsmResourceBlob
This class represents a processed binary blob of data.
Definition: AsmState.h:91

mlir::AsmResourceBlob::getData
ArrayRef< char > getData() const
Return the raw underlying data of this blob.
Definition: AsmState.h:145

mlir::DenseElementsAttr
An attribute that represents a reference to a dense vector or tensor object.
Definition: BuiltinAttributes.h:82

mlir::DenseElementsAttr::getValues
auto getValues() const
Return the held element values as a range of the given type.
Definition: BuiltinAttributes.h:421

mlir::DenseElementsAttr::getNumElements
int64_t getNumElements() const
Returns the number of elements held by this attribute.
Definition: BuiltinAttributes.cpp:1308

mlir::DenseElementsAttr::getElementType
Type getElementType() const
Return the element type of this DenseElementsAttr.
Definition: BuiltinAttributes.cpp:1304

mlir::DenseElementsAttr::get
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
Definition: BuiltinAttributes.cpp:911

mlir::DenseElementsAttr::getType
ShapedType getType() const
Return the type of this ElementsAttr, guaranteed to be a vector or tensor with static shape.
Definition: BuiltinAttributes.cpp:1300

mlir::DenseElementsAttr::isValidRawBuffer
static bool isValidRawBuffer(ShapedType type, ArrayRef< char > rawBuffer, bool &detectedSplat)
Returns true if the given buffer is a valid raw buffer for the given type.
Definition: BuiltinAttributes.cpp:1078

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60

mlir::PatternRewriter
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition: PatternMatch.h:749

mlir::RewritePatternSet
Definition: PatternMatch.h:772

mlir::RewriterBase::notifyMatchFailure
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:682

mlir::RewriterBase::replaceOpWithNewOp
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:500

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

mlir::Value::hasOneUse
bool hasOneUse() const
Returns true if this value has exactly one use.
Definition: Value.h:197

mlir::Value::getDefiningOp
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20

Pass.h

BuiltinAttributes.h

BuiltinTypes.h

mlir::detail::enumerate
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344

mlir::tosa
Definition: TosaToArith.h:23

mlir::tosa::populateTosaConstantReduction
void populateTosaConstantReduction(MLIRContext *ctx, RewritePatternSet &patterns, bool aggressiveReduceConstant)
Definition: TosaFolders.cpp:453

mlir::tosa::populateTosaFoldConstantReciprocalPatterns
void populateTosaFoldConstantReciprocalPatterns(MLIRContext *ctx, RewritePatternSet &patterns)
Definition: TosaFolders.cpp:475

mlir::tosa::populateTosaFoldConstantTransposePatterns
void populateTosaFoldConstantTransposePatterns(MLIRContext *ctx, RewritePatternSet &patterns)
Definition: TosaFolders.cpp:470

mlir::xegpu::transpose
static void transpose(llvm::ArrayRef< int64_t > trans, SmallVector< int64_t > &shape)
Definition: XeGPUOps.cpp:23

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::matchPattern
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:490

mlir::TypedValue
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Definition: Value.h:488

mlir::computeStrides
SmallVector< int64_t > computeStrides(ArrayRef< int64_t > sizes)
Definition: IndexingUtils.h:47

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition: GreedyPatternRewriteDriver.h:283

mlir::m_Constant
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:369

mlir::invertPermutationVector
SmallVector< int64_t > invertPermutationVector(ArrayRef< int64_t > permutation)
Helper method to invert a permutation.
Definition: IndexingUtils.cpp:205

mlir::OpRewritePattern
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition: PatternMatch.h:314

mlir::OpRewritePattern::OpRewritePattern
OpRewritePattern(MLIRContext *context, PatternBenefit benefit=1, ArrayRef< StringRef > generatedNames={})
Patterns must specify the root operation name they match against, and can also specify the benefit of...
Definition: PatternMatch.h:319