MLIR  22.0.0git
DecomposeGenericByUnfoldingPermutation.cpp
Go to the documentation of this file.
1 //===- DecomposeGenericByUnfoldingPermutation.cpp -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

#include <map>
#include <utility>
13 
14 using namespace mlir;
15 using namespace mlir::linalg;
16 
17 namespace {
18 
19 /// This pattern decomposes the input operand(s) of a linalg.generic that has
20 /// a `transpose`, `broadcast`, or a mixture of two, into explicit transpose
21 /// and broadcast. Having them folded into the linalg.generic is a good
22 /// optimization but sometimes we may want to unwrap, i.e., `unfold` them as
23 /// explicit transpose and broadcast. This rewrite pattern helps do it for
24 /// each input operand. This is useful for instance when trying to recognize
25 /// named ops.
26 ///
27 /// The transpose, broadcast, or mixture of both, are expressed in the affine
28 /// map of the operand. Technically it is essentially `projected permutation`.
29 ///
30 /// Example
31 ///
32 /// ```mlir
33 ///
34 /// #projection = affine_map<(d0, d1, d2, d3, d4) -> (d2, d3, d1)>
35 /// #identity = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
36 /// ...
37 /// %res = linalg.generic
38 /// { indexing_maps = [#projection, #identity, #identity],
39 /// iterator_types = ["parallel", "parallel", "parallel",
40 /// "parallel", "parallel"]}
41 /// ins(%x, %y : tensor<7x8x9xf32>, tensor<5x9x7x8x10xf32>)
42 /// outs(%z : tensor<5x9x7x8x10xf32>) {
43 /// ^bb0(%in: f32, %in_1: f32, %out: f32):
44 /// %div = arith.divf %in, %in_1 : f32
45 /// linalg.yield %div : f32
46 /// } -> tensor<5x9x7x8x10xf32>
47 /// ```
48 ///
49 /// In the above IR operand `%x` map is a projected-permutation. This can be
50 /// unfolded as:
51 ///
52 /// ```mlir
53 /// ...
54 /// %x_trans = linalg.transpose
55 /// ins(%x : tensor<7x8x9xf32>)
56 /// outs(%e1 : tensor<9x7x8xf32>) permutation = [2, 0, 1]
57 /// ...
58 /// %x_trans_bc = linalg.broadcast
59 /// ins(%x_trans : tensor<9x7x8xf32>)
60 /// outs(%e2 : tensor<5x9x7x8x10xf32>) dimensions = [0, 4]
61 /// %2 = linalg.div
62 /// ins(%x_trans_bc, %y :
63 /// tensor<5x9x7x8x10xf32>, tensor<5x9x7x8x10xf32>)
64 /// outs(%arg2 : tensor<5x9x7x8x10xf32>) -> tensor<5x9x7x8x10xf32>
65 ///
66 /// Note that linalg.generic has been 'specialized' to linalg.div.
67 ///
68 /// To unfold it, it is more optimal to transpose first and then do the
69 /// broadcast. However, if transpose is done first, the permutation map needs
70 /// to be expressed in terms of reduced dimension as broadcast hasn't happened
71 /// yet. Also, the broadcast dimensions in a linalg.generic come from other
72 /// operands (those not broadcasted along that particular dimension). We work
73 /// this out by computing the convex-polyhedron shape of the linalg.generic
74 /// iteration space from shapes of all the operands, both inputs and outputs.
75 ///
76 struct DecomposeProjectedPermutation : public OpRewritePattern<GenericOp> {
78 
79  LogicalResult matchAndRewrite(GenericOp genericOp,
80  PatternRewriter &rewriter) const override;
81 };
82 
83 /// For the given `map`, determine what dimensions are transposed and what
84 /// dimensions are broadcasted.
85 /// Returns :
86 /// `transpose-permutation`, `broadcast-dimensions` (empty if not needed)
87 ///
88 std::pair<SmallVector<int64_t>, SmallVector<int64_t>>
89 computeTransposeBroadcast(AffineMap &map) {
90  assert(map.isProjectedPermutation(false) && "not a projection");
91 
92  // As the map is a projection it likely operates on a smaller set of
93  // dimensions as far as the transpose is concerned (rest are broadcast).
94  int64_t minorSize = map.getNumResults();
95 
96  SmallVector<int64_t> minorResult;
97  for (int64_t i = 0; i < minorSize; ++i) {
98  auto expr = cast<AffineDimExpr>(map.getResults()[i]);
99  minorResult.push_back(expr.getPosition());
100  }
101 
102  // If dims are not monotonically increasing then transpose is present.
103  SmallVector<int64_t> sortedResMap(minorResult);
104  llvm::sort(sortedResMap);
105  bool hasTranspose = !std::equal(minorResult.begin(), minorResult.end(),
106  sortedResMap.begin(), sortedResMap.end());
107 
108  // Walk the sorted map result to determine which dimensions are broadcasted.
110  for (int64_t i = 0, j = 0; i < map.getNumInputs(); ++i) {
111  if (j < minorSize && sortedResMap[j] == i) {
112  j++;
113  continue;
114  }
115  broadcast.push_back(i);
116  }
117 
118  SmallVector<int64_t> permutation;
119  if (hasTranspose) {
120  // Consider an operand `x : tensor<7x8x9>` of a genericOp that has
121  // affine map `affine_map<(d0, d1, d2, d3, d4) -> (d2, d3, d1)>`
122  // `x`s access is both transposed and broadcast. But when specifying
123  // the `linalg.transpose(x : tensor<7x8x9>)` the dimensions need to be
124  // specified as `affine_map<(d0,d1,d2) -> (d1, d2, d0)` instead of
125  // refering to d3, d4. Therefore, re-base the transpose dimensions so
126  // that they start from d0.
127  permutation.resize(minorSize);
128  std::map<int64_t, int64_t> minorMap;
129  for (int64_t i = 0; i < minorSize; ++i)
130  minorMap.insert({sortedResMap[i], i});
131 
132  // Re-map the dimensions.
133  SmallVector<int64_t> remappedResult(minorSize);
134  for (int64_t i = 0; i < minorSize; ++i)
135  remappedResult[i] = minorMap[minorResult[i]];
136 
137  /// Calculate the permutation for the transpose.
138  for (unsigned i = 0; i < minorSize; ++i) {
139  permutation[remappedResult[i]] = i;
140  }
141  }
142  return {permutation, broadcast};
143 }
144 
145 LogicalResult DecomposeProjectedPermutation::matchAndRewrite(
146  GenericOp op, PatternRewriter &rewriter) const {
147  if (!op.hasPureTensorSemantics() || op.isSingleInputOutput() ||
148  op.isSingleYieldOp() || !op.isAllParallelLoops())
149  return failure();
150 
151  // If the map of an operand is not a `projected permutation` then
152  // it cannot be decomposed to mere transpose and broadcast.
153  // The requirement that all maps be `projected permutation` may be
154  // over-restrictive but since we need to determine shape of the
155  // iteration space as well, reject if any map violates assumption.
156  for (auto &opOperand : op->getOpOperands()) {
157  auto map = op.getMatchingIndexingMap(&opOperand);
158  if (!map.isProjectedPermutation(false))
159  return failure();
160  }
161 
162  // Decomposing linalg.generic involves creating `tensor.empty`
163  // which can have dynamic shapes but then we would have to work
164  // out which operand can supply that runtime-value (tensor.dim).
165  // Leaving it as a future TODO.
166  if (llvm::any_of(op->getOpOperands(), [](OpOperand &oper) {
167  auto opType = cast<RankedTensorType>(oper.get().getType());
168  return ShapedType::isDynamicShape(opType.getShape());
169  }))
170  return failure();
171 
172  auto outputShape = op.getStaticLoopRanges();
173 
174  auto loc = op.getLoc();
175  bool isChanged = false;
176  SmallVector<Value> newInitValues = op.getDpsInputs();
177  SmallVector<AffineMap> newMap = op.getIndexingMapsArray();
178 
179  // Walk over each input operand and unfold if it is transposed, broadcast
180  // or mix of two via operand's affine-map.
181  for (int64_t i = 0; i < op.getNumDpsInputs(); ++i) {
182  auto &map = newMap[i];
183  auto inputRTType = cast<RankedTensorType>(newInitValues[i].getType());
184  auto elType = inputRTType.getElementType();
185 
186  /// Nothing to do if map is already an identity.
187  if (map.isIdentity())
188  continue;
189 
190  auto [permutation, broadcastedDims] = computeTransposeBroadcast(map);
191 
192  // Does it need transpose?
193  if (!permutation.empty()) {
194  /// linalg.transpose permutes the dimensions of input using
195  /// rule: dim(result, i) = dim(input, permutation[i])
196  SmallVector<int64_t> transposedShape(map.getNumResults());
197  for (int64_t i = 0; i < map.getNumResults(); ++i)
198  transposedShape[i] = inputRTType.getShape()[permutation[i]];
199 
200  Value emptyTensor =
201  tensor::EmptyOp::create(rewriter, loc, transposedShape, elType);
202 
203  auto transposeOp = TransposeOp::create(rewriter, loc, newInitValues[i],
204  emptyTensor, permutation);
205  newInitValues[i] = transposeOp->getResult(0);
206  isChanged = true;
207  }
208 
209  // Does it require broadcast?
210  if (!broadcastedDims.empty()) {
211  assert(broadcastedDims.size() && "should have non size broadcast");
212  Value emptyTensor = tensor::EmptyOp::create(rewriter, loc, outputShape,
213  inputRTType.getElementType());
214 
215  auto broadcastOp = linalg::BroadcastOp::create(
216  rewriter, loc, newInitValues[i], emptyTensor, broadcastedDims);
217 
218  newInitValues[i] = broadcastOp->getResult(0);
219  isChanged = true;
220  }
221  newMap[i] = rewriter.getMultiDimIdentityMap(map.getNumDims());
222  }
223 
224  if (!isChanged)
225  return failure();
226 
227  SmallVector<Value> operands = op->getOperands();
228  ValueRange operandsRef(operands);
229 
230  auto newOp = linalg::GenericOp::create(
231  rewriter,
232  /*location=*/op.getLoc(),
233  /*resultTensorTypes=*/op->getResultTypes(),
234  /*inputs=*/newInitValues,
235  /*outputs=*/operandsRef.drop_front(op.getNumDpsInputs()),
236  /*indexingMaps=*/newMap,
237  /*iteratorTypes=*/op.getIteratorTypesArray());
238  newOp.getRegion().takeBody(op->getRegion(0));
239  rewriter.replaceOp(op, newOp->getResults());
240  return success();
241 }
242 
243 } // namespace
244 
247  patterns.insert<DecomposeProjectedPermutation>(patterns.getContext());
248 }
static Value broadcast(Location loc, Value toBroadcast, unsigned numElements, const TypeConverter &typeConverter, ConversionPatternRewriter &rewriter)
Broadcasts the value to vector with numElements number of elements.
A multi-dimensional affine map. Affine maps are immutable, like Types, and they are uniqued.
Definition: AffineMap.h:46
bool isProjectedPermutation(bool allowZeroInResults=false) const
Returns true if the AffineMap represents a subset (i.e.
Definition: AffineMap.cpp:611
unsigned getNumDims() const
Definition: AffineMap.cpp:390
ArrayRef< AffineExpr > getResults() const
Definition: AffineMap.cpp:403
unsigned getNumResults() const
Definition: AffineMap.cpp:398
unsigned getNumInputs() const
Definition: AffineMap.cpp:399
bool isIdentity() const
Returns true if this affine map is an identity affine map.
Definition: AffineMap.cpp:341
AffineMap getMultiDimIdentityMap(unsigned rank)
Definition: Builders.cpp:382
This class represents an operand of an operation.
Definition: Value.h:257
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition: PatternMatch.h:769
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
void populateDecomposeProjectedPermutationPatterns(RewritePatternSet &patterns)
Add patterns to make explicit broadcasts and transforms in the input operands of a genericOp.
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:304
const FrozenRewritePatternSet & patterns
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition: PatternMatch.h:314
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.