MLIR 23.0.0git
VectorUnroll.cpp
Go to the documentation of this file.
1//===- VectorUnrollDistribute.cpp - patterns to do vector unrolling -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements patterns to do vector unrolling and vector distribution.
10//
11//===----------------------------------------------------------------------===//
12
18#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/DebugLog.h"
21#include "llvm/Support/InterleavedRange.h"
22#include <optional>
23
24#define DEBUG_TYPE "vector-unroll"
25
26using namespace mlir;
27using namespace mlir::vector;
28
29/// Compute the indices of the slice `index` for a transfer op.
32 AffineMap permutationMap,
33 Location loc,
34 OpBuilder &builder) {
35 MLIRContext *ctx = builder.getContext();
36 auto isBroadcast = [](AffineExpr expr) {
37 if (auto constExpr = dyn_cast<AffineConstantExpr>(expr))
38 return constExpr.getValue() == 0;
39 return false;
40 };
41 // Compute 'sliceIndices' by adding 'sliceOffsets[i]' to 'indices[i]'.
42 SmallVector<Value> slicedIndices(indices);
43 for (const auto &dim : llvm::enumerate(permutationMap.getResults())) {
44 if (isBroadcast(dim.value()))
45 continue;
46 unsigned pos = cast<AffineDimExpr>(dim.value()).getPosition();
47 auto expr = getAffineDimExpr(0, builder.getContext()) +
48 getAffineConstantExpr(elementOffsets[dim.index()], ctx);
49 auto map = AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, expr);
50 slicedIndices[pos] =
51 affine::AffineApplyOp::create(builder, loc, map, indices[pos]);
52 }
53 return slicedIndices;
54}
55
56// Compute the new indices by adding `offsets` to `originalIndices`.
57// If m < n (m = offsets.size(), n = originalIndices.size()),
58// then only the trailing m values in `originalIndices` are updated.
60 Location loc,
61 OperandRange originalIndices,
62 ArrayRef<int64_t> offsets) {
63 assert(offsets.size() <= originalIndices.size() &&
64 "Offsets should not exceed the number of original indices");
65 SmallVector<Value> indices(originalIndices);
66
67 auto start = indices.size() - offsets.size();
68 for (auto [i, offset] : llvm::enumerate(offsets)) {
69 if (offset != 0) {
70 indices[start + i] = arith::AddIOp::create(
71 rewriter, loc, originalIndices[start + i],
72 arith::ConstantIndexOp::create(rewriter, loc, offset));
73 }
74 }
75 return indices;
76}
77
78// Clones `op` into a new operations that takes `operands` and returns
79// `resultTypes`.
81 Operation *op,
82 ArrayRef<Value> operands,
83 ArrayRef<Type> resultTypes) {
84 return builder.create(loc, op->getName().getIdentifier(), operands,
85 resultTypes, op->getAttrs());
86}
87
88/// Return the target shape for unrolling for the given `op`. Return
89/// std::nullopt if the op shouldn't be or cannot be unrolled.
90static std::optional<SmallVector<int64_t>>
92 LDBG() << "Get unroll shape for op " << op->getName().getStringRef();
93 if (options.filterConstraint && failed(options.filterConstraint(op))) {
94 LDBG() << "--no filter constraint -> BAIL";
95 return std::nullopt;
96 }
97 assert(options.nativeShape &&
98 "vector unrolling expects the native shape or native"
99 "shape call back function to be set");
100 auto unrollableVectorOp = dyn_cast<VectorUnrollOpInterface>(op);
101 if (!unrollableVectorOp) {
102 LDBG() << "--not an unrollable op -> BAIL";
103 return std::nullopt;
104 }
105 auto maybeUnrollShape = unrollableVectorOp.getShapeForUnroll();
106 if (!maybeUnrollShape) {
107 LDBG() << "--could not get shape of op " << *op << " -> BAIL";
108 return std::nullopt;
109 }
110 LDBG() << "--vector op shape: " << llvm::interleaved(*maybeUnrollShape);
111
112 std::optional<SmallVector<int64_t>> targetShape = options.nativeShape(op);
113 if (!targetShape) {
114 LDBG() << "--no unrolling target shape defined " << *op << "-> SKIP";
115 return std::nullopt;
116 }
117 LDBG() << "--target shape: " << llvm::interleaved(*targetShape);
118
119 auto maybeShapeRatio = computeShapeRatio(*maybeUnrollShape, *targetShape);
120 if (!maybeShapeRatio) {
121 LDBG() << "--could not compute integral shape ratio -> BAIL";
122 return std::nullopt;
123 }
124 if (llvm::all_of(*maybeShapeRatio, [](int64_t v) { return v == 1; })) {
125 LDBG() << "--no unrolling needed -> SKIP";
126 return std::nullopt;
127 }
128 LDBG() << "--found an integral shape ratio to unroll to -> SUCCESS";
129 return targetShape;
130}
131
133getUnrollOrder(unsigned numLoops, Operation *op,
135 SmallVector<int64_t> loopOrder =
136 llvm::to_vector(llvm::seq<int64_t>(0, static_cast<int64_t>(numLoops)));
137 if (options.traversalOrderCallback != nullptr) {
138 std::optional<SmallVector<int64_t>> order =
139 options.traversalOrderCallback(op);
140 if (order) {
141 loopOrder = std::move(*order);
142 }
143 }
144 return loopOrder;
145}
146
147namespace {
148
/// Unrolls `vector.transfer_read` into reads of the unroll target shape: one
/// read per tile, each inserted into a zero-initialized aggregate result.
struct UnrollTransferReadPattern
    : public OpRewritePattern<vector::TransferReadOp> {
  UnrollTransferReadPattern(MLIRContext *context,
                            const vector::UnrollVectorOptions &options,
                            PatternBenefit benefit = 1)
      : OpRewritePattern<vector::TransferReadOp>(context, benefit),
        options(options) {}

  LogicalResult matchAndRewrite(vector::TransferReadOp readOp,
                                PatternRewriter &rewriter) const override {
    // TODO: support 0-d corner case.
    if (readOp.getTransferRank() == 0)
      return failure();
    // Masked transfers are not supported.
    if (readOp.getMask())
      return failure();
    auto targetShape = getTargetShape(options, readOp);
    if (!targetShape)
      return failure();
    auto sourceVectorType = readOp.getVectorType();
    SmallVector<int64_t> strides(targetShape->size(), 1);
    Location loc = readOp.getLoc();
    ArrayRef<int64_t> originalSize = sourceVectorType.getShape();

    // Prepare the result vector;
    Value result =
        arith::ConstantOp::create(rewriter, loc, sourceVectorType,
                                  rewriter.getZeroAttr(sourceVectorType));
    auto targetType =
        VectorType::get(*targetShape, sourceVectorType.getElementType());
    SmallVector<Value> originalIndices(readOp.getIndices().begin(),
                                       readOp.getIndices().end());
    SmallVector<int64_t> loopOrder =
        getUnrollOrder(originalSize.size(), readOp, options);
    // Emit one transfer_read per tile and insert it at the tile's offsets.
    for (SmallVector<int64_t> elementOffsets :
         StaticTileOffsetRange(originalSize, *targetShape, loopOrder)) {
      // Shift the transfer's indices by the tile offsets (broadcast dims are
      // left unchanged by sliceTransferIndices).
      SmallVector<Value> indices =
          sliceTransferIndices(elementOffsets, originalIndices,
                               readOp.getPermutationMap(), loc, rewriter);
      auto slicedRead = vector::TransferReadOp::create(
          rewriter, loc, targetType, readOp.getBase(), indices,
          readOp.getPermutationMapAttr(), readOp.getPadding(), readOp.getMask(),
          readOp.getInBoundsAttr());

      result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
          loc, slicedRead, result, elementOffsets, strides);
    }
    rewriter.replaceOp(readOp, result);
    return success();
  }

private:
  vector::UnrollVectorOptions options;
};
202
/// Unrolls `vector.transfer_write` into writes of the unroll target shape.
/// For tensor destinations, each unrolled write is chained through the result
/// of the previous one so the final tensor value replaces the original op.
struct UnrollTransferWritePattern
    : public OpRewritePattern<vector::TransferWriteOp> {
  UnrollTransferWritePattern(MLIRContext *context,
                             const vector::UnrollVectorOptions &options,
                             PatternBenefit benefit = 1)
      : OpRewritePattern<vector::TransferWriteOp>(context, benefit),
        options(options) {}

  LogicalResult matchAndRewrite(vector::TransferWriteOp writeOp,
                                PatternRewriter &rewriter) const override {
    // TODO: support 0-d corner case.
    if (writeOp.getTransferRank() == 0)
      return failure();

    // Masked transfers are not supported.
    if (writeOp.getMask())
      return failure();
    auto targetShape = getTargetShape(options, writeOp);
    if (!targetShape)
      return failure();
    auto sourceVectorType = writeOp.getVectorType();
    SmallVector<int64_t> strides(targetShape->size(), 1);
    Location loc = writeOp.getLoc();
    ArrayRef<int64_t> originalSize = sourceVectorType.getShape();
    // Bail-out if rank(source) != rank(target). The main limitation here is the
    // fact that `ExtractStridedSlice` requires the rank for the input and
    // output to match. If needed, we can relax this later.
    if (originalSize.size() != targetShape->size())
      return rewriter.notifyMatchFailure(
          writeOp,
          "expected source input vector rank to match target shape rank");

    SmallVector<Value> originalIndices(writeOp.getIndices().begin(),
                                       writeOp.getIndices().end());
    SmallVector<int64_t> loopOrder =
        getUnrollOrder(originalSize.size(), writeOp, options);
    // Tensor destination produced by the previous unrolled write; stays null
    // for memref destinations.
    Value resultTensor;
    for (SmallVector<int64_t> elementOffsets :
         StaticTileOffsetRange(originalSize, *targetShape, loopOrder)) {
      // Extract the tile of the source vector to write.
      Value slicedVector = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
          loc, writeOp.getVector(), elementOffsets, *targetShape, strides);
      // Shift the transfer's indices by the tile offsets.
      SmallVector<Value> indices =
          sliceTransferIndices(elementOffsets, originalIndices,
                               writeOp.getPermutationMap(), loc, rewriter);
      Operation *slicedWrite = vector::TransferWriteOp::create(
          rewriter, loc, slicedVector,
          resultTensor ? resultTensor : writeOp.getBase(), indices,
          writeOp.getPermutationMapAttr(), writeOp.getInBoundsAttr());
      // For the tensor case update the destination for the next transfer write.
      if (!slicedWrite->getResults().empty())
        resultTensor = slicedWrite->getResult(0);
    }
    if (resultTensor)
      rewriter.replaceOp(writeOp, resultTensor);
    else
      rewriter.eraseOp(writeOp);
    return success();
  }

private:
  vector::UnrollVectorOptions options;
};
264
/// DenseMap key info for offset vectors (SmallVector<int64_t>), used by the
/// patterns below to cache per-tile accumulator values keyed by destination
/// offsets.
struct OffsetMapInfo {
  // Reserved key that can never collide with a real offset vector.
  static SmallVector<int64_t> getEmptyKey() { return {int64_t(-1)}; }

  // Reserved key marking erased map entries.
  static SmallVector<int64_t> getTombstoneKey() { return {int64_t(-2)}; }

  static unsigned getHashValue(const SmallVector<int64_t> &v) {
    return static_cast<unsigned>(llvm::hash_combine_range(v));
  }

  static bool isEqual(const SmallVector<int64_t> &lhs,
                      const SmallVector<int64_t> &rhs) {
    return lhs == rhs;
  }
};
279
/// Unrolls `vector.contract` by tiling its iteration space with the target
/// shape. Accumulator tiles are cached in `accCache`, keyed by their offsets
/// in the destination, so successive reduction iterations keep updating the
/// same tile; the tiles are reassembled into the full result at the end.
struct UnrollContractionPattern
    : public OpRewritePattern<vector::ContractionOp> {
  UnrollContractionPattern(MLIRContext *context,
                           const vector::UnrollVectorOptions &options,
                           PatternBenefit benefit = 1)
      : OpRewritePattern<vector::ContractionOp>(context, benefit),
        options(options) {}

  LogicalResult matchAndRewrite(vector::ContractionOp contractOp,
                                PatternRewriter &rewriter) const override {
    auto targetShape = getTargetShape(options, contractOp);
    if (!targetShape)
      return failure();
    auto dstVecType = cast<VectorType>(contractOp.getResultType());
    SmallVector<int64_t> originalSize = *contractOp.getShapeForUnroll();

    Location loc = contractOp.getLoc();
    unsigned accIndex = vector::ContractionOp::getAccOperandIndex();
    AffineMap dstAffineMap = contractOp.getIndexingMapsArray()[accIndex];
    // Cache of partial accumulator tiles, keyed by destination offsets.
    llvm::MapVector<
        SmallVector<int64_t>, Value,
        llvm::DenseMap<SmallVector<int64_t>, unsigned, OffsetMapInfo>>
        accCache;

    SmallVector<int64_t> loopOrder = getUnrollOrder(
        contractOp.getIteratorTypes().size(), contractOp, options);

    // Iterate over every tile of the contraction's iteration space.
    for (SmallVector<int64_t> offsets :
         StaticTileOffsetRange(originalSize, *targetShape, loopOrder)) {
      SmallVector<Value> slicesOperands(contractOp.getNumOperands());

      // Helper to compute the new shape of each operand and extract the slice.
      auto extractOperand = [&](unsigned index, Value operand,
                                AffineMap permutationMap,
                                ArrayRef<int64_t> operandOffets) {
        SmallVector<int64_t> operandShape = applyPermutationMap(
            permutationMap, ArrayRef<int64_t>(*targetShape));
        SmallVector<int64_t> operandStrides(operandOffets.size(), 1);
        slicesOperands[index] =
            rewriter.createOrFold<vector::ExtractStridedSliceOp>(
                loc, operand, operandOffets, operandShape, operandStrides);
      };

      // Extract the new lhs operand.
      AffineMap lhsPermutationMap = contractOp.getIndexingMapsArray()[0];
      SmallVector<int64_t> lhsOffets =
          applyPermutationMap(lhsPermutationMap, ArrayRef<int64_t>(offsets));
      extractOperand(0, contractOp.getLhs(), lhsPermutationMap, lhsOffets);

      // Extract the new rhs operand.
      AffineMap rhsPermutationMap = contractOp.getIndexingMapsArray()[1];
      SmallVector<int64_t> rhsOffets =
          applyPermutationMap(rhsPermutationMap, ArrayRef<int64_t>(offsets));
      extractOperand(1, contractOp.getRhs(), rhsPermutationMap, rhsOffets);

      AffineMap accPermutationMap = contractOp.getIndexingMapsArray()[2];
      SmallVector<int64_t> accOffets =
          applyPermutationMap(accPermutationMap, ArrayRef<int64_t>(offsets));
      // If a version of the accumulator has already been computed, use it
      // otherwise extract the first version from the original operand.
      auto *accIt = accCache.find(accOffets);
      if (accIt != accCache.end())
        slicesOperands[2] = accIt->second;
      else
        extractOperand(2, contractOp.getAcc(), accPermutationMap, accOffets);

      SmallVector<int64_t> dstShape =
          applyPermutationMap(dstAffineMap, ArrayRef<int64_t>(*targetShape));
      auto targetType = VectorType::get(dstShape, dstVecType.getElementType());
      Operation *newOp = cloneOpWithOperandsAndTypes(
          rewriter, loc, contractOp, slicesOperands, targetType);

      SmallVector<int64_t> dstOffets =
          applyPermutationMap(dstAffineMap, ArrayRef<int64_t>(offsets));
      // Save the accumulated value until all the loops are unrolled since
      // the reduction loops keep updating the accumulator.
      accCache[dstOffets] = newOp->getResult(0);
    }
    // Assemble back the accumulator into a single vector.
    Value result = arith::ConstantOp::create(rewriter, loc, dstVecType,
                                             rewriter.getZeroAttr(dstVecType));
    for (const auto &it : accCache) {
      SmallVector<int64_t> dstStrides(it.first.size(), 1);
      result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
          loc, it.second, result, it.first, dstStrides);
    }
    rewriter.replaceOp(contractOp, result);
    return success();
  }

private:
  vector::UnrollVectorOptions options;
};
373
/// Unrolls `vector.multi_reduction` by reducing each source tile separately.
/// Partial accumulators are cached in `accCache`, keyed by the offsets of the
/// non-reduced dimensions, so tiles reducing into the same destination slice
/// are chained; the tiles are reassembled into the full result at the end.
struct UnrollMultiReductionPattern
    : public OpRewritePattern<vector::MultiDimReductionOp> {
  UnrollMultiReductionPattern(MLIRContext *context,
                              const vector::UnrollVectorOptions &options,
                              PatternBenefit benefit = 1)
      : OpRewritePattern<vector::MultiDimReductionOp>(context, benefit),
        options(options) {}

  LogicalResult matchAndRewrite(vector::MultiDimReductionOp reductionOp,
                                PatternRewriter &rewriter) const override {
    // Full reductions to a scalar cannot be reassembled via strided inserts.
    auto resultType = reductionOp->getResult(0).getType();
    if (resultType.isIntOrFloat()) {
      return rewriter.notifyMatchFailure(reductionOp,
                                         "Unrolling scalars is not supported");
    }
    std::optional<SmallVector<int64_t>> targetShape =
        getTargetShape(options, reductionOp);
    if (!targetShape)
      return failure();
    SmallVector<int64_t> originalSize = *reductionOp.getShapeForUnroll();
    // Cache of partial accumulators, keyed by the destination (non-reduced)
    // offsets of each tile.
    llvm::MapVector<
        SmallVector<int64_t>, Value,
        llvm::DenseMap<SmallVector<int64_t>, unsigned, OffsetMapInfo>>
        accCache;
    Location loc = reductionOp.getLoc();

    // Iterate over all tiles of the source shape; `offsets` is the position
    // of one `targetShape`-sized tile within `originalSize`.
    for (SmallVector<int64_t> offsets :
         StaticTileOffsetRange(originalSize, *targetShape)) {
      SmallVector<Value> operands;
      SmallVector<int64_t> operandStrides(offsets.size(), 1);
      Value slicedOperand =
          rewriter.createOrFold<vector::ExtractStridedSliceOp>(
              loc, reductionOp.getSource(), offsets, *targetShape,
              operandStrides);
      operands.push_back(slicedOperand);
      // Project out the reduced dimensions to obtain the destination tile's
      // shape and offsets.
      SmallVector<int64_t> dstShape;
      SmallVector<int64_t> destOffset;
      for (size_t i : llvm::seq(size_t(0), targetShape->size())) {
        if (!reductionOp.isReducedDim(i)) {
          destOffset.push_back(offsets[i]);
          dstShape.push_back((*targetShape)[i]);
        }
      }
      Value acc;
      SmallVector<int64_t> accStrides(destOffset.size(), 1);
      // If a version of the accumulator has already been computed, use it
      // otherwise extract the first version from the original operand.
      auto *accIt = accCache.find(destOffset);
      if (accIt != accCache.end())
        acc = accIt->second;
      else
        acc = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
            loc, reductionOp.getAcc(), destOffset, dstShape, accStrides);
      operands.push_back(acc);
      auto targetType = VectorType::get(
          dstShape, reductionOp.getSourceVectorType().getElementType());
      Operation *newOp = cloneOpWithOperandsAndTypes(rewriter, loc, reductionOp,
                                                     operands, targetType);
      Value result = newOp->getResult(0);
      accCache[destOffset] = result;
    }
    // Assemble back the accumulator into a single vector.
    Value result = arith::ConstantOp::create(
        rewriter, loc, reductionOp.getDestType(),
        rewriter.getZeroAttr(reductionOp.getDestType()));
    for (const auto &it : accCache) {
      SmallVector<int64_t> dstStrides(it.first.size(), 1);
      result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
          loc, it.second, result, it.first, dstStrides);
    }
    rewriter.replaceOp(reductionOp, result);
    return success();
  }

private:
  vector::UnrollVectorOptions options;
};
453
454struct UnrollElementwisePattern : public RewritePattern {
455 UnrollElementwisePattern(MLIRContext *context,
456 const vector::UnrollVectorOptions &options,
457 PatternBenefit benefit = 1)
458 : RewritePattern(MatchAnyOpTypeTag(), benefit, context),
459 options(options) {}
460
461 LogicalResult matchAndRewrite(Operation *op,
462 PatternRewriter &rewriter) const override {
464 return failure();
465 auto targetShape = getTargetShape(options, op);
466 if (!targetShape)
467 return failure();
468 int64_t targetShapeRank = targetShape->size();
469 auto dstVecType = cast<VectorType>(op->getResult(0).getType());
470 SmallVector<int64_t> originalSize =
471 *cast<VectorUnrollOpInterface>(op).getShapeForUnroll();
472 int64_t originalShapeRank = originalSize.size();
473
474 Location loc = op->getLoc();
475
476 // Handle rank mismatch by adding leading unit dimensions to targetShape
477 SmallVector<int64_t> adjustedTargetShape(originalShapeRank);
478 int64_t rankDiff = originalShapeRank - targetShapeRank;
479 std::fill(adjustedTargetShape.begin(),
480 adjustedTargetShape.begin() + rankDiff, 1);
481 std::copy(targetShape->begin(), targetShape->end(),
482 adjustedTargetShape.begin() + rankDiff);
483
484 int64_t adjustedTargetShapeRank = adjustedTargetShape.size();
485 // Prepare the result vector.
486 Value result = arith::ConstantOp::create(rewriter, loc, dstVecType,
487 rewriter.getZeroAttr(dstVecType));
488 SmallVector<int64_t> strides(adjustedTargetShapeRank, 1);
489 VectorType unrolledVecType =
490 VectorType::get(*targetShape, dstVecType.getElementType());
491
492 // Create the unrolled computation.
493 for (SmallVector<int64_t> offsets :
494 StaticTileOffsetRange(originalSize, adjustedTargetShape)) {
495 SmallVector<Value> extractOperands;
496 for (OpOperand &operand : op->getOpOperands()) {
497 auto vecType = dyn_cast<VectorType>(operand.get().getType());
498 if (!vecType) {
499 extractOperands.push_back(operand.get());
500 continue;
501 }
502 Value extracted = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
503 loc, operand.get(), offsets, adjustedTargetShape, strides);
504
505 // Reshape to remove leading unit dims if needed
506 if (adjustedTargetShapeRank > targetShapeRank) {
507 extracted = rewriter.createOrFold<vector::ShapeCastOp>(
508 loc, VectorType::get(*targetShape, vecType.getElementType()),
509 extracted);
510 }
511 extractOperands.push_back(extracted);
512 }
513
514 Operation *newOp = cloneOpWithOperandsAndTypes(
515 rewriter, loc, op, extractOperands, unrolledVecType);
516
517 Value computeResult = newOp->getResult(0);
518
519 // Use strides sized to targetShape for proper insertion
520 SmallVector<int64_t> insertStrides =
521 (adjustedTargetShapeRank > targetShapeRank)
522 ? SmallVector<int64_t>(targetShapeRank, 1)
523 : strides;
524
525 result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
526 loc, computeResult, result, offsets, insertStrides);
527 }
528 rewriter.replaceOp(op, result);
529 return success();
530 }
531
532private:
533 vector::UnrollVectorOptions options;
534};
535
536struct UnrollReductionPattern : public OpRewritePattern<vector::ReductionOp> {
537 UnrollReductionPattern(MLIRContext *context,
538 const vector::UnrollVectorOptions &options,
539 PatternBenefit benefit = 1)
540 : OpRewritePattern<vector::ReductionOp>(context, benefit),
541 options(options) {}
542
543 LogicalResult matchAndRewrite(vector::ReductionOp reductionOp,
544 PatternRewriter &rewriter) const override {
545 std::optional<SmallVector<int64_t>> targetShape =
546 getTargetShape(options, reductionOp);
547 if (!targetShape)
548 return failure();
549 SmallVector<int64_t> originalSize = *reductionOp.getShapeForUnroll();
550
551 // Create unrolled vector reduction.
552 Location loc = reductionOp.getLoc();
553 Value accumulator = nullptr;
554 for (SmallVector<int64_t> offsets :
555 StaticTileOffsetRange(originalSize, *targetShape)) {
556 SmallVector<int64_t> strides(offsets.size(), 1);
557 Value slicedOperand =
558 rewriter.createOrFold<vector::ExtractStridedSliceOp>(
559 loc, reductionOp.getVector(), offsets, *targetShape, strides);
560 Operation *newOp = cloneOpWithOperandsAndTypes(
561 rewriter, loc, reductionOp, slicedOperand, reductionOp.getType());
562 Value result = newOp->getResult(0);
563
564 if (!accumulator) {
565 // This is the first reduction.
566 accumulator = result;
567 } else {
568 // On subsequent reduction, combine with the accumulator.
569 accumulator = makeArithReduction(rewriter, loc, reductionOp.getKind(),
570 accumulator, result);
571 }
572 }
573
574 rewriter.replaceOp(reductionOp, accumulator);
575 return success();
576 }
577
578private:
579 const vector::UnrollVectorOptions options;
580};
581
/// Unrolls `vector.transpose` by extracting the (permuted) source tile for
/// each result tile, transposing it, and inserting it at the result offsets.
struct UnrollTransposePattern : public OpRewritePattern<vector::TransposeOp> {
  UnrollTransposePattern(MLIRContext *context,
                         const vector::UnrollVectorOptions &options,
                         PatternBenefit benefit = 1)
      : OpRewritePattern<vector::TransposeOp>(context, benefit),
        options(options) {}

  LogicalResult matchAndRewrite(vector::TransposeOp transposeOp,
                                PatternRewriter &rewriter) const override {
    // 0-d transposes have nothing to unroll.
    if (transposeOp.getResultVectorType().getRank() == 0)
      return failure();
    auto targetShape = getTargetShape(options, transposeOp);
    if (!targetShape)
      return failure();
    auto originalVectorType = transposeOp.getResultVectorType();
    SmallVector<int64_t> strides(targetShape->size(), 1);
    Location loc = transposeOp.getLoc();
    ArrayRef<int64_t> originalSize = originalVectorType.getShape();

    // Prepare the result vector;
    Value result =
        arith::ConstantOp::create(rewriter, loc, originalVectorType,
                                  rewriter.getZeroAttr(originalVectorType));
    ArrayRef<int64_t> permutation = transposeOp.getPermutation();

    // Unroll the computation.
    for (SmallVector<int64_t> elementOffsets :
         StaticTileOffsetRange(originalSize, *targetShape)) {
      SmallVector<int64_t> permutedOffsets(elementOffsets.size());
      SmallVector<int64_t> permutedShape(elementOffsets.size());
      // Compute the source offsets and shape: result dim `indices.index()`
      // comes from source dim `indices.value()`.
      for (auto indices : llvm::enumerate(permutation)) {
        permutedOffsets[indices.value()] = elementOffsets[indices.index()];
        permutedShape[indices.value()] = (*targetShape)[indices.index()];
      }
      Value slicedOperand =
          rewriter.createOrFold<vector::ExtractStridedSliceOp>(
              loc, transposeOp.getVector(), permutedOffsets, permutedShape,
              strides);
      // Transpose the extracted tile, then insert it at the result offsets.
      Value transposedSlice = rewriter.createOrFold<vector::TransposeOp>(
          loc, slicedOperand, permutation);
      result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
          loc, transposedSlice, result, elementOffsets, strides);
    }
    rewriter.replaceOp(transposeOp, result);
    return success();
  }

private:
  vector::UnrollVectorOptions options;
};
633
/// Unrolls `vector.gather` by extracting matching tiles of the index, mask,
/// and pass-through vectors, emitting one smaller gather per tile, and
/// assembling the tiles into the full result.
struct UnrollGatherPattern : public OpRewritePattern<vector::GatherOp> {
  UnrollGatherPattern(MLIRContext *context,
                      const vector::UnrollVectorOptions &options,
                      PatternBenefit benefit = 1)
      : OpRewritePattern<vector::GatherOp>(context, benefit), options(options) {
  }

  LogicalResult matchAndRewrite(vector::GatherOp gatherOp,
                                PatternRewriter &rewriter) const override {
    VectorType sourceVectorType = gatherOp.getVectorType();
    // 0-d gathers have nothing to unroll.
    if (sourceVectorType.getRank() == 0)
      return failure();
    auto targetShape = getTargetShape(options, gatherOp);
    if (!targetShape)
      return failure();
    SmallVector<int64_t> strides(targetShape->size(), 1);
    Location loc = gatherOp.getLoc();
    ArrayRef<int64_t> originalSize = gatherOp.getVectorType().getShape();

    // Prepare the result vector;
    Value result =
        arith::ConstantOp::create(rewriter, loc, sourceVectorType,
                                  rewriter.getZeroAttr(sourceVectorType));
    auto targetType =
        VectorType::get(*targetShape, sourceVectorType.getElementType());

    SmallVector<int64_t> loopOrder =
        getUnrollOrder(originalSize.size(), gatherOp, options);
    for (SmallVector<int64_t> elementOffsets :
         StaticTileOffsetRange(originalSize, *targetShape, loopOrder)) {
      // To get the unrolled gather, extract the same slice based on the
      // decomposed shape from each of the index, mask, and pass-through
      // vectors.
      Value indexSubVec = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
          loc, gatherOp.getIndices(), elementOffsets, *targetShape, strides);
      Value maskSubVec = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
          loc, gatherOp.getMask(), elementOffsets, *targetShape, strides);
      Value passThruSubVec =
          rewriter.createOrFold<vector::ExtractStridedSliceOp>(
              loc, gatherOp.getPassThru(), elementOffsets, *targetShape,
              strides);
      auto slicedGather = vector::GatherOp::create(
          rewriter, loc, targetType, gatherOp.getBase(), gatherOp.getOffsets(),
          indexSubVec, maskSubVec, passThruSubVec);

      result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
          loc, slicedGather, result, elementOffsets, strides);
    }
    rewriter.replaceOp(gatherOp, result);
    return success();
  }

private:
  vector::UnrollVectorOptions options;
};
689
690struct UnrollLoadPattern : public OpRewritePattern<vector::LoadOp> {
691 UnrollLoadPattern(MLIRContext *context,
692 const vector::UnrollVectorOptions &options,
693 PatternBenefit benefit = 1)
694 : OpRewritePattern<vector::LoadOp>(context, benefit), options(options) {}
695
696 LogicalResult matchAndRewrite(vector::LoadOp loadOp,
697 PatternRewriter &rewriter) const override {
698 VectorType vecType = loadOp.getVectorType();
699
700 auto targetShape = getTargetShape(options, loadOp);
701 if (!targetShape)
702 return failure();
703
704 Location loc = loadOp.getLoc();
705 ArrayRef<int64_t> originalShape = vecType.getShape();
706 SmallVector<int64_t> strides(targetShape->size(), 1);
707
708 Value result = arith::ConstantOp::create(rewriter, loc, vecType,
709 rewriter.getZeroAttr(vecType));
710
711 SmallVector<int64_t> loopOrder =
712 getUnrollOrder(originalShape.size(), loadOp, options);
713
714 auto targetVecType =
715 VectorType::get(*targetShape, vecType.getElementType());
716
717 for (SmallVector<int64_t> offsets :
718 StaticTileOffsetRange(originalShape, *targetShape, loopOrder)) {
719 SmallVector<Value> indices =
720 sliceLoadStoreIndices(rewriter, loc, loadOp.getIndices(), offsets);
721 Value slicedLoad = vector::LoadOp::create(rewriter, loc, targetVecType,
722 loadOp.getBase(), indices);
723 result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
724 loc, slicedLoad, result, offsets, strides);
725 }
726 rewriter.replaceOp(loadOp, result);
727 return success();
728 }
729
730private:
731 vector::UnrollVectorOptions options;
732};
733
734struct UnrollStorePattern : public OpRewritePattern<vector::StoreOp> {
735 UnrollStorePattern(MLIRContext *context,
736 const vector::UnrollVectorOptions &options,
737 PatternBenefit benefit = 1)
738 : OpRewritePattern<vector::StoreOp>(context, benefit), options(options) {}
739
740 LogicalResult matchAndRewrite(vector::StoreOp storeOp,
741 PatternRewriter &rewriter) const override {
742 VectorType vecType = storeOp.getVectorType();
743
744 auto targetShape = getTargetShape(options, storeOp);
745 if (!targetShape)
746 return failure();
747
748 Location loc = storeOp.getLoc();
749 ArrayRef<int64_t> originalShape = vecType.getShape();
750 SmallVector<int64_t> strides(targetShape->size(), 1);
751
752 Value base = storeOp.getBase();
753 Value vector = storeOp.getValueToStore();
754
755 SmallVector<int64_t> loopOrder =
756 getUnrollOrder(originalShape.size(), storeOp, options);
757
758 for (SmallVector<int64_t> offsets :
759 StaticTileOffsetRange(originalShape, *targetShape, loopOrder)) {
760 SmallVector<Value> indices =
761 sliceLoadStoreIndices(rewriter, loc, storeOp.getIndices(), offsets);
762 Value slice = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
763 loc, vector, offsets, *targetShape, strides);
764 vector::StoreOp::create(rewriter, loc, slice, base, indices);
765 }
766 rewriter.eraseOp(storeOp);
767 return success();
768 }
769
770private:
771 vector::UnrollVectorOptions options;
772};
773
/// Unrolls `vector.broadcast` by broadcasting the (possibly sliced) source
/// into tiles of the target shape and inserting each tile into the result.
struct UnrollBroadcastPattern : public OpRewritePattern<vector::BroadcastOp> {
  UnrollBroadcastPattern(MLIRContext *context,
                         const vector::UnrollVectorOptions &options,
                         PatternBenefit benefit = 1)
      : OpRewritePattern<vector::BroadcastOp>(context, benefit),
        options(options) {}

  LogicalResult matchAndRewrite(vector::BroadcastOp broadcastOp,
                                PatternRewriter &rewriter) const override {
    auto targetShape = getTargetShape(options, broadcastOp);
    if (!targetShape)
      return failure();

    Location loc = broadcastOp.getLoc();
    // Null when the broadcast source is a scalar.
    VectorType srcType = dyn_cast<VectorType>(broadcastOp.getSourceType());
    VectorType resType = broadcastOp.getResultVectorType();
    VectorType targetType =
        resType.cloneWith(*targetShape, resType.getElementType());
    Value result = arith::ConstantOp::create(rewriter, loc, resType,
                                             rewriter.getZeroAttr(resType));

    SmallVector<int64_t> originalShape = *broadcastOp.getShapeForUnroll();
    SmallVector<int64_t> strides(originalShape.size(), 1);

    for (SmallVector<int64_t> offsets :
         StaticTileOffsetRange(originalShape, *targetShape)) {
      Value newSrc;
      if (!srcType) {
        // Scalar to vector broadcast.
        newSrc = broadcastOp.getSource();
      } else {
        // Vector to vector broadcast.
        // The source maps to the trailing `rank` dimensions of the result.
        int64_t rank = srcType.getRank();
        SmallVector<int64_t> srcOffsets(offsets.end() - rank, offsets.end());
        SmallVector<int64_t> srcShape(targetShape->end() - rank,
                                      targetShape->end());
        SmallVector<int64_t> srcStrides(strides.end() - rank, strides.end());
        // adjust the offset and shape for src if the corresponding dim is 1.
        for (int64_t i = 0; i < rank; ++i) {
          if (srcType.getDimSize(i) == 1) {
            srcOffsets[i] = 0;
            srcShape[i] = 1;
          }
        }
        newSrc = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
            loc, broadcastOp.getSource(), srcOffsets, srcShape, srcStrides);
      }

      Operation *newOp = cloneOpWithOperandsAndTypes(rewriter, loc, broadcastOp,
                                                     newSrc, targetType);

      result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
          loc, newOp->getResult(0), result, offsets, strides);
    }

    rewriter.replaceOp(broadcastOp, result);
    return success();
  }

private:
  vector::UnrollVectorOptions options;
};
836
837/// Unrolls 2 or more dimensional `vector.to_elements` ops by unrolling the
838/// outermost dimension of the operand. For example:
839///
840/// ```
841/// %0:4 = vector.to_elements %v : vector<2x2xf32>
842///
843/// ==>
844///
845/// %v0 = vector.extract %v[0] : vector<2x2xf32> from vector<2x2x2xf32>
846/// %v1 = vector.extract %v[1] : vector<2x2xf32> from vector<2x2x2xf32>
847/// %0:4 = vector.to_elements %v0 : vector<2x2xf32>
848/// %1:4 = vector.to_elements %v1 : vector<2x2xf32>
849/// ```
850///
851/// When this pattern is applied until a fixed-point is reached,
852/// this will produce a sequence of 1-d from_elements
853/// ops.
854struct UnrollToElements final : public OpRewritePattern<vector::ToElementsOp> {
855 UnrollToElements(MLIRContext *context,
856 const vector::UnrollVectorOptions &options,
857 PatternBenefit benefit = 1)
858 : OpRewritePattern<vector::ToElementsOp>(context, benefit),
859 options(options) {}
860
861 LogicalResult matchAndRewrite(vector::ToElementsOp op,
862 PatternRewriter &rewriter) const override {
863
864 TypedValue<VectorType> source = op.getSource();
865 FailureOr<SmallVector<Value>> result =
866 vector::unrollVectorValue(source, rewriter);
867 if (failed(result)) {
868 return failure();
869 }
870 SmallVector<Value> vectors = *result;
871
872 SmallVector<Value> results;
873 for (Value vector : vectors) {
874 auto subElements =
875 vector::ToElementsOp::create(rewriter, op.getLoc(), vector);
876 llvm::append_range(results, subElements.getResults());
877 }
878 rewriter.replaceOp(op, results);
879 return success();
880 }
881
882private:
883 vector::UnrollVectorOptions options;
884};
885
886/// This pattern unrolls `vector.step` operations according to the provided
887/// target unroll shape. It decomposes a large step vector into smaller step
888/// vectors (segments) and assembles the result by inserting each computed
889/// segment into the appropriate offset of the original vector.
890///
891/// The pattern does not support scalable vectors and will fail to match them.
892///
893/// For each segment, it adds the base step vector and the segment's offset,
894/// then inserts the result into the output vector at the corresponding
895/// position.
896///
897/// Example:
898/// Given a step operation:
899/// %0 = vector.step : vector<8xindex>
900///
901/// and a target unroll shape of <4>, the pattern produces:
902///
903/// %base = vector.step : vector<4xindex>
904/// %zero = arith.constant dense<0> : vector<8xindex>
905/// %result0 = vector.insert_strided_slice %base, %zero
906/// {offsets = [0], strides = [1]} : vector<4xindex> into vector<8xindex>
907/// %offset = arith.constant dense<4> : vector<4xindex>
908/// %segment1 = arith.addi %base, %offset : vector<4xindex>
909/// %result1 = vector.insert_strided_slice %segment1, %result0
910/// {offsets = [4], strides = [1]} : vector<4xindex> into vector<8xindex>
911///
912struct UnrollStepPattern : public OpRewritePattern<vector::StepOp> {
913 UnrollStepPattern(MLIRContext *context,
914 const vector::UnrollVectorOptions &options,
915 PatternBenefit benefit = 1)
916 : OpRewritePattern<vector::StepOp>(context, benefit), options(options) {}
917
918 LogicalResult matchAndRewrite(vector::StepOp stepOp,
919 PatternRewriter &rewriter) const override {
920 std::optional<SmallVector<int64_t>> targetShape =
921 getTargetShape(options, stepOp);
922 if (!targetShape)
923 return failure();
924
925 VectorType vecType = stepOp.getType();
926 if (vecType.isScalable()) {
927 // Scalable vectors are not supported by this pattern.
928 return failure();
929 }
930 int64_t originalSize = vecType.getShape()[0];
931 Location loc = stepOp.getLoc();
932 SmallVector<int64_t> strides(1, 1);
933
934 Value result = arith::ConstantOp::create(rewriter, loc, vecType,
935 rewriter.getZeroAttr(vecType));
936
937 auto targetVecType =
938 VectorType::get(*targetShape, vecType.getElementType());
939 Value baseStep = vector::StepOp::create(rewriter, loc, targetVecType);
940 for (const SmallVector<int64_t> &offsets :
941 StaticTileOffsetRange({originalSize}, *targetShape)) {
942 Value bcastOffset = arith::ConstantOp::create(
943 rewriter, loc, targetVecType,
945 targetVecType,
946 IntegerAttr::get(targetVecType.getElementType(), offsets[0])));
947 Value tileStep =
948 arith::AddIOp::create(rewriter, loc, baseStep, bcastOffset);
949
950 result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
951 loc, tileStep, result, offsets, strides);
952 }
953 rewriter.replaceOp(stepOp, result);
954 return success();
955 }
956
957private:
958 vector::UnrollVectorOptions options;
959};
960
961/// Unrolls 2 or more dimensional `vector.from_elements` ops by unrolling the
962/// outermost dimension. For example:
963/// ```
964/// %v = vector.from_elements %e0, %e1, %e2, %e3, %e4, %e5 : vector<2x3xf32>
965///
966/// ==>
967///
968/// %0 = ub.poison : vector<2x3xf32>
969/// %v0 = vector.from_elements %e0, %e1, %e2 : vector<3xf32>
970/// %1 = vector.insert %v0, %0 [0] : vector<3xf32> into vector<2x3xf32>
971/// %v1 = vector.from_elements %e3, %e4, %e5 : vector<3xf32>
972/// %v = vector.insert %v1, %1 [1] : vector<3xf32> into vector<2x3xf32>
973/// ```
974///
975/// When this pattern is applied until a fixed-point is reached,
976/// this will produce a sequence of 1-d from_elements
977/// ops.
978struct UnrollFromElements : OpRewritePattern<vector::FromElementsOp> {
979 UnrollFromElements(MLIRContext *context,
980 const vector::UnrollVectorOptions &options,
981 PatternBenefit benefit = 1)
982 : OpRewritePattern<vector::FromElementsOp>(context, benefit),
983 options(options) {}
984
985 LogicalResult matchAndRewrite(vector::FromElementsOp op,
986 PatternRewriter &rewriter) const override {
987 ValueRange allElements = op.getElements();
988
989 auto unrollFromElementsFn = [&](PatternRewriter &rewriter, Location loc,
990 VectorType subTy, int64_t index) {
991 size_t subTyNumElements = subTy.getNumElements();
992 assert((index + 1) * subTyNumElements <= allElements.size() &&
993 "out of bounds");
994 ValueRange subElements =
995 allElements.slice(index * subTyNumElements, subTyNumElements);
996 return vector::FromElementsOp::create(rewriter, loc, subTy, subElements);
997 };
998
999 return unrollVectorOp(op, rewriter, unrollFromElementsFn);
1000 }
1001
1002private:
1003 vector::UnrollVectorOptions options;
1004};
1005
1006/// This pattern unrolls `vector.create_mask` operations into smaller mask
1007/// operations based on the target unroll shape. Each unrolled slice computes
1008/// its local mask size in each dimension (d) as:
1009/// min(max(originalMaskSize[d] - offset[d], 0), unrolledDimSize[d]).
1010/// Example:
1011/// Given a create_mask operation:
1012/// %0 = vector.create_mask %c6, %c10 : vector<8x16xi1> // mask first 6x10
1013/// elements
1014///
1015/// and a target unroll shape of <4x8>, the pattern produces:
1016///
1017/// %false = arith.constant dense<false> : vector<8x16xi1>
1018///
1019/// Slice [0,0]:
1020/// mask size = min(max(6-0, 0), 4) x min(max(10-0, 0), 8) = 4x8
1021/// %mask00 = vector.create_mask %c4, %c8 : vector<4x8xi1>
1022/// %r0 = vector.insert_strided_slice %mask00, %false [0, 0], [1, 1]
1023/// : vector<4x8xi1> into vector<8x16xi1>
1024/// Slice [0,8]:
1025/// mask size = min(max(6-0, 0), 4) x min(max(10-8, 0), 8) = 4x2
1026/// %mask01 = vector.create_mask %c4, %c2 : vector<4x8xi1>
1027/// %r1 = vector.insert_strided_slice %mask01, %r0 [0, 8], [1, 1]
1028/// : vector<4x8xi1> into vector<8x16xi1>
1029/// Slice [4,0]:
1030/// mask size = min(max(6-4, 0), 4) x min(max(10-0, 0), 8) = 2x8
1031/// %mask10 = vector.create_mask %c2, %c8 : vector<4x8xi1>
1032/// %r2 = vector.insert_strided_slice %mask10, %r1 [4, 0], [1, 1]
1033/// : vector<4x8xi1> into vector<8x16xi1>
1034/// Slice [4,8]:
1035/// mask size = min(max(6-4, 0), 4) x min(max(10-8, 0), 8) = 2x2
1036/// %mask11 = vector.create_mask %c2, %c2 : vector<4x8xi1>
1037/// %result = vector.insert_strided_slice %mask11, %r2 [4, 8], [1, 1]
1038/// : vector<4x8xi1> into vector<8x16xi1>
1039struct UnrollCreateMaskPattern : public OpRewritePattern<vector::CreateMaskOp> {
1040 UnrollCreateMaskPattern(MLIRContext *context,
1041 const vector::UnrollVectorOptions &options,
1042 PatternBenefit benefit = 1)
1043 : OpRewritePattern<vector::CreateMaskOp>(context, benefit),
1044 options(options) {}
1045
1046 LogicalResult matchAndRewrite(vector::CreateMaskOp createMaskOp,
1047 PatternRewriter &rewriter) const override {
1048 auto targetShape = getTargetShape(options, createMaskOp);
1049 if (!targetShape)
1050 return failure();
1051
1052 VectorType resultType = createMaskOp.getVectorType();
1053 SmallVector<int64_t> originalSize = *createMaskOp.getShapeForUnroll();
1054 Location loc = createMaskOp.getLoc();
1055
1056 Value result = arith::ConstantOp::create(rewriter, loc, resultType,
1057 rewriter.getZeroAttr(resultType));
1058 VectorType targetVectorType =
1059 VectorType::get(*targetShape, rewriter.getI1Type());
1060 SmallVector<int64_t> strides(targetShape->size(), 1);
1061
1062 // In each dimension (d), each unrolled vector computes its mask size as:
1063 // min(max(originalMaskOperands[d] - offset[d], 0), unrolledDimSize[d]).
1064 for (SmallVector<int64_t> offsets :
1065 StaticTileOffsetRange(originalSize, *targetShape)) {
1066 SmallVector<Value> unrolledOperands;
1067
1068 for (auto [i, originalMaskOperand] :
1069 llvm::enumerate(createMaskOp.getOperands())) {
1070 Value offsetVal =
1071 arith::ConstantIndexOp::create(rewriter, loc, offsets[i]);
1072 Value adjustedMaskSize = rewriter.createOrFold<arith::SubIOp>(
1073 loc, originalMaskOperand, offsetVal);
1074 Value zero = arith::ConstantIndexOp::create(rewriter, loc, 0);
1075 Value unrolledDimSize =
1076 arith::ConstantIndexOp::create(rewriter, loc, (*targetShape)[i]);
1077 Value nonNegative =
1078 rewriter.createOrFold<arith::MaxSIOp>(loc, adjustedMaskSize, zero);
1079 Value unrolledOperand = rewriter.createOrFold<arith::MinSIOp>(
1080 loc, nonNegative, unrolledDimSize);
1081 unrolledOperands.push_back(unrolledOperand);
1082 }
1083
1084 auto unrolledMask = rewriter.createOrFold<vector::CreateMaskOp>(
1085 loc, targetVectorType, unrolledOperands);
1086 result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
1087 loc, unrolledMask, result, offsets, strides);
1088 }
1089 rewriter.replaceOp(createMaskOp, result);
1090 return success();
1091 }
1092
1093private:
1094 vector::UnrollVectorOptions options;
1095};
1096
1097/// This pattern unrolls `vector.constant_mask` operations into smaller mask
1098/// operations based on the target unroll shape. Each unrolled slice computes
1099/// whether its elements should be masked based on the original mask dimensions
1100/// and the slice's offset position.
1101///
1102/// Example:
1103/// Given a constant_mask operation:
1104/// %0 = vector.constant_mask [6, 10] : vector<8x16xi1>
1105///
1106/// and a target unroll shape of <4x8>, the pattern produces:
1107///
1108/// %false = arith.constant dense<false> : vector<8x16xi1>
1109///
1110/// Slice [0,0]: elements [0:4, 0:8] - fully within [6, 10] bounds
1111/// %mask00 = vector.constant_mask [4, 8] : vector<4x8xi1>
1112/// %r0 = vector.insert_strided_slice %mask00, %false [0, 0], [1, 1]
1113/// : vector<4x8xi1> into vector<8x16xi1>
1114///
1115/// Slice [0,8]: elements [0:4, 8:16] - partially within bounds
1116/// %mask01 = vector.constant_mask [4, 2] : vector<4x8xi1>
1117/// %r1 = vector.insert_strided_slice %mask01, %r0 [0, 8], [1, 1]
1118/// : vector<4x8xi1> into vector<8x16xi1>
1119///
1120/// Slice [4,0]: elements [4:8, 0:8] - partially within bounds
1121/// %mask10 = vector.constant_mask [2, 8] : vector<4x8xi1>
1122/// %r2 = vector.insert_strided_slice %mask10, %r1 [4, 0], [1, 1]
1123/// : vector<4x8xi1> into vector<8x16xi1>
1124///
1125/// Slice [4,8]: elements [4:8, 8:16] - partially within bounds
1126/// %mask11 = vector.constant_mask [2, 2] : vector<4x8xi1>
1127/// %result = vector.insert_strided_slice %mask11, %r2 [4, 8], [1, 1]
1128/// : vector<4x8xi1> into vector<8x16xi1>
1129struct UnrollConstantMaskPattern
1130 : public OpRewritePattern<vector::ConstantMaskOp> {
1131 UnrollConstantMaskPattern(MLIRContext *context,
1132 const vector::UnrollVectorOptions &options,
1133 PatternBenefit benefit = 1)
1134 : OpRewritePattern<vector::ConstantMaskOp>(context, benefit),
1135 options(options) {}
1136
1137 LogicalResult matchAndRewrite(vector::ConstantMaskOp constantMaskOp,
1138 PatternRewriter &rewriter) const override {
1139 std::optional<SmallVector<int64_t>> targetShape =
1140 getTargetShape(options, constantMaskOp);
1141 if (!targetShape)
1142 return failure();
1143
1144 VectorType resultType = constantMaskOp.getVectorType();
1145 SmallVector<int64_t> originalSize = *constantMaskOp.getShapeForUnroll();
1146 Location loc = constantMaskOp.getLoc();
1147
1148 Value result = arith::ConstantOp::create(rewriter, loc, resultType,
1149 rewriter.getZeroAttr(resultType));
1150 VectorType targetVectorType =
1151 VectorType::get(*targetShape, rewriter.getI1Type());
1152 SmallVector<int64_t> strides(targetShape->size(), 1);
1153
1154 // In each dimension (d), each unrolled vector computes its mask size as:
1155 // min(max(originalMaskDim[d] - offset[d], 0), unrolledDimSize[d]).
1156 for (const SmallVector<int64_t> &offsets :
1157 StaticTileOffsetRange(originalSize, *targetShape)) {
1158 SmallVector<int64_t> unrolledMaskDims;
1159
1160 for (auto [i, originalMaskDim] :
1161 llvm::enumerate(constantMaskOp.getMaskDimSizes())) {
1162 // Calculate how many elements in this dimension should be masked
1163 // for this particular slice
1164 int64_t adjustedMaskSize =
1165 std::max(originalMaskDim - offsets[i], static_cast<int64_t>(0));
1166 int64_t unrolledMaskDim =
1167 std::min(adjustedMaskSize, static_cast<int64_t>((*targetShape)[i]));
1168 unrolledMaskDims.push_back(unrolledMaskDim);
1169 }
1170
1171 auto unrolledMask = rewriter.createOrFold<vector::ConstantMaskOp>(
1172 loc, targetVectorType, unrolledMaskDims);
1173 result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
1174 loc, unrolledMask, result, offsets, strides);
1175 }
1176 rewriter.replaceOp(constantMaskOp, result);
1177 return success();
1178 }
1179
1180private:
1181 vector::UnrollVectorOptions options;
1182};
1183
1184/// Checks whether extractShape is a contiguous slice of shape.
1185/// For extractShape to be contiguous in shape:
/// 1) All but the leading dimension of extractShape and shape must match
/// exactly.
/// 2) The total number of elements in shape must be evenly divisible by
/// the total number of elements in extractShape.
1190/// Examples:
1191/// isContiguous([4, 4], [8, 4]) == true
1192/// isContiguous([2, 4], [8, 4]) == true
1193/// isContiguous([2, 2], [8, 4]) == false
1194/// Removes leading unit dimensions to handle cases like:
1195/// isContiguous([1, 16], [1, 32]) == true
1196static bool isContiguous(ArrayRef<int64_t> extractShape,
1198
1199 if (extractShape.empty() || shape.empty() ||
1200 extractShape.size() > shape.size())
1201 return false;
1202
1203 while (extractShape.size() > 1 && extractShape.front() == 1)
1204 extractShape = extractShape.drop_front();
1205
1206 while (shape.size() > 1 && shape.front() == 1) {
1207 shape = shape.drop_front();
1208 }
1209
1210 size_t rankDiff = shape.size() - extractShape.size();
1211 if (!llvm::equal(extractShape.drop_front(), shape.drop_front(rankDiff + 1)))
1212 return false;
1213
1214 int64_t extractElements = ShapedType::getNumElements(extractShape);
1215 int64_t shapeElements = ShapedType::getNumElements(shape);
1216 return shapeElements % extractElements == 0;
1217}
1218
1219/// Determines what shape to use with `vector.extract_strided_slice` to extract
1220/// a contiguous memory region from a source vector. The extraction must be
1221/// contiguous and contain exactly the specified number of elements. If such an
1222/// extraction shape cannot be determined, returns std::nullopt.
1223/// EXAMPLE 1:
1224/// sourceShape = [16], targetElements = 8
1225/// Working right-to-left:
1226/// - Take min(8, 16) = 8 from only dim → extractShape = [8],
1227/// remaining = 8/8 = 1
1228/// Result: [8]
1229///
1230/// EXAMPLE 2:
1231/// sourceShape = [4, 4], targetElements = 8
1232/// Working right-to-left:
1233/// - Take min(8, 4) = 4 from last dim → extractShape = [4],
1234/// remaining = 8/4 = 2
1235/// - Take min(2, 4) = 2 from first dim → extractShape = [2, 4],
1236/// remaining = 2/2 = 1
1237/// Result: [2, 4]
1238static std::optional<SmallVector<int64_t>>
1239calculateSourceExtractShape(ArrayRef<int64_t> sourceShape,
1240 int64_t targetElements) {
1241 SmallVector<int64_t> extractShape;
1242 int64_t remainingElements = targetElements;
1243
1244 // Build extract shape from innermost dimension outward to ensure contiguity.
1245 for (int i = sourceShape.size() - 1; i >= 0 && remainingElements > 1; --i) {
1246 int64_t takeFromDim = std::min(remainingElements, sourceShape[i]);
1247 extractShape.insert(extractShape.begin(), takeFromDim);
1248
1249 if (remainingElements % takeFromDim != 0)
1250 return std::nullopt; // Not evenly divisible.
1251 remainingElements /= takeFromDim;
1252 }
1253
1254 // Fill remaining dimensions with 1.
1255 while (extractShape.size() < sourceShape.size())
1256 extractShape.insert(extractShape.begin(), 1);
1257
1258 if (ShapedType::getNumElements(extractShape) != targetElements)
1259 return std::nullopt;
1260
1261 return extractShape;
1262}
1263
1264// Convert result offsets to source offsets via linear position.
1266calculateSourceOffsets(ArrayRef<int64_t> resultOffsets,
1267 ArrayRef<int64_t> sourceShape,
1268 ArrayRef<int64_t> resultShape) {
1269 // Convert result offsets to linear position.
1270 int64_t linearIndex = linearize(resultOffsets, computeStrides(resultShape));
1271 // Convert linear position to source offsets.
1272 return delinearize(linearIndex, computeStrides(sourceShape));
1273}
1274
1275/// This pattern unrolls `vector.shape_cast` operations according to the
1276/// provided target unroll shape. It unrolls a large shape cast into smaller
1277/// shape casts by extracting contiguous slices from the source vector, casting
1278/// each slice to the target shape, and assembling the result by inserting each
1279/// computed segment into the appropriate offset of the result vector.
1280///
1281/// This pattern only applies when contiguous slices can be extracted from the
1282/// source vector and inserted into the result vector such that each slice
1283/// remains a valid vector (and not decompose to scalars). In these cases, the
1284/// unrolling proceeds as:
1285/// vector.extract_strided_slice -> vector.shape_cast (on the slice) ->
1286/// vector.insert_strided_slice.
1287///
1288/// Example:
1289/// Given a shape cast operation:
1290/// %0 = vector.shape_cast %src : vector<8x2xf32> to vector<4x4xf32>
1291///
1292/// and a target unroll shape of <2x4>, the pattern produces:
1293///
1294/// %zero = arith.constant dense<0.0> : vector<4x4xf32>
1295/// %s0 = vector.extract_strided_slice %src [0, 0], [4, 2], [1, 1]
1296/// : vector<8x2xf32> to vector<4x2xf32>
1297/// %sc0 = vector.shape_cast %s0 : vector<4x2xf32> to vector<2x4xf32>
1298/// %i0 = vector.insert_strided_slice %sc0, %zero [0, 0], [1, 1]
1299/// : vector<2x4xf32> into vector<4x4xf32>
1300/// %s1 = vector.extract_strided_slice %src [4, 0], [4, 2], [1, 1]
1301/// : vector<8x2xf32> to vector<4x2xf32>
1302/// %sc1 = vector.shape_cast %s1 : vector<4x2xf32> to vector<2x4xf32>
1303/// %i1 = vector.insert_strided_slice %sc1, %i0 [2, 0], [1, 1]
1304/// : vector<2x4xf32> into vector<4x4xf32>
1305///
1306struct UnrollShapeCastPattern : public OpRewritePattern<vector::ShapeCastOp> {
1307 UnrollShapeCastPattern(MLIRContext *context,
1308 const vector::UnrollVectorOptions &options,
1309 PatternBenefit benefit = 1)
1310 : OpRewritePattern<vector::ShapeCastOp>(context, benefit),
1311 options(options) {}
1312
1313 LogicalResult matchAndRewrite(vector::ShapeCastOp shapeCastOp,
1314 PatternRewriter &rewriter) const override {
1315 std::optional<SmallVector<int64_t>> targetShape =
1316 getTargetShape(options, shapeCastOp);
1317 if (!targetShape)
1318 return failure();
1319
1320 VectorType sourceType = shapeCastOp.getSourceVectorType();
1321 VectorType resultType = shapeCastOp.getResultVectorType();
1322 ArrayRef<int64_t> sourceShape = sourceType.getShape();
1323 ArrayRef<int64_t> resultShape = resultType.getShape();
1324
1325 if (!isContiguous(*targetShape, resultShape))
1326 return rewriter.notifyMatchFailure(
1327 shapeCastOp, "Only supports cases where target shape is "
1328 "contiguous in result vector shape");
1329
1330 int64_t targetElements = ShapedType::getNumElements(*targetShape);
1331
1332 // Calculate the shape to extract from source.
1333 std::optional<SmallVector<int64_t>> extractShape =
1334 calculateSourceExtractShape(sourceShape, targetElements);
1335 if (!extractShape)
1336 return rewriter.notifyMatchFailure(
1337 shapeCastOp,
1338 "cannot extract target number of elements contiguously from source");
1339
1340 Location loc = shapeCastOp.getLoc();
1341
1342 // Create result vector initialized to zero.
1343 Value result = arith::ConstantOp::create(rewriter, loc, resultType,
1344 rewriter.getZeroAttr(resultType));
1345
1346 VectorType targetType =
1347 VectorType::get(*targetShape, sourceType.getElementType());
1348
1349 SmallVector<int64_t> extractStrides(extractShape->size(), 1);
1350 SmallVector<int64_t> insertStrides(targetShape->size(), 1);
1351
1352 for (SmallVector<int64_t> resultOffsets :
1353 StaticTileOffsetRange(resultShape, *targetShape)) {
1354 SmallVector<int64_t> sourceOffsets =
1355 calculateSourceOffsets(resultOffsets, sourceShape, resultShape);
1356 Value sourceChunk = rewriter.createOrFold<vector::ExtractStridedSliceOp>(
1357 loc, shapeCastOp.getSource(), sourceOffsets, *extractShape,
1359 Value targetChunk = rewriter.createOrFold<vector::ShapeCastOp>(
1360 loc, targetType, sourceChunk);
1361 result = rewriter.createOrFold<vector::InsertStridedSliceOp>(
1362 loc, targetChunk, result, resultOffsets, insertStrides);
1363 }
1364
1365 rewriter.replaceOp(shapeCastOp, result);
1366 return success();
1367 }
1368
1369private:
1370 vector::UnrollVectorOptions options;
1371};
1372
1373} // namespace
1374
1375void mlir::vector::populateVectorUnrollPatterns(
1377 PatternBenefit benefit) {
1378 patterns.add<UnrollTransferReadPattern, UnrollTransferWritePattern,
1379 UnrollContractionPattern, UnrollElementwisePattern,
1380 UnrollReductionPattern, UnrollMultiReductionPattern,
1381 UnrollTransposePattern, UnrollGatherPattern, UnrollLoadPattern,
1382 UnrollStorePattern, UnrollBroadcastPattern, UnrollFromElements,
1383 UnrollToElements, UnrollStepPattern, UnrollShapeCastPattern,
1384 UnrollCreateMaskPattern, UnrollConstantMaskPattern>(
1385 patterns.getContext(), options, benefit);
1386}
1387
1388void mlir::vector::populateVectorToElementsUnrollPatterns(
1389 RewritePatternSet &patterns, PatternBenefit benefit) {
1390 patterns.add<UnrollToElements>(patterns.getContext(), UnrollVectorOptions(),
1391 benefit);
1392}
1393
1394void mlir::vector::populateVectorFromElementsUnrollPatterns(
1395 RewritePatternSet &patterns, PatternBenefit benefit) {
1396 patterns.add<UnrollFromElements>(patterns.getContext(), UnrollVectorOptions(),
1397 benefit);
1398}
return success()
static LogicalResult extractStrides(AffineExpr e, AffineExpr multiplicativeFactor, MutableArrayRef< AffineExpr > strides, AffineExpr &offset)
Takes a single AffineExpr e and populates the strides array with the strides expressions for each dim...
lhs
static llvm::ManagedStatic< PassManagerOptions > options
static SmallVector< Value > sliceLoadStoreIndices(PatternRewriter &rewriter, Location loc, OperandRange originalIndices, ArrayRef< int64_t > offsets)
static SmallVector< Value > sliceTransferIndices(ArrayRef< int64_t > elementOffsets, ArrayRef< Value > indices, AffineMap permutationMap, Location loc, OpBuilder &builder)
Compute the indices of the slice index for a transfer op.
static std::optional< SmallVector< int64_t > > getTargetShape(const vector::UnrollVectorOptions &options, Operation *op)
Return the target shape for unrolling for the given op.
static SmallVector< int64_t > getUnrollOrder(unsigned numLoops, Operation *op, const vector::UnrollVectorOptions &options)
static Operation * cloneOpWithOperandsAndTypes(OpBuilder &builder, Location loc, Operation *op, ArrayRef< Value > operands, ArrayRef< Type > resultTypes)
Base type for affine expression.
Definition AffineExpr.h:68
A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.
Definition AffineMap.h:46
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
ArrayRef< AffineExpr > getResults() const
TypedAttr getZeroAttr(Type type)
Definition Builders.cpp:328
IntegerType getI1Type()
Definition Builders.cpp:57
MLIRContext * getContext() const
Definition Builders.h:56
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
This class helps build Operations.
Definition Builders.h:209
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition Builders.h:528
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition Builders.cpp:461
This class implements the operand iterators for the Operation class.
Definition ValueRange.h:43
StringRef getStringRef() const
Return the name of this operation. This always succeeds.
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition Operation.h:520
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition Operation.h:415
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:223
MutableArrayRef< OpOperand > getOpOperands()
Definition Operation.h:391
OperationName getName()
The name of an operation is the key identifier for it.
Definition Operation.h:119
result_range getResults()
Definition Operation.h:423
unsigned getNumResults()
Return the number of results held by this operation.
Definition Operation.h:412
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
MLIRContext * getContext() const
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&...args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments.
RewritePattern is the common base class for all DAG to DAG replacements.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Type getType() const
Return the type of this value.
Definition Value.h:105
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition ArithOps.cpp:363
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:717
Value makeArithReduction(OpBuilder &b, Location loc, CombiningKind kind, Value v1, Value acc, arith::FastMathFlagsAttr fastmath=nullptr, Value mask=nullptr)
Returns the result value of reducing two scalar/vector values with the corresponding arith operation.
FailureOr< SmallVector< Value > > unrollVectorValue(TypedValue< VectorType >, RewriterBase &)
Generic utility for unrolling values of type vector<NxAxBx...> to N values of type vector<AxBx....
LogicalResult unrollVectorOp(Operation *op, PatternRewriter &rewriter, UnrollVectorOpFn unrollFn)
Include the generated interface declarations.
SmallVector< int64_t > computeStrides(ArrayRef< int64_t > sizes)
SmallVector< int64_t > delinearize(int64_t linearIndex, ArrayRef< int64_t > strides)
Given the strides together with a linear index in the dimension space, return the vector-space offset...
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Definition Value.h:497
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)
SmallVector< T > applyPermutationMap(AffineMap map, llvm::ArrayRef< T > source)
Apply a permutation from map to source and return the result.
Definition AffineMap.h:675
int64_t linearize(ArrayRef< int64_t > offsets, ArrayRef< int64_t > basis)
Return the linearized index of 'offsets' w.r.t.
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Options that control the vector unrolling.