//===- LowerVectorGather.cpp - Lower 'vector.gather' operation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements target-independent rewrites and utilities to lower the
// 'vector.gather' operation.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/PatternMatch.h"

#define DEBUG_TYPE "vector-gather-lowering"

using namespace mlir;
using namespace mlir::vector;

namespace {
/// Unrolls 2 or more dimensional `vector.gather` ops by unrolling the
/// outermost dimension. For example:
/// ```
/// %g = vector.gather %base[%c0][%v], %mask, %pass_thru :
///        ... into vector<2x3xf32>
///
/// ==>
///
/// %0 = arith.constant dense<0.0> : vector<2x3xf32>
/// %g0 = vector.gather %base[%c0][%v0], %mask0, %pass_thru0 : ...
/// %1 = vector.insert %g0, %0 [0] : vector<3xf32> into vector<2x3xf32>
/// %g1 = vector.gather %base[%c0][%v1], %mask1, %pass_thru1 : ...
/// %g = vector.insert %g1, %1 [1] : vector<3xf32> into vector<2x3xf32>
/// ```
///
/// When applied exhaustively, this will produce a sequence of 1-D gather ops.
///
/// Supports vector types with a fixed leading dimension.
struct UnrollGather : OpRewritePattern<vector::GatherOp> {
  using Base::Base;

  LogicalResult matchAndRewrite(vector::GatherOp op,
                                PatternRewriter &rewriter) const override {
    Value indexVec = op.getIndices();
    Value maskVec = op.getMask();
    Value passThruVec = op.getPassThru();

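    // `unrollVectorOp` drives the iteration over the leading dimension; this
    // callback only needs to build the sub-gather for a single outermost
    // position `index`, from matching slices of the indices, mask, and
    // pass-through operands.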
    auto unrollGatherFn = [&](PatternRewriter &rewriter, Location loc,
                              VectorType subTy, int64_t index) {
      int64_t thisIdx[1] = {index};

      Value indexSubVec =
          vector::ExtractOp::create(rewriter, loc, indexVec, thisIdx);
      Value maskSubVec =
          vector::ExtractOp::create(rewriter, loc, maskVec, thisIdx);
      Value passThruSubVec =
          vector::ExtractOp::create(rewriter, loc, passThruVec, thisIdx);
      return vector::GatherOp::create(rewriter, loc, subTy, op.getBase(),
                                      op.getOffsets(), indexSubVec, maskSubVec,
                                      passThruSubVec, op.getAlignmentAttr());
    };

    return unrollVectorOp(op, rewriter, unrollGatherFn);
  }
};

/// Rewrites a vector.gather of a strided MemRef as a gather of a non-strided
/// MemRef with updated offsets/indices that model the strided access.
///
/// ```mlir
/// %subview = memref.subview %M[%i, %j] [100, 1] [1, 1]
///     : memref<100x3xf32> to memref<100xf32, strided<[3], offset: ?>>
/// %gather = vector.gather %subview[%c0] [%idxs] (...)
///     : memref<100xf32, strided<[3], offset: ?>>
/// ```
/// ==>
/// ```mlir
/// %collapse_shape = memref.collapse_shape %M (...)
///     : memref<100x3xf32> into memref<300xf32>
/// %new_idxs = arith.muli %idxs, %c3 : vector<4xindex>
/// %new_off = arith.addi %c0_scaled, %subview_offset : index
/// %gather = vector.gather %collapse_shape[%new_off] [%new_idxs] (...)
///     : memref<300xf32> (...)
/// ```
///
/// The subview's static offset (the linearized position of the first element
/// in the source memref) must be folded into the gather's base offsets, so a
/// subview that selects e.g. column `j_sub` of a row-major `MxN` memref still
/// reads from `M_base + j_sub + idx * N` instead of `M_base + idx * N`.
///
/// At the moment this is effectively limited to reading a 1-D vector from a
/// 2-D MemRef, but it should be fairly straightforward to extend beyond that.
struct RemoveStrideFromGatherSource : OpRewritePattern<vector::GatherOp> {
  using Base::Base;

  LogicalResult matchAndRewrite(vector::GatherOp op,
                                PatternRewriter &rewriter) const override {
    Value base = op.getBase();

    // TODO: Strided accesses might be coming from other ops as well.
    auto subview = base.getDefiningOp<memref::SubViewOp>();
    if (!subview)
      return failure();

    auto sourceType = subview.getSource().getType();

    // TODO: Allow ranks > 2.
    if (sourceType.getRank() != 2)
      return failure();

    // Get strides.
    auto layout = subview.getResult().getType().getLayout();
    auto stridedLayoutAttr = llvm::dyn_cast<StridedLayoutAttr>(layout);
    if (!stridedLayoutAttr)
      return failure();

    // TODO: Allow the access to be strided in multiple dimensions.
    if (stridedLayoutAttr.getStrides().size() != 1)
      return failure();

    int64_t srcTrailingDim = sourceType.getShape().back();

    // Assume that the stride matches the trailing dimension of the source
    // memref.
    // TODO: Relax this assumption.
    if (stridedLayoutAttr.getStrides()[0] != srcTrailingDim)
      return failure();

    // The result memref's offset is the linearized position of the subview's
    // first element within the source memref. Bail out on dynamic offsets so
    // we don't have to materialize them; the conditional-load fallback will
    // still produce correct code.
    // TODO: Support dynamic offsets.
    int64_t subviewOffset = stridedLayoutAttr.getOffset();
    if (ShapedType::isDynamic(subviewOffset))
      return failure();

    // 1. Collapse the input memref so that it's "flat".
    SmallVector<ReassociationIndices> reassoc = {{0, 1}};
    Value collapsed = memref::CollapseShapeOp::create(
        rewriter, op.getLoc(), subview.getSource(), reassoc);

    // 2. Generate new gather indices that will model the strided access.
    // Take `memref<4xf32, strided<[3], offset: 1>>` and lane k as an example.
    // For the rewrite to be correct, the flat positions must match:
    //    new_off + new_idxs[k] = 1 + (base_off + idxs[k]) * 3
    //                          = 1 + base_off * 3 + idxs[k] * 3
    // So the new indices are the old indices scaled by the stride, e.g. with
    // stride 3, idxs = [0, 2] becomes new_idxs = [0, 6].
    IntegerAttr stride = rewriter.getIndexAttr(srcTrailingDim);
    VectorType vType = op.getIndices().getType();
    Value mulCst = arith::ConstantOp::create(
        rewriter, op.getLoc(), vType, DenseElementsAttr::get(vType, stride));
    Value newIdxs =
        arith::MulIOp::create(rewriter, op.getLoc(), op.getIndices(), mulCst);

    // 3. Linearize the gather's base offsets through the source memref. On the
    // collapsed memref the trailing offset must be scaled by the source's
    // trailing dim and shifted by the subview's static offset.
    // Pick new_idxs[k] = idxs[k] * 3 (that's step 2), and solve for new_off:
    //    new_off = 1 + base_off * 3
    //            = subview_offset + base_off * stride
    // Note that createOrFold collapses the muli/addi when the trailing offset
    // is a constant zero or the subview offset is zero.
    SmallVector<Value> newOffsets(op.getOffsets());
    Value strideVal =
        arith::ConstantIndexOp::create(rewriter, op.getLoc(), srcTrailingDim);
    newOffsets.back() = rewriter.createOrFold<arith::MulIOp>(
        op.getLoc(), newOffsets.back(), strideVal);
    Value subviewOffsetValue =
        arith::ConstantIndexOp::create(rewriter, op.getLoc(), subviewOffset);
    newOffsets.back() = rewriter.createOrFold<arith::AddIOp>(
        op.getLoc(), newOffsets.back(), subviewOffsetValue);

    // 4. Create an updated gather op with the collapsed input memref and the
    // updated offsets/indices.
    Value newGather = vector::GatherOp::create(
        rewriter, op.getLoc(), op.getResult().getType(), collapsed, newOffsets,
        newIdxs, op.getMask(), op.getPassThru(), op.getAlignmentAttr());
    rewriter.replaceOp(op, newGather);

    return success();
  }
};

/// Turns a 1-D `vector.gather` into a scalarized sequence of `vector.load`s
/// or `tensor.extract`s. To avoid out-of-bounds memory accesses, these
/// loads/extracts are made conditional using `scf.if` ops.
///
/// For multi-dimensional memrefs (rank > 1), the gather index is combined
/// with the offsets via linearize-then-delinearize to produce correct
/// N-D load indices:
///   idx = indices[i]
///   flatIdx = linearize(offsets, memrefShape) + idx
///   loadIndices = delinearize(flatIdx, memrefShape)
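///
/// As an illustrative sketch (an assumption about the exact output, with
/// names and types abbreviated), lane 0 of a gather from a rank-1 memref
/// becomes a mask-guarded load:
/// ```mlir
/// %m0 = vector.extract %mask[0] : i1 from vector<2xi1>
/// %r0 = scf.if %m0 -> (vector<2xf32>) {
///   %pos = arith.addi %offset, %idx0 : index
///   %ld = vector.load %base[%pos] : memref<16xf32>, vector<1xf32>
///   %e = vector.extract %ld[0] : f32 from vector<1xf32>
///   %v = vector.insert %e, %pass_thru [0] : f32 into vector<2xf32>
///   scf.yield %v : vector<2xf32>
/// } else {
///   scf.yield %pass_thru : vector<2xf32>
/// }
/// ```
/// Each subsequent lane chains on the previous `scf.if` result rather than on
/// `%pass_thru`.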
struct Gather1DToConditionalLoads : OpRewritePattern<vector::GatherOp> {
  using Base::Base;

  LogicalResult matchAndRewrite(vector::GatherOp op,
                                PatternRewriter &rewriter) const override {
    VectorType resultTy = op.getType();
    if (resultTy.getRank() != 1)
      return rewriter.notifyMatchFailure(op, "unsupported rank");

    if (resultTy.isScalable())
      return rewriter.notifyMatchFailure(op, "not a fixed-width vector");

    Location loc = op.getLoc();
    Type elemTy = resultTy.getElementType();
    // Vector type with a single element. Used to generate `vector.load`s.
    VectorType elemVecTy = VectorType::get({1}, elemTy);

    Value condMask = op.getMask();
    Value base = op.getBase();

    // For multi-dimensional memrefs, use linearize+delinearize to compute
    // correct N-D load indices from the 1-D gather index.
    bool useDelinearization = false;
    if (auto memType = dyn_cast<MemRefType>(base.getType())) {
      // vector.load requires the most minor memref dim to have unit stride
      // (unless reading exactly 1 element).
      if (auto stridesAttr =
              dyn_cast_if_present<StridedLayoutAttr>(memType.getLayout())) {
        if (stridesAttr.getStrides().back() != 1 &&
            resultTy.getNumElements() != 1)
          return rewriter.notifyMatchFailure(
              op, "most minor memref dim must have unit stride");
      }

      if (memType.getRank() > 1)
        useDelinearization = true;
    }

    Value indexVec = rewriter.createOrFold<arith::IndexCastOp>(
        loc, op.getIndexVectorType().clone(rewriter.getIndexType()),
        op.getIndices());
    auto loadOffsets = llvm::to_vector(op.getOffsets());
    Value lastLoadOffset = loadOffsets.back();

    // Compute the memref shape and linearized offsets once, outside the
    // per-element loop.
    SmallVector<OpFoldResult> baseShape;
    Value linearizedOffsets;
    if (useDelinearization) {
      baseShape = memref::getMixedSizes(rewriter, loc, base);
      linearizedOffsets = affine::AffineLinearizeIndexOp::create(
          rewriter, loc, loadOffsets, baseShape, /*disjoint=*/false);
    }

    Value result = op.getPassThru();
    BoolAttr nontemporalAttr = nullptr;
    IntegerAttr alignmentAttr = op.getAlignmentAttr();

    // Emit a conditional access for each vector element.
    for (int64_t i = 0, e = resultTy.getNumElements(); i < e; ++i) {
      int64_t thisIdx[1] = {i};
      Value condition =
          vector::ExtractOp::create(rewriter, loc, condMask, thisIdx);
      Value index = vector::ExtractOp::create(rewriter, loc, indexVec, thisIdx);

      if (useDelinearization) {
        // The gather index offsets the innermost dimension. Combine it with
        // the offsets by linearizing, adding the gather index, then
        // delinearizing back to N-D indices:
        //   flatIdx = linearize(offsets, shape) + idx
        //   loadIndices = delinearize(flatIdx, shape)
        Value flatIdx =
            rewriter.createOrFold<arith::AddIOp>(loc, linearizedOffsets, index);
        auto delinOp = affine::AffineDelinearizeIndexOp::create(
            rewriter, loc, flatIdx, baseShape, /*hasOuterBound=*/true);
        for (int64_t d = 0, rank = loadOffsets.size(); d < rank; ++d)
          loadOffsets[d] = delinOp.getResult(d);
      } else {
        loadOffsets.back() =
            rewriter.createOrFold<arith::AddIOp>(loc, lastLoadOffset, index);
      }

      auto loadBuilder = [&](OpBuilder &b, Location loc) {
        Value extracted;
        if (isa<MemRefType>(base.getType())) {
          // `vector.load` does not support scalar results; emit a one-element
          // vector load and extract its single element instead.
          Value load =
              vector::LoadOp::create(b, loc, elemVecTy, base, loadOffsets,
                                     nontemporalAttr, alignmentAttr);
          int64_t zeroIdx[1] = {0};
          extracted = vector::ExtractOp::create(b, loc, load, zeroIdx);
        } else {
          extracted = tensor::ExtractOp::create(b, loc, base, loadOffsets);
        }

        Value newResult =
            vector::InsertOp::create(b, loc, extracted, result, thisIdx);
        scf::YieldOp::create(b, loc, newResult);
      };
      auto passThruBuilder = [result](OpBuilder &b, Location loc) {
        scf::YieldOp::create(b, loc, result);
      };

      result = scf::IfOp::create(rewriter, loc, condition,
                                 /*thenBuilder=*/loadBuilder,
                                 /*elseBuilder=*/passThruBuilder)
                   .getResult(0);
    }

    rewriter.replaceOp(op, result);
    return success();
  }
};
} // namespace
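
// Illustrative usage sketch (an assumption, not part of this file): a pass
// would typically hand these patterns to the greedy rewrite driver, e.g.:
//   RewritePatternSet patterns(&getContext());
//   populateVectorGatherLoweringPatterns(patterns);
//   populateVectorGatherToConditionalLoadPatterns(patterns);
//   (void)applyPatternsGreedily(getOperation(), std::move(patterns));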

void mlir::vector::populateVectorGatherLoweringPatterns(
    RewritePatternSet &patterns, PatternBenefit benefit) {
  patterns.add<UnrollGather>(patterns.getContext(), benefit);
}

void mlir::vector::populateVectorGatherToConditionalLoadPatterns(
    RewritePatternSet &patterns, PatternBenefit benefit) {
  patterns.add<RemoveStrideFromGatherSource, Gather1DToConditionalLoads>(
      patterns.getContext(), benefit);
}