doxygen/VectorContractToPackedTypeDotProduct_8cpp_source.html

//===- VectorContractToPackedTypeDotProduct.cpp ---------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//


#include "mlir/Dialect/Linalg/IR/Linalg.h"

#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h"

#include "mlir/Dialect/Vector/IR/VectorOps.h"

#include "mlir/Dialect/Vector/Utils/VectorUtils.h"

#include "mlir/Dialect/X86Vector/Transforms.h"

#include "mlir/Dialect/X86Vector/Utils/X86VectorUtils.h"

#include "mlir/Dialect/X86Vector/X86VectorDialect.h"


#include "mlir/IR/BuiltinAttributes.h"

#include "mlir/IR/Dominance.h"

#include "mlir/IR/PatternMatch.h"


#include "mlir/Pass/Pass.h"

#include "mlir/Transforms/GreedyPatternRewriteDriver.h"


using namespace mlir;

using namespace mlir::vector;

using namespace mlir::x86vector;


namespace {


// Implements packed type outer product contraction as a sequence
// of broadcast and packed dot-product operations.
//
// For example - for F32 type:
// ```
//   vector.contract <1x1x2xbf16>, <1x16x2xbf16> into <1x16xf32>
// ```
// to
// ```
//   vector.broadcast %lhs to <32xbf16>
//   x86vector.avx512.dot vector<32xbf16> -> vector<16xf32>
// ```
//
// The pattern only matches when all of the following hold:
//   * the combining kind is ADD;
//   * LHS and RHS share a BF16 or signless i8 element type;
//   * `isInVnniLayout` accepts the op for a blocking factor of 2 (BF16) or
//     4 (i8);
//   * exactly one of LHS/RHS has two non-unit dimensions (the "wide" operand)
//     while the other has exactly one (the "narrow" operand);
//   * the accumulator is a vector of f32 (BF16) or i32 (i8) with a single
//     non-unit dimension equal to the wide operand's leading non-unit
//     dimension, which must be 4/8/16 for BF16 or 4/8 for i8.
struct VectorContractToPackedTypeDotProduct
    : public OpRewritePattern<vector::ContractionOp> {
  using OpRewritePattern<vector::ContractionOp>::OpRewritePattern;

  // Rewrites `contractOp` into shape_cast/bitcast/broadcast ops feeding a
  // single x86vector packed dot-product op, then shape_casts the result back
  // to the original accumulator type. Returns a match failure (without
  // touching the IR) when any precondition listed on the struct is not met.
  LogicalResult matchAndRewrite(vector::ContractionOp contractOp,
                                PatternRewriter &rewriter) const override {
    if (contractOp.getKind() != vector::CombiningKind::ADD)
      return rewriter.notifyMatchFailure(contractOp,
                                         "Expects add combining kind.");

    VectorType lhsTy = contractOp.getLhsType();
    VectorType rhsTy = contractOp.getRhsType();
    Type elemTy = lhsTy.getElementType();
    const bool isBF16 = elemTy.isBF16();
    const bool isInt8 = elemTy.isSignlessInteger(8);
    if (!isBF16 && !isInt8)
      return rewriter.notifyMatchFailure(
          contractOp, "Only BF16/Int8 lowering is supported.");

    // The rewrite bitcasts the broadcast operand to the flattened type of the
    // other operand, so both inputs must use the same element type.
    if (rhsTy.getElementType() != elemTy)
      return rewriter.notifyMatchFailure(
          contractOp, "LHS and RHS element types must match.");

    unsigned int blockingFactor = isBF16 ? 2 : 4;
    if (!isInVnniLayout(contractOp.getOperation(),
                        contractOp.getIndexingMapsArray(), blockingFactor))
      return rewriter.notifyMatchFailure(contractOp,
                                         "Input matrices not in VNNI format.");

    // Gathers, in order, every dimension of `shape` that is not 1.
    auto collectNonUnitDims = [](ArrayRef<int64_t> shape) {
      llvm::SmallVector<int64_t> dims;
      llvm::copy_if(shape, std::back_inserter(dims),
                    [](int64_t dim) { return dim != 1; });
      return dims;
    };

    const llvm::SmallVector<int64_t> nonUnitDimLhs =
        collectNonUnitDims(lhsTy.getShape());
    const llvm::SmallVector<int64_t> nonUnitDimRhs =
        collectNonUnitDims(rhsTy.getShape());

    // At least one operand must consist of a single non-unit dimension.
    // Comparing sizes directly avoids the unsigned wrap-around of
    // `size() - 1` when an operand has no non-unit dimension at all.
    if (nonUnitDimLhs.size() != 1 && nonUnitDimRhs.size() != 1)
      return rewriter.notifyMatchFailure(contractOp,
                                         "Expects unit dimensions for either "
                                         "LHS or RHS shape other than VNNI.");

    // ... and at least one operand must carry exactly two non-unit
    // dimensions. Together with the check above this leaves only the
    // (1, 2) and (2, 1) combinations.
    if (nonUnitDimLhs.size() != 2 && nonUnitDimRhs.size() != 2)
      return rewriter.notifyMatchFailure(
          contractOp,
          "Expects a one non-unit A/B dimension for either LHS or RHS shape.");

    VectorType accTy = dyn_cast<VectorType>(contractOp.getAccType());
    if (!accTy)
      return rewriter.notifyMatchFailure(contractOp,
                                         "Wrong accumulator type.");

    if ((isBF16 && !accTy.getElementType().isF32()) ||
        (isInt8 && !accTy.getElementType().isSignlessInteger(32)))
      return rewriter.notifyMatchFailure(contractOp,
                                         "Only F32 for BF16 or Int32 for Int8 "
                                         "accumulation type is supported.");

    const llvm::SmallVector<int64_t> nonUnitDimAcc =
        collectNonUnitDims(accTy.getShape());
    if (nonUnitDimAcc.size() != 1)
      return rewriter.notifyMatchFailure(
          contractOp, "A or B should be a non-unit dim in acc.");

    // "Wide" is the operand with two non-unit dimensions; "narrow" is the
    // operand that gets broadcast.
    const bool rhsIsWide = nonUnitDimRhs.size() == 2;
    const llvm::SmallVector<int64_t> &wideDims =
        rhsIsWide ? nonUnitDimRhs : nonUnitDimLhs;
    const llvm::SmallVector<int64_t> &narrowDims =
        rhsIsWide ? nonUnitDimLhs : nonUnitDimRhs;

    // Non-unit dimensions should match the vector length of BF16 or Int8
    // dot-product, and the accumulator must have that same length.
    const int64_t vecLen = wideDims.front();
    const bool accMatchesVecLen = nonUnitDimAcc.front() == vecLen;

    if (isBF16 &&
        ((vecLen != 4 && vecLen != 8 && vecLen != 16) || !accMatchesVecLen))
      return rewriter.notifyMatchFailure(
          contractOp, "BF16 dot-product operation expects non-unit (LHS or "
                      "RHS) dim and acc dim of size 4/8/16.");

    if (isInt8 && ((vecLen != 4 && vecLen != 8) || !accMatchesVecLen))
      return rewriter.notifyMatchFailure(
          contractOp, "Int8 dot-product operation expects non-unit (LHS or "
                      "RHS) dim and acc dim of size 4/8.");

    // All preconditions hold; from here on the IR is modified.
    auto loc = contractOp.getLoc();
    Value castAcc = vector::ShapeCastOp::create(
                        rewriter, loc,
                        VectorType::get({vecLen}, accTy.getElementType()),
                        contractOp.getAcc())
                        .getResult();

    Value wideOperand = rhsIsWide ? contractOp.getRhs() : contractOp.getLhs();
    Value narrowOperand = rhsIsWide ? contractOp.getLhs() : contractOp.getRhs();

    // Flatten the wide operand to a 1-D vector. For example,
    // vector<1x16x2xbf16> becomes vector<32xbf16>.
    Value flatWide =
        vector::ShapeCastOp::create(
            rewriter, loc,
            VectorType::get({wideDims.front() * wideDims.back()}, elemTy),
            wideOperand)
            .getResult();

    // Broadcast the narrow operand to the length of the wide one. Its only
    // non-unit dimension is reinterpreted as a single i32, splatted `vecLen`
    // times, and reinterpreted back to the packed element type.
    // NOTE(review): this assumes that dimension is the 32-bit VNNI group
    // (2 x bf16 or 4 x i8), presumably guaranteed by isInVnniLayout - confirm.
    Value flatNarrow = vector::ShapeCastOp::create(
                           rewriter, loc,
                           VectorType::get({narrowDims.front()}, elemTy),
                           narrowOperand)
                           .getResult();
    Value narrowAsI32 =
        vector::BitCastOp::create(
            rewriter, loc, VectorType::get({1}, rewriter.getIntegerType(32)),
            flatNarrow)
            .getResult();
    Value splatI32 = vector::BroadcastOp::create(
                         rewriter, loc,
                         VectorType::get({vecLen}, rewriter.getIntegerType(32)),
                         narrowAsI32)
                         .getResult();
    Value packedNarrow = vector::BitCastOp::create(rewriter, loc,
                                                   flatWide.getType(), splatI32)
                             .getResult();

    // Keep the original operand positions: the LHS-derived value is always the
    // first multiplicand and the RHS-derived value the second.
    Value dotLhs = rhsIsWide ? packedNarrow : flatWide;
    Value dotRhs = rhsIsWide ? flatWide : packedNarrow;

    // The element type was validated above to be BF16 or signless i8, so
    // exactly one of the two dot-product ops is created.
    Value dp;
    if (isBF16) {
      dp = x86vector::DotBF16Op::create(
          rewriter, loc, VectorType::get({vecLen}, rewriter.getF32Type()),
          castAcc, dotLhs, dotRhs);
    } else {
      dp = x86vector::DotInt8Op::create(
          rewriter, loc, VectorType::get({vecLen}, rewriter.getIntegerType(32)),
          castAcc, dotLhs, dotRhs);
    }

    // Restore the accumulator's original (unit-dim padded) shape.
    auto castDp = vector::ShapeCastOp::create(rewriter, loc, accTy, dp);
    rewriter.replaceOp(contractOp, castDp);
    return success();
  }
};


} // namespace


// Adds the VectorContractToPackedTypeDotProduct rewrite pattern to `patterns`,
// constructing it with the context owned by that pattern set.
void x86vector::populateVectorContractToPackedTypeDotProductPatterns(
    RewritePatternSet &patterns) {
  MLIRContext *context = patterns.getContext();
  patterns.add<VectorContractToPackedTypeDotProduct>(context);
}


success
return success()

Dominance.h

GreedyPatternRewriteDriver.h

LinalgInterfaces.h

PatternMatch.h

VectorOps.h

VectorUtils.h

X86VectorDialect.h

X86VectorUtils.h

mlir::Builder::getF32Type
FloatType getF32Type()
Definition Builders.cpp:43

mlir::Builder::getIntegerType
IntegerType getIntegerType(unsigned width)
Definition Builders.cpp:67

mlir::RewritePatternSet
Definition PatternMatch.h:822

mlir::RewriterBase::replaceOp
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
Definition PatternMatch.cpp:127

mlir::RewriterBase::notifyMatchFailure
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition PatternMatch.h:732

Pass.h

Linalg.h

Transforms.h

BuiltinAttributes.h

mlir::vector
Definition ConvertVectorToLLVM.h:22

mlir::x86vector
Definition X86VectorTransformOps.h:25

mlir::x86vector::populateVectorContractToPackedTypeDotProductPatterns
void populateVectorContractToPackedTypeDotProductPatterns(RewritePatternSet &patterns)
Definition VectorContractToPackedTypeDotProduct.cpp:213

mlir::x86vector::isInVnniLayout
bool isInVnniLayout(Operation *op, llvm::ArrayRef< AffineMap > indexingMaps, std::optional< unsigned > blockingFactor=std::nullopt)
Definition X86VectorUtils.cpp:35

mlir
Include the generated interface declarations.
Definition AliasAnalysis.h:19

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition GreedyPatternRewriteDriver.h:283

mlir::OpRewritePattern
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition PatternMatch.h:314