doxygen/ShuffleVectorFMAOps_8cpp_source.html

//===- ShuffleVectorFMAOps.cpp --------------------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//


#include "mlir/Dialect/Vector/IR/VectorOps.h"

#include "mlir/Dialect/X86/Transforms.h"

#include "mlir/Dialect/X86/X86Dialect.h"


#include "mlir/IR/PatternMatch.h"


#include "mlir/Pass/Pass.h"

#include "mlir/Transforms/GreedyPatternRewriteDriver.h"


using namespace mlir;

using namespace mlir::vector;

using namespace mlir::x86;


namespace {


// Validates whether the given operation is an x86 operation and has only

// one consumer.

static bool validateFMAOperands(Value op) {

  if (auto cvt = op.getDefiningOp<x86::CvtPackedEvenIndexedToF32Op>())

    return cvt.getResult().hasOneUse();


  if (auto bcst = op.getDefiningOp<x86::BcstToPackedF32Op>())

    return bcst.getResult().hasOneUse();


  return false;

}


// Validates the vector.fma operation on the following conditions:

// (i) one of the lhs or rhs defining operation should be

// CvtPackedEvenIndexedToF32Op, (ii) the lhs or rhs defining operation should be

// an x86 operation and has only one consumer, (iii) all operations

// are in the same block, and (iv) ths FMA has only one user.

static bool validateVectorFMAOp(vector::FMAOp fmaOp) {

  Value lhs = fmaOp.getLhs();

  Value rhs = fmaOp.getRhs();


  if (!isa<x86::CvtPackedEvenIndexedToF32Op>(lhs.getDefiningOp()) &&

      !isa<x86::CvtPackedEvenIndexedToF32Op>(rhs.getDefiningOp()))

    return false;


  if (!validateFMAOperands(lhs) || !validateFMAOperands(rhs))

    return false;


  if (lhs.getDefiningOp()->getBlock() != rhs.getDefiningOp()->getBlock())

    return false;


  if (lhs.getDefiningOp()->getBlock() != fmaOp->getBlock())

    return false;


  if (!fmaOp.getResult().hasOneUse())

    return false;


  Operation *consumer = *fmaOp.getResult().getUsers().begin();

  if (consumer->getBlock() != fmaOp->getBlock())

    return false;


  return true;

}


// Moves vector.fma along with the lhs and rhs defining operation before its

// consumer. If the consumer is vector.ShapeCastOp and has only one user then

// move before the consumer of vector.ShapeCastOp.

// TODO: Move before first consumer, if there are multiple.

static void moveFMA(PatternRewriter &rewriter, vector::FMAOp fmaOp) {

  Operation *consumer = *fmaOp.getResult().getUsers().begin();


  if (auto shapeCastOp = dyn_cast<vector::ShapeCastOp>(consumer)) {

    if (shapeCastOp.getResult().hasOneUse()) {

      Operation *nxtConsumer = *shapeCastOp.getResult().getUsers().begin();

      if (nxtConsumer->getBlock() == fmaOp->getBlock()) {

        consumer = *shapeCastOp.getResult().getUsers().begin();

        rewriter.moveOpBefore(fmaOp.getLhs().getDefiningOp(), consumer);

        rewriter.moveOpBefore(fmaOp.getRhs().getDefiningOp(), consumer);

        rewriter.moveOpBefore(fmaOp.getOperation(), consumer);

        rewriter.moveOpBefore(shapeCastOp.getOperation(), consumer);

        return;

      }

    }

  }


  rewriter.moveOpBefore(fmaOp.getLhs().getDefiningOp(), consumer);

  rewriter.moveOpBefore(fmaOp.getRhs().getDefiningOp(), consumer);

  rewriter.moveOpBefore(fmaOp.getOperation(), consumer);


  return;

}


// Shuffle FMAs with x86 operations as operands such that

// FMAs are grouped with respect to odd/even packed index.

//

// For example:

// ```

//   %1 = x86.avx.bcst_to_f32.packed

//   %2 = x86.avx.cvt.packed.odd.indexed_to_f32

//   %3 = vector.fma %1, %2, %arg1

//   %4 = x86.avx.bcst_to_f32.packed

//   %5 = x86.avx.cvt.packed.even.indexed_to_f32

//   %6 = vector.fma %4, %5, %3

//   %7 = x86.avx.bcst_to_f32.packed

//   %8 = x86.avx.cvt.packed.odd.indexed_to_f32

//   %9 = vector.fma %7, %8, %arg2

//   %10 = x86.avx.bcst_to_f32.packed

//   %11 = x86.avx.cvt.packed.even.indexed_to_f32

//   %12 = vector.fma %10, %11, %9

//   yield %6, %12

// ```

// to

// ```

//   %1 = x86.avx.bcst_to_f32.packed

//   %2 = x86.avx.cvt.packed.odd.indexed_to_f32

//   %3 = vector.fma %1, %2, %arg1

//   %7 = x86.avx.bcst_to_f32.packed

//   %8 = x86.avx.cvt.packed.odd.indexed_to_f32

//   %9 = vector.fma %7, %8, %arg2

//   %4 = x86.avx.bcst_to_f32.packed

//   %5 = x86.avx.cvt.packed.even.indexed_to_f32

//   %6 = vector.fma %4, %5, %3

//   %10 = x86.avx.bcst_to_f32.packed

//   %11 = x86.avx.cvt.packed.even.indexed_to_f32

//   %12 = vector.fma %10, %11, %9

//   yield %9, %12

// ```

// TODO: Shuffling supported only if the FMA, lhs/rhs defining operations

// have only one consumer. Have to extend this pass for multiple consumers.

struct ShuffleVectorFMAOps : public OpRewritePattern<vector::FMAOp> {

  using OpRewritePattern<vector::FMAOp>::OpRewritePattern;


  LogicalResult matchAndRewrite(vector::FMAOp fmaOp,

                                PatternRewriter &rewriter) const override {


    if (!validateVectorFMAOp(fmaOp))

      return failure();


    llvm::SmallVector<vector::FMAOp> fmaOps;

    Operation *nextOp = fmaOp;

    bool stopAtNextDependentFMA = true;


    // Break the loop and return failure if the immediate next FMA op

    // have CvtPackedEvenIndexedToF32Op in it's lhs/rhs defining ops.

    while ((nextOp = nextOp->getNextNode())) {

      auto fma = dyn_cast<vector::FMAOp>(nextOp);

      if (!fma)

        continue;


      bool hasX86CvtOperand =

          isa<x86::CvtPackedEvenIndexedToF32Op>(fma.getLhs().getDefiningOp()) ||

          isa<x86::CvtPackedEvenIndexedToF32Op>(fma.getRhs().getDefiningOp());


      if (hasX86CvtOperand && stopAtNextDependentFMA)

        break;


      if (validateVectorFMAOp(fma))

        fmaOps.push_back(fma);


      stopAtNextDependentFMA = false;

    }


    if (fmaOps.empty())

      return rewriter.notifyMatchFailure(

          fmaOp, "No eligible FMA operations were found: the operation may "

                 "already be shuffled, there may be no following FMAs, or the "

                 "following FMAs do not satisfy the shuffle conditions.");


    fmaOps.push_back(fmaOp);

    for (auto fmaOp : fmaOps)

      moveFMA(rewriter, fmaOp);


    return success();

  }

};


} // namespace


void x86::populateShuffleVectorFMAOpsPatterns(RewritePatternSet &patterns) {

  patterns.add<ShuffleVectorFMAOps>(patterns.getContext());

}


success
return success()

GreedyPatternRewriteDriver.h

lhs
lhs
Definition AffineExpr.cpp:833

PatternMatch.h

VectorOps.h

rhs
*B rhs
Definition VectorTransforms.cpp:2249

X86Dialect.h

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88

mlir::Operation::getBlock
Block * getBlock()
Returns the operation block that contains this operation.
Definition Operation.h:213

mlir::Operation::getResult
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition Operation.h:415

mlir::PatternRewriter
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition PatternMatch.h:799

mlir::RewritePatternSet
Definition PatternMatch.h:822

mlir::RewritePatternSet::getContext
MLIRContext * getContext() const
Definition PatternMatch.h:837

mlir::RewritePatternSet::add
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&...args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments.
Definition PatternMatch.h:861

mlir::RewriterBase::moveOpBefore
void moveOpBefore(Operation *op, Operation *existingOp)
Unlink this operation from its current block and insert it right before existingOp which may be in th...
Definition PatternMatch.cpp:443

mlir::RewriterBase::notifyMatchFailure
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition PatternMatch.h:732

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96

mlir::Value::getUsers
user_range getUsers() const
Definition Value.h:218

mlir::Value::getDefiningOp
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition Value.cpp:18

Pass.h

Transforms.h

mlir::vector
Definition ConvertVectorToLLVM.h:22

mlir::x86
Definition X86TransformOps.h:25

mlir::x86::populateShuffleVectorFMAOpsPatterns
void populateShuffleVectorFMAOpsPatterns(RewritePatternSet &patterns)
Definition ShuffleVectorFMAOps.cpp:182

mlir
Include the generated interface declarations.
Definition AliasAnalysis.h:19

mlir::OpRewritePattern
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition PatternMatch.h:314