doxygen/MathToFuncs_8cpp_source.html

//===- MathToFuncs.cpp - Math to outlined implementation conversion -------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//


#include "mlir/Conversion/MathToFuncs/MathToFuncs.h"


#include "mlir/Dialect/Arith/IR/Arith.h"

#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"

#include "mlir/Dialect/Func/IR/FuncOps.h"

#include "mlir/Dialect/LLVMIR/LLVMDialect.h"

#include "mlir/Dialect/Math/IR/Math.h"

#include "mlir/Dialect/SCF/IR/SCF.h"

#include "mlir/Dialect/Utils/IndexingUtils.h"

#include "mlir/Dialect/Vector/IR/VectorOps.h"

#include "mlir/Dialect/Vector/Utils/VectorUtils.h"

#include "mlir/IR/TypeUtilities.h"

#include "mlir/Pass/Pass.h"

#include "mlir/Transforms/DialectConversion.h"

#include "llvm/ADT/DenseMap.h"

#include "llvm/ADT/TypeSwitch.h"

#include "llvm/Support/DebugLog.h"


namespace mlir {

#define GEN_PASS_DEF_CONVERTMATHTOFUNCS

#include "mlir/Conversion/Passes.h.inc"

} // namespace mlir


using namespace mlir;


#define DEBUG_TYPE "math-to-funcs"


namespace {

// Pattern to convert vector operations to scalar operations.

template <typename Op>

struct VecOpToScalarOp : public OpRewritePattern<Op> {

public:

  using OpRewritePattern<Op>::OpRewritePattern;


  LogicalResult matchAndRewrite(Op op, PatternRewriter &rewriter) const final;

};


// Callback type for getting pre-generated FuncOp implementing

// an operation of the given type.

using GetFuncCallbackTy = function_ref<func::FuncOp(Operation *, Type)>;


// Pattern to convert scalar IPowIOp into a call of outlined

// software implementation.

class IPowIOpLowering : public OpRewritePattern<math::IPowIOp> {

public:

  IPowIOpLowering(MLIRContext *context, GetFuncCallbackTy cb)

      : OpRewritePattern<math::IPowIOp>(context), getFuncOpCallback(cb) {}


  /// Convert IPowI into a call to a local function implementing

  /// the power operation. The local function computes a scalar result,

  /// so vector forms of IPowI are linearized.

  LogicalResult matchAndRewrite(math::IPowIOp op,

                                PatternRewriter &rewriter) const final;


private:

  GetFuncCallbackTy getFuncOpCallback;

};


// Pattern to convert scalar FPowIOp into a call of outlined

// software implementation.

class FPowIOpLowering : public OpRewritePattern<math::FPowIOp> {

public:

  FPowIOpLowering(MLIRContext *context, GetFuncCallbackTy cb)

      : OpRewritePattern<math::FPowIOp>(context), getFuncOpCallback(cb) {}


  /// Convert FPowI into a call to a local function implementing

  /// the power operation. The local function computes a scalar result,

  /// so vector forms of FPowI are linearized.

  LogicalResult matchAndRewrite(math::FPowIOp op,

                                PatternRewriter &rewriter) const final;


private:

  GetFuncCallbackTy getFuncOpCallback;

};


// Pattern to convert scalar ctlz into a call of outlined software

// implementation.

class CtlzOpLowering : public OpRewritePattern<math::CountLeadingZerosOp> {

public:

  CtlzOpLowering(MLIRContext *context, GetFuncCallbackTy cb)

      : OpRewritePattern<math::CountLeadingZerosOp>(context),

        getFuncOpCallback(cb) {}


  /// Convert ctlz into a call to a local function implementing

  /// the count leading zeros operation.

  LogicalResult matchAndRewrite(math::CountLeadingZerosOp op,

                                PatternRewriter &rewriter) const final;


private:

  GetFuncCallbackTy getFuncOpCallback;

};

} // namespace


template <typename Op>

LogicalResult

VecOpToScalarOp<Op>::matchAndRewrite(Op op, PatternRewriter &rewriter) const {

  Type opType = op.getType();

  Location loc = op.getLoc();

  auto vecType = dyn_cast<VectorType>(opType);


  if (!vecType)

    return rewriter.notifyMatchFailure(op, "not a vector operation");

  if (!vecType.hasRank())

    return rewriter.notifyMatchFailure(op, "unknown vector rank");

  ArrayRef<int64_t> shape = vecType.getShape();

  int64_t numElements = vecType.getNumElements();


  Type resultElementType = vecType.getElementType();

  Attribute initValueAttr;

  if (isa<FloatType>(resultElementType))

    initValueAttr = FloatAttr::get(resultElementType, 0.0);

  else

    initValueAttr = IntegerAttr::get(resultElementType, 0);

  Value result = arith::ConstantOp::create(

      rewriter, loc, DenseElementsAttr::get(vecType, initValueAttr));

  SmallVector<int64_t> strides = computeStrides(shape);

  for (int64_t linearIndex = 0; linearIndex < numElements; ++linearIndex) {

    SmallVector<int64_t> positions = delinearize(linearIndex, strides);

    SmallVector<Value> operands;

    for (Value input : op->getOperands())

      operands.push_back(

          vector::ExtractOp::create(rewriter, loc, input, positions));

    Value scalarOp =

        Op::create(rewriter, loc, vecType.getElementType(), operands);

    result =

        vector::InsertOp::create(rewriter, loc, scalarOp, result, positions);

  }

  rewriter.replaceOp(op, result);

  return success();

}


static FunctionType getElementalFuncTypeForOp(Operation *op) {

  SmallVector<Type, 1> resultTys(op->getNumResults());

  SmallVector<Type, 2> inputTys(op->getNumOperands());

  std::transform(op->result_type_begin(), op->result_type_end(),

                 resultTys.begin(),

                 [](Type ty) { return getElementTypeOrSelf(ty); });

  std::transform(op->operand_type_begin(), op->operand_type_end(),

                 inputTys.begin(),

                 [](Type ty) { return getElementTypeOrSelf(ty); });

  return FunctionType::get(op->getContext(), inputTys, resultTys);

}


/// Create linkonce_odr function to implement the power function with

/// the given \p elementType type inside \p module. The \p elementType

/// must be IntegerType, an the created function has

/// 'IntegerType (*)(IntegerType, IntegerType)' function type.

///

/// template <typename T>

/// T __mlir_math_ipowi_*(T b, T p) {

///   if (p == T(0))

///     return T(1);

///   if (p < T(0)) {

///     if (b == T(0))

///       return T(1) / T(0); // trigger div-by-zero

///     if (b == T(1))

///       return T(1);

///     if (b == T(-1)) {

///       if (p & T(1))

///         return T(-1);

///       return T(1);

///     }

///     return T(0);

///   }

///   T result = T(1);

///   while (true) {

///     if (p & T(1))

///       result *= b;

///     p >>= T(1);

///     if (p == T(0))

///       return result;

///     b *= b;

///   }

/// }


static func::FuncOp createElementIPowIFunc(ModuleOp *module, Type elementType) {

  assert(isa<IntegerType>(elementType) &&

         "non-integer element type for IPowIOp");


  ImplicitLocOpBuilder builder =

      ImplicitLocOpBuilder::atBlockEnd(module->getLoc(), module->getBody());


  std::string funcName("__mlir_math_ipowi");

  llvm::raw_string_ostream nameOS(funcName);

  nameOS << '_' << elementType;


  FunctionType funcType = FunctionType::get(

      builder.getContext(), {elementType, elementType}, elementType);

  auto funcOp = func::FuncOp::create(builder, funcName, funcType);

  LLVM::linkage::Linkage inlineLinkage = LLVM::linkage::Linkage::LinkonceODR;

  Attribute linkage =

      LLVM::LinkageAttr::get(builder.getContext(), inlineLinkage);

  funcOp->setAttr("llvm.linkage", linkage);

  funcOp.setPrivate();


  Block *entryBlock = funcOp.addEntryBlock();

  Region *funcBody = entryBlock->getParent();


  Value bArg = funcOp.getArgument(0);

  Value pArg = funcOp.getArgument(1);

  builder.setInsertionPointToEnd(entryBlock);

  Value zeroValue = arith::ConstantOp::create(

      builder, elementType, builder.getIntegerAttr(elementType, 0));

  Value oneValue = arith::ConstantOp::create(

      builder, elementType, builder.getIntegerAttr(elementType, 1));

  Value minusOneValue = arith::ConstantOp::create(

      builder, elementType,

      builder.getIntegerAttr(elementType,

                             APInt(elementType.getIntOrFloatBitWidth(), -1ULL,

                                   /*isSigned=*/true)));


  // if (p == T(0))

  //   return T(1);

  auto pIsZero =

      arith::CmpIOp::create(builder, arith::CmpIPredicate::eq, pArg, zeroValue);

  Block *thenBlock = builder.createBlock(funcBody);

  func::ReturnOp::create(builder, oneValue);

  Block *fallthroughBlock = builder.createBlock(funcBody);

  // Set up conditional branch for (p == T(0)).

  builder.setInsertionPointToEnd(pIsZero->getBlock());

  cf::CondBranchOp::create(builder, pIsZero, thenBlock, fallthroughBlock);


  // if (p < T(0)) {

  builder.setInsertionPointToEnd(fallthroughBlock);

  auto pIsNeg = arith::CmpIOp::create(builder, arith::CmpIPredicate::sle, pArg,

                                      zeroValue);

  //   if (b == T(0))

  builder.createBlock(funcBody);

  auto bIsZero =

      arith::CmpIOp::create(builder, arith::CmpIPredicate::eq, bArg, zeroValue);

  //     return T(1) / T(0);

  thenBlock = builder.createBlock(funcBody);

  func::ReturnOp::create(

      builder,

      arith::DivSIOp::create(builder, oneValue, zeroValue).getResult());

  fallthroughBlock = builder.createBlock(funcBody);

  // Set up conditional branch for (b == T(0)).

  builder.setInsertionPointToEnd(bIsZero->getBlock());

  cf::CondBranchOp::create(builder, bIsZero, thenBlock, fallthroughBlock);


  //   if (b == T(1))

  builder.setInsertionPointToEnd(fallthroughBlock);

  auto bIsOne =

      arith::CmpIOp::create(builder, arith::CmpIPredicate::eq, bArg, oneValue);

  //    return T(1);

  thenBlock = builder.createBlock(funcBody);

  func::ReturnOp::create(builder, oneValue);

  fallthroughBlock = builder.createBlock(funcBody);

  // Set up conditional branch for (b == T(1)).

  builder.setInsertionPointToEnd(bIsOne->getBlock());

  cf::CondBranchOp::create(builder, bIsOne, thenBlock, fallthroughBlock);


  //   if (b == T(-1)) {

  builder.setInsertionPointToEnd(fallthroughBlock);

  auto bIsMinusOne = arith::CmpIOp::create(builder, arith::CmpIPredicate::eq,

                                           bArg, minusOneValue);

  //     if (p & T(1))

  builder.createBlock(funcBody);

  auto pIsOdd = arith::CmpIOp::create(

      builder, arith::CmpIPredicate::ne,

      arith::AndIOp::create(builder, pArg, oneValue), zeroValue);

  //       return T(-1);

  thenBlock = builder.createBlock(funcBody);

  func::ReturnOp::create(builder, minusOneValue);

  fallthroughBlock = builder.createBlock(funcBody);

  // Set up conditional branch for (p & T(1)).

  builder.setInsertionPointToEnd(pIsOdd->getBlock());

  cf::CondBranchOp::create(builder, pIsOdd, thenBlock, fallthroughBlock);


  //     return T(1);

  //   } // b == T(-1)

  builder.setInsertionPointToEnd(fallthroughBlock);

  func::ReturnOp::create(builder, oneValue);

  fallthroughBlock = builder.createBlock(funcBody);

  // Set up conditional branch for (b == T(-1)).

  builder.setInsertionPointToEnd(bIsMinusOne->getBlock());

  cf::CondBranchOp::create(builder, bIsMinusOne, pIsOdd->getBlock(),

                           fallthroughBlock);


  //   return T(0);

  // } // (p < T(0))

  builder.setInsertionPointToEnd(fallthroughBlock);

  func::ReturnOp::create(builder, zeroValue);

  Block *loopHeader = builder.createBlock(

      funcBody, funcBody->end(), {elementType, elementType, elementType},

      {builder.getLoc(), builder.getLoc(), builder.getLoc()});

  // Set up conditional branch for (p < T(0)).

  builder.setInsertionPointToEnd(pIsNeg->getBlock());

  // Set initial values of 'result', 'b' and 'p' for the loop.

  cf::CondBranchOp::create(builder, pIsNeg, bIsZero->getBlock(), loopHeader,

                           ValueRange{oneValue, bArg, pArg});


  // T result = T(1);

  // while (true) {

  //   if (p & T(1))

  //     result *= b;

  //   p >>= T(1);

  //   if (p == T(0))

  //     return result;

  //   b *= b;

  // }

  Value resultTmp = loopHeader->getArgument(0);

  Value baseTmp = loopHeader->getArgument(1);

  Value powerTmp = loopHeader->getArgument(2);

  builder.setInsertionPointToEnd(loopHeader);


  //   if (p & T(1))

  auto powerTmpIsOdd = arith::CmpIOp::create(

      builder, arith::CmpIPredicate::ne,

      arith::AndIOp::create(builder, powerTmp, oneValue), zeroValue);

  thenBlock = builder.createBlock(funcBody);

  //     result *= b;

  Value newResultTmp = arith::MulIOp::create(builder, resultTmp, baseTmp);

  fallthroughBlock = builder.createBlock(funcBody, funcBody->end(), elementType,

                                         builder.getLoc());

  builder.setInsertionPointToEnd(thenBlock);

  cf::BranchOp::create(builder, newResultTmp, fallthroughBlock);

  // Set up conditional branch for (p & T(1)).

  builder.setInsertionPointToEnd(powerTmpIsOdd->getBlock());

  cf::CondBranchOp::create(builder, powerTmpIsOdd, thenBlock, fallthroughBlock,

                           resultTmp);

  // Merged 'result'.

  newResultTmp = fallthroughBlock->getArgument(0);


  //   p >>= T(1);

  builder.setInsertionPointToEnd(fallthroughBlock);

  Value newPowerTmp = arith::ShRUIOp::create(builder, powerTmp, oneValue);


  //   if (p == T(0))

  auto newPowerIsZero = arith::CmpIOp::create(builder, arith::CmpIPredicate::eq,

                                              newPowerTmp, zeroValue);

  //     return result;

  thenBlock = builder.createBlock(funcBody);

  func::ReturnOp::create(builder, newResultTmp);

  fallthroughBlock = builder.createBlock(funcBody);

  // Set up conditional branch for (p == T(0)).

  builder.setInsertionPointToEnd(newPowerIsZero->getBlock());

  cf::CondBranchOp::create(builder, newPowerIsZero, thenBlock,

                           fallthroughBlock);


  //   b *= b;

  // }

  builder.setInsertionPointToEnd(fallthroughBlock);

  Value newBaseTmp = arith::MulIOp::create(builder, baseTmp, baseTmp);

  // Pass new values for 'result', 'b' and 'p' to the loop header.

  cf::BranchOp::create(

      builder, ValueRange{newResultTmp, newBaseTmp, newPowerTmp}, loopHeader);

  return funcOp;

}


/// Convert IPowI into a call to a local function implementing

/// the power operation. The local function computes a scalar result,

/// so vector forms of IPowI are linearized.

LogicalResult

IPowIOpLowering::matchAndRewrite(math::IPowIOp op,

                                 PatternRewriter &rewriter) const {

  auto baseType = dyn_cast<IntegerType>(op.getOperands()[0].getType());


  if (!baseType)

    return rewriter.notifyMatchFailure(op, "non-integer base operand");


  // The outlined software implementation must have been already

  // generated.

  func::FuncOp elementFunc = getFuncOpCallback(op, baseType);

  if (!elementFunc)

    return rewriter.notifyMatchFailure(op, "missing software implementation");


  rewriter.replaceOpWithNewOp<func::CallOp>(op, elementFunc, op.getOperands());

  return success();

}


/// Create linkonce_odr function to implement the power function with

/// the given \p funcType type inside \p module. The \p funcType must be

/// 'FloatType (*)(FloatType, IntegerType)' function type.

///

/// template <typename T>

/// Tb __mlir_math_fpowi_*(Tb b, Tp p) {

///   if (p == Tp{0})

///     return Tb{1};

///   bool isNegativePower{p < Tp{0}}

///   bool isMin{p == std::numeric_limits<Tp>::min()};

///   if (isMin) {

///     p = std::numeric_limits<Tp>::max();

///   } else if (isNegativePower) {

///     p = -p;

///   }

///   Tb result = Tb{1};

///   Tb origBase = Tb{b};

///   while (true) {

///     if (p & Tp{1})

///       result *= b;

///     p >>= Tp{1};

///     if (p == Tp{0})

///       break;

///     b *= b;

///   }

///   if (isMin) {

///     result *= origBase;

///   }

///   if (isNegativePower) {

///     result = Tb{1} / result;

///   }

///   return result;

/// }


static func::FuncOp createElementFPowIFunc(ModuleOp *module,

                                           FunctionType funcType) {

  auto baseType = cast<FloatType>(funcType.getInput(0));

  auto powType = cast<IntegerType>(funcType.getInput(1));

  ImplicitLocOpBuilder builder =

      ImplicitLocOpBuilder::atBlockEnd(module->getLoc(), module->getBody());


  std::string funcName("__mlir_math_fpowi");

  llvm::raw_string_ostream nameOS(funcName);

  nameOS << '_' << baseType;

  nameOS << '_' << powType;

  auto funcOp = func::FuncOp::create(builder, funcName, funcType);

  LLVM::linkage::Linkage inlineLinkage = LLVM::linkage::Linkage::LinkonceODR;

  Attribute linkage =

      LLVM::LinkageAttr::get(builder.getContext(), inlineLinkage);

  funcOp->setAttr("llvm.linkage", linkage);

  funcOp.setPrivate();


  Block *entryBlock = funcOp.addEntryBlock();

  Region *funcBody = entryBlock->getParent();


  Value bArg = funcOp.getArgument(0);

  Value pArg = funcOp.getArgument(1);

  builder.setInsertionPointToEnd(entryBlock);

  Value oneBValue = arith::ConstantOp::create(

      builder, baseType, builder.getFloatAttr(baseType, 1.0));

  Value zeroPValue = arith::ConstantOp::create(

      builder, powType, builder.getIntegerAttr(powType, 0));

  Value onePValue = arith::ConstantOp::create(

      builder, powType, builder.getIntegerAttr(powType, 1));

  Value minPValue = arith::ConstantOp::create(

      builder, powType,

      builder.getIntegerAttr(

          powType, llvm::APInt::getSignedMinValue(powType.getWidth())));

  Value maxPValue = arith::ConstantOp::create(

      builder, powType,

      builder.getIntegerAttr(

          powType, llvm::APInt::getSignedMaxValue(powType.getWidth())));


  // if (p == Tp{0})

  //   return Tb{1};

  auto pIsZero = arith::CmpIOp::create(builder, arith::CmpIPredicate::eq, pArg,

                                       zeroPValue);

  Block *thenBlock = builder.createBlock(funcBody);

  func::ReturnOp::create(builder, oneBValue);

  Block *fallthroughBlock = builder.createBlock(funcBody);

  // Set up conditional branch for (p == Tp{0}).

  builder.setInsertionPointToEnd(pIsZero->getBlock());

  cf::CondBranchOp::create(builder, pIsZero, thenBlock, fallthroughBlock);


  builder.setInsertionPointToEnd(fallthroughBlock);

  // bool isNegativePower{p < Tp{0}}

  auto pIsNeg = arith::CmpIOp::create(builder, arith::CmpIPredicate::sle, pArg,

                                      zeroPValue);

  // bool isMin{p == std::numeric_limits<Tp>::min()};

  auto pIsMin =

      arith::CmpIOp::create(builder, arith::CmpIPredicate::eq, pArg, minPValue);


  // if (isMin) {

  //   p = std::numeric_limits<Tp>::max();

  // } else if (isNegativePower) {

  //   p = -p;

  // }

  Value negP = arith::SubIOp::create(builder, zeroPValue, pArg);

  auto pInit = arith::SelectOp::create(builder, pIsNeg, negP, pArg);

  pInit = arith::SelectOp::create(builder, pIsMin, maxPValue, pInit);


  // Tb result = Tb{1};

  // Tb origBase = Tb{b};

  // while (true) {

  //   if (p & Tp{1})

  //     result *= b;

  //   p >>= Tp{1};

  //   if (p == Tp{0})

  //     break;

  //   b *= b;

  // }

  Block *loopHeader = builder.createBlock(

      funcBody, funcBody->end(), {baseType, baseType, powType},

      {builder.getLoc(), builder.getLoc(), builder.getLoc()});

  // Set initial values of 'result', 'b' and 'p' for the loop.

  builder.setInsertionPointToEnd(pInit->getBlock());

  cf::BranchOp::create(builder, loopHeader, ValueRange{oneBValue, bArg, pInit});


  // Create loop body.

  Value resultTmp = loopHeader->getArgument(0);

  Value baseTmp = loopHeader->getArgument(1);

  Value powerTmp = loopHeader->getArgument(2);

  builder.setInsertionPointToEnd(loopHeader);


  //   if (p & Tp{1})

  auto powerTmpIsOdd = arith::CmpIOp::create(

      builder, arith::CmpIPredicate::ne,

      arith::AndIOp::create(builder, powerTmp, onePValue), zeroPValue);

  thenBlock = builder.createBlock(funcBody);

  //     result *= b;

  Value newResultTmp = arith::MulFOp::create(builder, resultTmp, baseTmp);

  fallthroughBlock = builder.createBlock(funcBody, funcBody->end(), baseType,

                                         builder.getLoc());

  builder.setInsertionPointToEnd(thenBlock);

  cf::BranchOp::create(builder, newResultTmp, fallthroughBlock);

  // Set up conditional branch for (p & Tp{1}).

  builder.setInsertionPointToEnd(powerTmpIsOdd->getBlock());

  cf::CondBranchOp::create(builder, powerTmpIsOdd, thenBlock, fallthroughBlock,

                           resultTmp);

  // Merged 'result'.

  newResultTmp = fallthroughBlock->getArgument(0);


  //   p >>= Tp{1};

  builder.setInsertionPointToEnd(fallthroughBlock);

  Value newPowerTmp = arith::ShRUIOp::create(builder, powerTmp, onePValue);


  //   if (p == Tp{0})

  auto newPowerIsZero = arith::CmpIOp::create(builder, arith::CmpIPredicate::eq,

                                              newPowerTmp, zeroPValue);

  //     break;

  //

  // The conditional branch is finalized below with a jump to

  // the loop exit block.

  fallthroughBlock = builder.createBlock(funcBody);


  //   b *= b;

  // }

  builder.setInsertionPointToEnd(fallthroughBlock);

  Value newBaseTmp = arith::MulFOp::create(builder, baseTmp, baseTmp);

  // Pass new values for 'result', 'b' and 'p' to the loop header.

  cf::BranchOp::create(

      builder, ValueRange{newResultTmp, newBaseTmp, newPowerTmp}, loopHeader);


  // Set up conditional branch for early loop exit:

  //   if (p == Tp{0})

  //     break;

  Block *loopExit = builder.createBlock(funcBody, funcBody->end(), baseType,

                                        builder.getLoc());

  builder.setInsertionPointToEnd(newPowerIsZero->getBlock());

  cf::CondBranchOp::create(builder, newPowerIsZero, loopExit, newResultTmp,

                           fallthroughBlock, ValueRange{});


  // if (isMin) {

  //   result *= origBase;

  // }

  newResultTmp = loopExit->getArgument(0);

  thenBlock = builder.createBlock(funcBody);

  fallthroughBlock = builder.createBlock(funcBody, funcBody->end(), baseType,

                                         builder.getLoc());

  builder.setInsertionPointToEnd(loopExit);

  cf::CondBranchOp::create(builder, pIsMin, thenBlock, fallthroughBlock,

                           newResultTmp);

  builder.setInsertionPointToEnd(thenBlock);

  newResultTmp = arith::MulFOp::create(builder, newResultTmp, bArg);

  cf::BranchOp::create(builder, newResultTmp, fallthroughBlock);


  /// if (isNegativePower) {

  ///   result = Tb{1} / result;

  /// }

  newResultTmp = fallthroughBlock->getArgument(0);

  thenBlock = builder.createBlock(funcBody);

  Block *returnBlock = builder.createBlock(funcBody, funcBody->end(), baseType,

                                           builder.getLoc());

  builder.setInsertionPointToEnd(fallthroughBlock);

  cf::CondBranchOp::create(builder, pIsNeg, thenBlock, returnBlock,

                           newResultTmp);

  builder.setInsertionPointToEnd(thenBlock);

  newResultTmp = arith::DivFOp::create(builder, oneBValue, newResultTmp);

  cf::BranchOp::create(builder, newResultTmp, returnBlock);


  // return result;

  builder.setInsertionPointToEnd(returnBlock);

  func::ReturnOp::create(builder, returnBlock->getArgument(0));


  return funcOp;

}


/// Convert FPowI into a call to a local function implementing

/// the power operation. The local function computes a scalar result,

/// so vector forms of FPowI are linearized.

LogicalResult

FPowIOpLowering::matchAndRewrite(math::FPowIOp op,

                                 PatternRewriter &rewriter) const {

  if (isa<VectorType>(op.getType()))

    return rewriter.notifyMatchFailure(op, "non-scalar operation");


  FunctionType funcType = getElementalFuncTypeForOp(op);


  // The outlined software implementation must have been already

  // generated.

  func::FuncOp elementFunc = getFuncOpCallback(op, funcType);

  if (!elementFunc)

    return rewriter.notifyMatchFailure(op, "missing software implementation");


  rewriter.replaceOpWithNewOp<func::CallOp>(op, elementFunc, op.getOperands());

  return success();

}


/// Create function to implement the ctlz function the given \p elementType type

/// inside \p module. The \p elementType must be IntegerType, an the created

/// function has 'IntegerType (*)(IntegerType)' function type.

///

/// template <typename T>

/// T __mlir_math_ctlz_*(T x) {

///     bits = sizeof(x) * 8;

///     if (x == 0)

///       return bits;

///

///     uint32_t n = 0;

///     for (int i = 1; i < bits; ++i) {

///         if (x < 0) continue;

///         n++;

///         x <<= 1;

///     }

///     return n;

/// }

///

/// Converts to (for i32):

///

/// func.func private @__mlir_math_ctlz_i32(%arg: i32) -> i32 {

///   %c_32 = arith.constant 32 : index

///   %c_0 = arith.constant 0 : i32

///   %arg_eq_zero = arith.cmpi eq, %arg, %c_0 : i1

///   %out = scf.if %arg_eq_zero {

///     scf.yield %c_32 : i32

///   } else {

///     %c_1index = arith.constant 1 : index

///     %c_1i32 = arith.constant 1 : i32

///     %n = arith.constant 0 : i32

///     %arg_out, %n_out = scf.for %i = %c_1index to %c_32 step %c_1index

///         iter_args(%arg_iter = %arg, %n_iter = %n) -> (i32, i32) {

///       %cond = arith.cmpi slt, %arg_iter, %c_0 : i32

///       %yield_val = scf.if %cond {

///         scf.yield %arg_iter, %n_iter : i32, i32

///       } else {

///         %arg_next = arith.shli %arg_iter, %c_1i32 : i32

///         %n_next = arith.addi %n_iter, %c_1i32 : i32

///         scf.yield %arg_next, %n_next : i32, i32

///       }

///       scf.yield %yield_val: i32, i32

///     }

///     scf.yield %n_out : i32

///   }

///   return %out: i32

/// }


static func::FuncOp createCtlzFunc(ModuleOp *module, Type elementType) {

  if (!isa<IntegerType>(elementType)) {

    LDBG() << "non-integer element type for CtlzFunc; type was: "

           << elementType;

    llvm_unreachable("non-integer element type");

  }

  int64_t bitWidth = elementType.getIntOrFloatBitWidth();


  Location loc = module->getLoc();

  ImplicitLocOpBuilder builder =

      ImplicitLocOpBuilder::atBlockEnd(loc, module->getBody());


  std::string funcName("__mlir_math_ctlz");

  llvm::raw_string_ostream nameOS(funcName);

  nameOS << '_' << elementType;

  FunctionType funcType =

      FunctionType::get(builder.getContext(), {elementType}, elementType);

  auto funcOp = func::FuncOp::create(builder, funcName, funcType);


  // LinkonceODR ensures that there is only one implementation of this function

  // across all math.ctlz functions that are lowered in this way.

  LLVM::linkage::Linkage inlineLinkage = LLVM::linkage::Linkage::LinkonceODR;

  Attribute linkage =

      LLVM::LinkageAttr::get(builder.getContext(), inlineLinkage);

  funcOp->setAttr("llvm.linkage", linkage);

  funcOp.setPrivate();


  // set the insertion point to the start of the function

  Block *funcBody = funcOp.addEntryBlock();

  builder.setInsertionPointToStart(funcBody);


  Value arg = funcOp.getArgument(0);

  Type indexType = builder.getIndexType();

  Value bitWidthValue = arith::ConstantOp::create(

      builder, elementType, builder.getIntegerAttr(elementType, bitWidth));

  Value zeroValue = arith::ConstantOp::create(

      builder, elementType, builder.getIntegerAttr(elementType, 0));


  Value inputEqZero =

      arith::CmpIOp::create(builder, arith::CmpIPredicate::eq, arg, zeroValue);


  // if input == 0, return bit width, else enter loop.

  scf::IfOp ifOp =

      scf::IfOp::create(builder, elementType, inputEqZero,

                        /*addThenBlock=*/true, /*addElseBlock=*/true);

  auto thenBuilder = ifOp.getThenBodyBuilder();

  scf::YieldOp::create(thenBuilder, loc, bitWidthValue);


  auto elseBuilder =

      ImplicitLocOpBuilder::atBlockEnd(loc, &ifOp.getElseRegion().front());


  Value oneIndex = arith::ConstantOp::create(elseBuilder, indexType,

                                             elseBuilder.getIndexAttr(1));

  Value oneValue = arith::ConstantOp::create(

      elseBuilder, elementType, elseBuilder.getIntegerAttr(elementType, 1));

  Value bitWidthIndex = arith::ConstantOp::create(

      elseBuilder, indexType, elseBuilder.getIndexAttr(bitWidth));

  Value nValue = arith::ConstantOp::create(

      elseBuilder, elementType, elseBuilder.getIntegerAttr(elementType, 0));


  auto loop = scf::ForOp::create(

      elseBuilder, oneIndex, bitWidthIndex, oneIndex,

      // Initial values for two loop induction variables, the arg which is being

      // shifted left in each iteration, and the n value which tracks the count

      // of leading zeros.

      ValueRange{arg, nValue},

      // Callback to build the body of the for loop

      //   if (arg < 0) {

      //     continue;

      //   } else {

      //     n++;

      //     arg <<= 1;

      //   }

      [&](OpBuilder &b, Location loc, Value iv, ValueRange args) {

        Value argIter = args[0];

        Value nIter = args[1];


        Value argIsNonNegative = arith::CmpIOp::create(

            b, loc, arith::CmpIPredicate::slt, argIter, zeroValue);

        scf::IfOp ifOp = scf::IfOp::create(

            b, loc, argIsNonNegative,

            [&](OpBuilder &b, Location loc) {

              // If arg is negative, continue (effectively, break)

              scf::YieldOp::create(b, loc, ValueRange{argIter, nIter});

            },

            [&](OpBuilder &b, Location loc) {

              // Otherwise, increment n and shift arg left.

              Value nNext = arith::AddIOp::create(b, loc, nIter, oneValue);

              Value argNext = arith::ShLIOp::create(b, loc, argIter, oneValue);

              scf::YieldOp::create(b, loc, ValueRange{argNext, nNext});

            });

        scf::YieldOp::create(b, loc, ifOp.getResults());

      });

  scf::YieldOp::create(elseBuilder, loop.getResult(1));


  func::ReturnOp::create(builder, ifOp.getResult(0));

  return funcOp;

}


/// Convert ctlz into a call to a local function implementing the ctlz

/// operation.

LogicalResult CtlzOpLowering::matchAndRewrite(math::CountLeadingZerosOp op,

                                              PatternRewriter &rewriter) const {

  if (isa<VectorType>(op.getType()))

    return rewriter.notifyMatchFailure(op, "non-scalar operation");


  Type type = getElementTypeOrSelf(op.getResult().getType());

  func::FuncOp elementFunc = getFuncOpCallback(op, type);

  if (!elementFunc)

    return rewriter.notifyMatchFailure(op, [&](::mlir::Diagnostic &diag) {

      diag << "Missing software implementation for op " << op->getName()

           << " and type " << type;

    });


  rewriter.replaceOpWithNewOp<func::CallOp>(op, elementFunc, op.getOperand());

  return success();

}


namespace {

struct ConvertMathToFuncsPass

    : public impl::ConvertMathToFuncsBase<ConvertMathToFuncsPass> {

  ConvertMathToFuncsPass() = default;

  ConvertMathToFuncsPass(const ConvertMathToFuncsOptions &options)

      : impl::ConvertMathToFuncsBase<ConvertMathToFuncsPass>(options) {}


  void runOnOperation() override;


private:

  // Return true, if this FPowI operation must be converted

  // because the width of its exponent's type is greater than

  // or equal to minWidthOfFPowIExponent option value.

  bool isFPowIConvertible(math::FPowIOp op);


  // Reture true, if operation is integer type.

  bool isConvertible(Operation *op);


  // Generate outlined implementations for power operations

  // and store them in funcImpls map.

  void generateOpImplementations();


  // A map between pairs of (operation, type) deduced from operations that this

  // pass will convert, and the corresponding outlined software implementations

  // of these operations for the given type.

  DenseMap<std::pair<OperationName, Type>, func::FuncOp> funcImpls;

};

} // namespace


bool ConvertMathToFuncsPass::isFPowIConvertible(math::FPowIOp op) {

  auto expTy =

      dyn_cast<IntegerType>(getElementTypeOrSelf(op.getRhs().getType()));

  return (expTy && expTy.getWidth() >= minWidthOfFPowIExponent);

}


bool ConvertMathToFuncsPass::isConvertible(Operation *op) {

  return isa<IntegerType>(getElementTypeOrSelf(op->getResult(0).getType()));

}


void ConvertMathToFuncsPass::generateOpImplementations() {

  ModuleOp module = getOperation();


  module.walk([&](Operation *op) {

    TypeSwitch<Operation *>(op)

        .Case<math::CountLeadingZerosOp>([&](math::CountLeadingZerosOp op) {

          if (!convertCtlz || !isConvertible(op))

            return;

          Type resultType = getElementTypeOrSelf(op.getResult().getType());


          // Generate the software implementation of this operation,

          // if it has not been generated yet.

          auto key = std::pair(op->getName(), resultType);

          auto entry = funcImpls.try_emplace(key, func::FuncOp{});

          if (entry.second)

            entry.first->second = createCtlzFunc(&module, resultType);

        })

        .Case<math::IPowIOp>([&](math::IPowIOp op) {

          if (!isConvertible(op))

            return;


          Type resultType = getElementTypeOrSelf(op.getResult().getType());


          // Generate the software implementation of this operation,

          // if it has not been generated yet.

          auto key = std::pair(op->getName(), resultType);

          auto entry = funcImpls.try_emplace(key, func::FuncOp{});

          if (entry.second)

            entry.first->second = createElementIPowIFunc(&module, resultType);

        })

        .Case<math::FPowIOp>([&](math::FPowIOp op) {

          if (!isFPowIConvertible(op))

            return;


          FunctionType funcType = getElementalFuncTypeForOp(op);


          // Generate the software implementation of this operation,

          // if it has not been generated yet.

          // FPowI implementations are mapped via the FunctionType

          // created from the operation's result and operands.

          auto key = std::pair(op->getName(), funcType);

          auto entry = funcImpls.try_emplace(key, func::FuncOp{});

          if (entry.second)

            entry.first->second = createElementFPowIFunc(&module, funcType);

        });

  });

}


void ConvertMathToFuncsPass::runOnOperation() {

  ModuleOp module = getOperation();


  // Create outlined implementations for power operations.

  generateOpImplementations();


  RewritePatternSet patterns(&getContext());

  patterns.add<VecOpToScalarOp<math::IPowIOp>, VecOpToScalarOp<math::FPowIOp>,

               VecOpToScalarOp<math::CountLeadingZerosOp>>(

      patterns.getContext());


  // For the given Type Returns FuncOp stored in funcImpls map.

  auto getFuncOpByType = [&](Operation *op, Type type) -> func::FuncOp {

    auto it = funcImpls.find(std::pair(op->getName(), type));

    if (it == funcImpls.end())

      return {};


    return it->second;

  };

  patterns.add<IPowIOpLowering, FPowIOpLowering>(patterns.getContext(),

                                                 getFuncOpByType);


  if (convertCtlz)

    patterns.add<CtlzOpLowering>(patterns.getContext(), getFuncOpByType);


  ConversionTarget target(getContext());

  target.addLegalDialect<arith::ArithDialect, cf::ControlFlowDialect,

                         func::FuncDialect, scf::SCFDialect,

                         vector::VectorDialect>();


  target.addDynamicallyLegalOp<math::IPowIOp>(

      [this](math::IPowIOp op) { return !isConvertible(op); });

  if (convertCtlz) {

    target.addDynamicallyLegalOp<math::CountLeadingZerosOp>(

        [this](math::CountLeadingZerosOp op) { return !isConvertible(op); });

  }

  target.addDynamicallyLegalOp<math::FPowIOp>(

      [this](math::FPowIOp op) { return !isFPowIConvertible(op); });

  if (failed(applyPartialConversion(module, target, std::move(patterns))))

    signalPassFailure();

}

success
return success()

ControlFlowOps.h

DialectConversion.h

FuncOps.h

IndexingUtils.h

LLVMDialect.h

b
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
Definition LinalgTransformOps.cpp:2096

target
target
Definition LinalgTransformOps.cpp:2099

result
result
Definition LinalgTransformOps.cpp:2097

getContext
b getContext())

createElementIPowIFunc
static func::FuncOp createElementIPowIFunc(ModuleOp *module, Type elementType)
Create linkonce_odr function to implement the power function with the given elementType type inside m...
Definition MathToFuncs.cpp:183

getElementalFuncTypeForOp
static FunctionType getElementalFuncTypeForOp(Operation *op)
Definition MathToFuncs.cpp:140

createElementFPowIFunc
static func::FuncOp createElementFPowIFunc(ModuleOp *module, FunctionType funcType)
Create linkonce_odr function to implement the power function with the given funcType type inside modu...
Definition MathToFuncs.cpp:412

createCtlzFunc
static func::FuncOp createCtlzFunc(ModuleOp *module, Type elementType)
Create function to implement the ctlz function the given elementType type inside module.
Definition MathToFuncs.cpp:653

MathToFuncs.h

diag
static std::string diag(const llvm::Value &value)
Definition ModuleImport.cpp:57

options
static llvm::ManagedStatic< PassManagerOptions > options
Definition PassManagerOptions.cpp:89

TypeUtilities.h

VectorOps.h

VectorUtils.h

int64_t

llvm::ArrayRef
Definition LLVM.h:48

llvm::SmallVector
Definition LLVM.h:72

mlir::Attribute
Attributes are known-constant values of operations.
Definition Attributes.h:25

mlir::Block
Block represents an ordered list of Operations.
Definition Block.h:33

mlir::Block::getArgument
BlockArgument getArgument(unsigned i)
Definition Block.h:129

mlir::Block::getParent
Region * getParent() const
Provide a 'getParent' method for ilist_node_with_parent methods.
Definition Block.cpp:27

mlir::Builder::getIntegerAttr
IntegerAttr getIntegerAttr(Type type, int64_t value)
Definition Builders.cpp:228

mlir::Builder::getFloatAttr
FloatAttr getFloatAttr(Type type, double value)
Definition Builders.cpp:254

mlir::Builder::getContext
MLIRContext * getContext() const
Definition Builders.h:56

mlir::Builder::getIndexType
IndexType getIndexType()
Definition Builders.cpp:51

mlir::DenseElementsAttr::get
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
Definition BuiltinAttributes.cpp:910

mlir::ImplicitLocOpBuilder
ImplicitLocOpBuilder maintains a 'current location', allowing use of the create<> method without spec...
Definition Builders.h:630

mlir::ImplicitLocOpBuilder::getLoc
Location getLoc() const
Accessors for the implied location.
Definition Builders.h:663

mlir::ImplicitLocOpBuilder::atBlockEnd
static ImplicitLocOpBuilder atBlockEnd(Location loc, Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to after the last operation in the block but still insid...
Definition Builders.h:647

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63

mlir::OpBuilder
This class helps build Operations.
Definition Builders.h:207

mlir::OpBuilder::createBlock
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes={}, ArrayRef< Location > locs={})
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Definition Builders.cpp:430

mlir::OpBuilder::getBlock
Block * getBlock() const
Returns the current block of the builder.
Definition Builders.h:448

mlir::OpBuilder::setInsertionPointToStart
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition Builders.h:431

mlir::OpBuilder::setInsertionPointToEnd
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition Builders.h:436

mlir::OpState::getLoc
Location getLoc()
The source location the operation was defined or derived from.
Definition OpDefinition.h:129

mlir::Op
This provides public APIs that all operations should have.
Definition OpDefinition.h:1673

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88

mlir::Operation::getResult
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition Operation.h:407

mlir::Operation::operand_type_end
operand_type_iterator operand_type_end()
Definition Operation.h:396

mlir::Operation::getNumOperands
unsigned getNumOperands()
Definition Operation.h:346

mlir::Operation::result_type_end
result_type_iterator result_type_end()
Definition Operation.h:427

mlir::Operation::result_type_begin
result_type_iterator result_type_begin()
Definition Operation.h:426

mlir::Operation::getName
OperationName getName()
The name of an operation is the key identifier for it.
Definition Operation.h:119

mlir::Operation::getContext
MLIRContext * getContext()
Return the context this operation is associated with.
Definition Operation.h:216

mlir::Operation::getNumResults
unsigned getNumResults()
Return the number of results held by this operation.
Definition Operation.h:404

mlir::Operation::operand_type_begin
operand_type_iterator operand_type_begin()
Definition Operation.h:395

mlir::PatternRewriter
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition PatternMatch.h:793

mlir::Region
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26

mlir::Region::end
iterator end()
Definition Region.h:56

mlir::RewriterBase::replaceOp
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
Definition PatternMatch.cpp:127

mlir::RewriterBase::notifyMatchFailure
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition PatternMatch.h:726

mlir::RewriterBase::replaceOpWithNewOp
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition PatternMatch.h:529

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74

mlir::Type::getIntOrFloatBitWidth
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
Definition Types.cpp:122

mlir::ValueRange
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96

mlir::Value::getType
Type getType() const
Return the type of this value.
Definition Value.h:105

Pass.h

Arith.h

Math.h

SCF.h

mlir::remark::failed
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:561

mlir::shape
Definition ShapeMappingAnalysis.h:20

mlir
Include the generated interface declarations.
Definition AliasAnalysis.h:19

mlir::computeStrides
SmallVector< int64_t > computeStrides(ArrayRef< int64_t > sizes)
Definition IndexingUtils.h:47

mlir::delinearize
SmallVector< int64_t > delinearize(int64_t linearIndex, ArrayRef< int64_t > strides)
Given the strides together with a linear index in the dimension space, return the vector-space offset...
Definition IndexingUtils.cpp:97

mlir::getElementTypeOrSelf
Type getElementTypeOrSelf(Type type)
Return the element type or return the type itself.
Definition TypeUtilities.cpp:23

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition GreedyPatternRewriteDriver.h:283

mlir::DenseMap
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
Definition LLVM.h:126

mlir::function_ref
llvm::function_ref< Fn > function_ref
Definition LLVM.h:152

mlir::OpRewritePattern
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition PatternMatch.h:314