doxygen/Arith_2Transforms_2EmulateWideInt_8cpp_source.html

 //===- EmulateWideInt.cpp - Wide integer operation emulation ----*- C++ -*-===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Dialect/Arith/Transforms/Passes.h"


 #include "mlir/Dialect/Arith/IR/Arith.h"

 #include "mlir/Dialect/Arith/Transforms/WideIntEmulationConverter.h"

 #include "mlir/Dialect/Arith/Utils/Utils.h"

 #include "mlir/Dialect/Func/IR/FuncOps.h"

 #include "mlir/Dialect/Func/Transforms/FuncConversions.h"

 #include "mlir/Dialect/Vector/IR/VectorOps.h"

 #include "mlir/IR/BuiltinTypes.h"

 #include "mlir/IR/TypeUtilities.h"

 #include "mlir/Transforms/DialectConversion.h"

 #include "llvm/ADT/APFloat.h"

 #include "llvm/ADT/APInt.h"

 #include "llvm/Support/FormatVariadic.h"

 #include "llvm/Support/MathExtras.h"

 #include <cassert>


 namespace mlir::arith {

 #define GEN_PASS_DEF_ARITHEMULATEWIDEINT

 #include "mlir/Dialect/Arith/Transforms/Passes.h.inc"

 } // namespace mlir::arith


 using namespace mlir;


 //===----------------------------------------------------------------------===//

 // Common Helper Functions

 //===----------------------------------------------------------------------===//


 /// Returns N bottom and N top bits from `value`, where N = `newBitWidth`.

 /// Treats `value` as a 2*N bits-wide integer.

 /// The bottom bits are returned in the first pair element, while the top bits

 /// in the second one.

 static std::pair<APInt, APInt> getHalves(const APInt &value,

                                          unsigned newBitWidth) {

   APInt low = value.extractBits(newBitWidth, 0);

   APInt high = value.extractBits(newBitWidth, newBitWidth);

   return {std::move(low), std::move(high)};

 }


 /// Computes the type obtained by shrinking the last (innermost) dimension of
 /// `type` to x1. A 1D vector is scalarized instead, matching the way vector
 /// values are extracted and inserted in this file, e.g.:
 ///   - vector<3x2xi16> --> vector<3x1xi16>
 ///   - vector<2xi16>   --> i16
 static Type reduceInnermostDim(VectorType type) {
   ArrayRef<int64_t> shape = type.getShape();
   Type elemTy = type.getElementType();
   if (shape.size() == 1)
     return elemTy;

   SmallVector<int64_t> reducedShape(shape.begin(), shape.end());
   reducedShape.back() = 1;
   return VectorType::get(reducedShape, elemTy);
 }


 /// Pulls out of `input` the slice located at position `lastOffset` of the last
 /// dimension. The result has the last dimension reduced to x1, or is a scalar
 /// when `input` is a 1D vector, e.g.:
 ///   - vector<3x2xi16> --> vector<3x1xi16>
 ///   - vector<2xi16>   --> i16
 static Value extractLastDimSlice(ConversionPatternRewriter &rewriter,
                                  Location loc, Value input,
                                  int64_t lastOffset) {
   ArrayRef<int64_t> inputShape = cast<VectorType>(input.getType()).getShape();
   assert(lastOffset < inputShape.back() && "Offset out of bounds");

   const size_t rank = inputShape.size();

   // 1D vectors yield a scalar element directly.
   if (rank == 1)
     return rewriter.create<vector::ExtractOp>(loc, input, lastOffset);

   // Otherwise take a strided slice that spans every leading dimension in full
   // and exactly one element of the last dimension.
   SmallVector<int64_t> sliceOffsets(rank, 0);
   sliceOffsets.back() = lastOffset;
   SmallVector<int64_t> sliceSizes(inputShape.begin(), inputShape.end());
   sliceSizes.back() = 1;
   SmallVector<int64_t> sliceStrides(rank, 1);

   return rewriter.create<vector::ExtractStridedSliceOp>(
       loc, input, sliceOffsets, sliceSizes, sliceStrides);
 }


 /// Returns the two last-dimension slices of `input`, whose type is
 /// `vector<...x2T>`: the slice at offset 0 first, then the slice at offset 1.
 static std::pair<Value, Value>
 extractLastDimHalves(ConversionPatternRewriter &rewriter, Location loc,
                      Value input) {
   // The offset-0 slice is created before the offset-1 slice.
   Value firstHalf = extractLastDimSlice(rewriter, loc, input, 0);
   Value secondHalf = extractLastDimSlice(rewriter, loc, input, 1);
   return {firstHalf, secondHalf};
 }


 /// Performs a vector shape cast to drop the trailing x1 dimension. If the
 /// `input` is a scalar (i.e., not of vector type), this is a noop and `input`
 /// is returned unchanged.
 /// Vector inputs are asserted to have at least two dimensions, the last of
 /// which must be x1.
 static Value dropTrailingX1Dim(ConversionPatternRewriter &rewriter,
                                Location loc, Value input) {
   auto vecTy = dyn_cast<VectorType>(input.getType());
   if (!vecTy)
     return input;

   // Shape cast to drop the last x1 dimension.
   ArrayRef<int64_t> shape = vecTy.getShape();
   assert(shape.size() >= 2 && "Expected vector with at least two dims");
   assert(shape.back() == 1 && "Expected the last vector dim to be x1");

   auto newVecTy = VectorType::get(shape.drop_back(), vecTy.getElementType());
   return rewriter.create<vector::ShapeCastOp>(loc, newVecTy, input);
 }


 /// Shape-casts a vector `input` to the same type with one extra trailing x1
 /// dimension. Values that are not of vector type are returned untouched.
 static Value appendX1Dim(ConversionPatternRewriter &rewriter, Location loc,
                          Value input) {
   if (auto inputVecTy = dyn_cast<VectorType>(input.getType())) {
     // Extend the original shape with a trailing x1 dim.
     ArrayRef<int64_t> oldShape = inputVecTy.getShape();
     SmallVector<int64_t> extendedShape(oldShape.begin(), oldShape.end());
     extendedShape.push_back(1);
     auto extendedTy =
         VectorType::get(extendedShape, inputVecTy.getElementType());
     return rewriter.create<vector::ShapeCastOp>(loc, extendedTy, input);
   }

   return input;
 }


 /// Writes `source` into `dest` at position `lastOffset` of the last dimension
 /// and returns the updated vector. A `source` of integer type is inserted as a
 /// single element; any other `source` is inserted as a strided slice.
 static Value insertLastDimSlice(ConversionPatternRewriter &rewriter,
                                 Location loc, Value source, Value dest,
                                 int64_t lastOffset) {
   ArrayRef<int64_t> destShape = cast<VectorType>(dest.getType()).getShape();
   assert(lastOffset < destShape.back() && "Offset out of bounds");

   if (!isa<IntegerType>(source.getType())) {
     // Vector source: place it at offset zero in every leading dimension and
     // at `lastOffset` in the last one, with unit strides.
     const size_t rank = destShape.size();
     SmallVector<int64_t> sliceOffsets(rank, 0);
     sliceOffsets.back() = lastOffset;
     SmallVector<int64_t> sliceStrides(rank, 1);
     return rewriter.create<vector::InsertStridedSliceOp>(
         loc, source, dest, sliceOffsets, sliceStrides);
   }

   // Scalar source.
   return rewriter.create<vector::InsertOp>(loc, source, dest, lastOffset);
 }


 /// Builds a value of type `resultType` by starting from an all-zero constant
 /// and inserting each of `resultComponents`, in order, at consecutive offsets
 /// of the last vector dimension.
 /// When all `resultComponents` are scalars, the result type is `vector<NxT>`;
 /// when `resultComponents` are `vector<...x1xT>`s, the result type is
 /// `vector<...xNxT>`, where `N` is the number of `resultComponents`.
 static Value constructResultVector(ConversionPatternRewriter &rewriter,
                                    Location loc, VectorType resultType,
                                    ValueRange resultComponents) {
   llvm::ArrayRef<int64_t> resultShape = resultType.getShape();
   // Only used by the assertions below; silences unused-variable warnings in
   // builds where assertions are compiled out.
   (void)resultShape;
   assert(!resultShape.empty() && "Result expected to have dimensions");
   assert(resultShape.back() == static_cast<int64_t>(resultComponents.size()) &&
          "Wrong number of result components");

   Value accumulated = createScalarOrSplatConstant(rewriter, loc, resultType, 0);
   int64_t nextOffset = 0;
   for (Value component : resultComponents) {
     accumulated =
         insertLastDimSlice(rewriter, loc, component, accumulated, nextOffset);
     ++nextOffset;
   }

   return accumulated;
 }


 namespace {

 //===----------------------------------------------------------------------===//

 // ConvertConstant

 //===----------------------------------------------------------------------===//


 /// Rewrites `arith::ConstantOp` whose type converts to a vector type into a
 /// constant of that converted type. Every original integer value is split
 /// with `getHalves` and stored as its low half followed by its high half.
 /// Integer, splat and dense element attributes are handled; any other
 /// attribute kind is reported as a match failure.
 struct ConvertConstant final : OpConversionPattern<arith::ConstantOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::ConstantOp op, OpAdaptor,
                   ConversionPatternRewriter &rewriter) const override {
     auto convertedTy =
         getTypeConverter()->convertType<VectorType>(op.getType());
     if (!convertedTy)
       return rewriter.notifyMatchFailure(
           op, llvm::formatv("unsupported type: {0}", op.getType()));

     const unsigned halfBitWidth = convertedTy.getElementTypeBitWidth();
     Attribute valueAttr = op.getValueAttr();

     // Scalar integer: the result holds exactly the two halves.
     if (auto scalarAttr = dyn_cast<IntegerAttr>(valueAttr)) {
       auto [low, high] = getHalves(scalarAttr.getValue(), halfBitWidth);
       auto halvesAttr = DenseElementsAttr::get(convertedTy, {low, high});
       rewriter.replaceOpWithNewOp<arith::ConstantOp>(op, halvesAttr);
       return success();
     }

     // Element attributes: gather the (low, high) pairs of all elements.
     SmallVector<APInt> halves;
     if (auto splatAttr = dyn_cast<SplatElementsAttr>(valueAttr)) {
       // The splat value is split once and the pair is repeated per element.
       auto [low, high] =
           getHalves(splatAttr.getSplatValue<APInt>(), halfBitWidth);
       const int64_t numSplatElems = splatAttr.getNumElements();
       halves.reserve(numSplatElems * 2);
       for (int64_t i = 0; i < numSplatElems; ++i) {
         halves.push_back(low);
         halves.push_back(high);
       }
     } else if (auto denseAttr = dyn_cast<DenseElementsAttr>(valueAttr)) {
       halves.reserve(denseAttr.getNumElements() * 2);
       for (const APInt &element : denseAttr.getValues<APInt>()) {
         auto [low, high] = getHalves(element, halfBitWidth);
         halves.push_back(std::move(low));
         halves.push_back(std::move(high));
       }
     } else {
       return rewriter.notifyMatchFailure(op.getLoc(),
                                          "unhandled constant attribute");
     }

     auto halvesAttr = DenseElementsAttr::get(convertedTy, halves);
     rewriter.replaceOpWithNewOp<arith::ConstantOp>(op, halvesAttr);
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertAddI

 //===----------------------------------------------------------------------===//


 /// Emulates `arith::AddIOp` on the converted representation, where each
 /// operand carries its low and high halves along the last vector dimension.
 /// The low halves are added with `arith::AddUIExtendedOp`; its overflow flag
 /// is zero-extended and added to the sum of the high halves.
 struct ConvertAddI final : OpConversionPattern<arith::AddIOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::AddIOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op->getLoc();
     auto resultVecTy =
         getTypeConverter()->convertType<VectorType>(op.getType());
     if (!resultVecTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", op.getType()));

     // Type of a single half of the result.
     Type halfTy = reduceInnermostDim(resultVecTy);

     auto [lhsLow, lhsHigh] =
         extractLastDimHalves(rewriter, loc, adaptor.getLhs());
     auto [rhsLow, rhsHigh] =
         extractLastDimHalves(rewriter, loc, adaptor.getRhs());

     // Low half: sum plus an overflow flag.
     auto lowAdd = rewriter.create<arith::AddUIExtendedOp>(loc, lhsLow, rhsLow);
     Value carry =
         rewriter.create<arith::ExtUIOp>(loc, halfTy, lowAdd.getOverflow());

     // High half: carry + lhs.high + rhs.high.
     Value carryPlusLhs = rewriter.create<arith::AddIOp>(loc, carry, lhsHigh);
     Value highSum = rewriter.create<arith::AddIOp>(loc, carryPlusLhs, rhsHigh);

     rewriter.replaceOp(op,
                        constructResultVector(rewriter, loc, resultVecTy,
                                              {lowAdd.getSum(), highSum}));
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertBitwiseBinary

 //===----------------------------------------------------------------------===//


 /// Conversion pattern template for bitwise binary ops, e.g., `arith.andi`.
 /// `BinaryOp` is applied independently to the low halves and to the high
 /// halves of the converted operands; the two results are then reassembled
 /// into a vector of the converted result type.
 template <typename BinaryOp>
 struct ConvertBitwiseBinary final : OpConversionPattern<BinaryOp> {
   using OpConversionPattern<BinaryOp>::OpConversionPattern;
   using OpAdaptor = typename OpConversionPattern<BinaryOp>::OpAdaptor;

   LogicalResult
   matchAndRewrite(BinaryOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op->getLoc();
     auto resultVecTy =
         this->getTypeConverter()->template convertType<VectorType>(
             op.getType());
     if (!resultVecTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", op.getType()));

     auto [lhsLow, lhsHigh] =
         extractLastDimHalves(rewriter, loc, adaptor.getLhs());
     auto [rhsLow, rhsHigh] =
         extractLastDimHalves(rewriter, loc, adaptor.getRhs());

     Value lowResult = rewriter.create<BinaryOp>(loc, lhsLow, rhsLow);
     Value highResult = rewriter.create<BinaryOp>(loc, lhsHigh, rhsHigh);

     rewriter.replaceOp(op, constructResultVector(rewriter, loc, resultVecTy,
                                                  {lowResult, highResult}));
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertCmpI

 //===----------------------------------------------------------------------===//


 /// Maps a signed comparison predicate to its unsigned counterpart
 /// (sge->uge, sgt->ugt, sle->ule, slt->ult). Any other predicate is returned
 /// unchanged.
 static arith::CmpIPredicate toUnsignedPredicate(arith::CmpIPredicate pred) {
   using Pred = arith::CmpIPredicate;
   if (pred == Pred::sge)
     return Pred::uge;
   if (pred == Pred::sgt)
     return Pred::ugt;
   if (pred == Pred::sle)
     return Pred::ule;
   if (pred == Pred::slt)
     return Pred::ult;
   return pred;
 }


 /// Emulates `arith::CmpIOp` over operands that were converted into vectors
 /// holding a low and a high half along the last dimension.
 ///
 /// The high halves are compared with the original predicate and the low
 /// halves with its unsigned counterpart. The partial results are combined as:
 ///   - `eq`: low comparison AND high comparison;
 ///   - `ne`: low comparison OR high comparison;
 ///   - all other predicates: the low comparison when the high halves are
 ///     equal, the high comparison otherwise.
 /// The trailing x1 dimension of the combined result is dropped before it
 /// replaces the original op.
 struct ConvertCmpI final : OpConversionPattern<arith::CmpIOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::CmpIOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op->getLoc();

     // The type being emulated is the operand type, not the result type of the
     // comparison, so that is the type to report when the conversion fails.
     Type operandTy = op.getLhs().getType();
     auto inputTy = getTypeConverter()->convertType<VectorType>(operandTy);
     if (!inputTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", operandTy));

     arith::CmpIPredicate highPred = adaptor.getPredicate();
     arith::CmpIPredicate lowPred = toUnsignedPredicate(highPred);

     auto [lhsElem0, lhsElem1] =
         extractLastDimHalves(rewriter, loc, adaptor.getLhs());
     auto [rhsElem0, rhsElem1] =
         extractLastDimHalves(rewriter, loc, adaptor.getRhs());

     Value lowCmp =
         rewriter.create<arith::CmpIOp>(loc, lowPred, lhsElem0, rhsElem0);
     Value highCmp =
         rewriter.create<arith::CmpIOp>(loc, highPred, lhsElem1, rhsElem1);

     Value cmpResult{};
     switch (highPred) {
     case arith::CmpIPredicate::eq: {
       // Equal iff both halves are equal.
       cmpResult = rewriter.create<arith::AndIOp>(loc, lowCmp, highCmp);
       break;
     }
     case arith::CmpIPredicate::ne: {
       // Different iff at least one half differs.
       cmpResult = rewriter.create<arith::OrIOp>(loc, lowCmp, highCmp);
       break;
     }
     default: {
       // Handle inequality checks: the low halves decide only when the high
       // halves are equal.
       Value highEq = rewriter.create<arith::CmpIOp>(
           loc, arith::CmpIPredicate::eq, lhsElem1, rhsElem1);
       cmpResult =
           rewriter.create<arith::SelectOp>(loc, highEq, lowCmp, highCmp);
       break;
     }
     }

     assert(cmpResult && "Unhandled case");
     rewriter.replaceOp(op, dropTrailingX1Dim(rewriter, loc, cmpResult));
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertMulI

 //===----------------------------------------------------------------------===//


 /// Emulates `arith::MulIOp` on the converted low/high representation using
 /// long multiplication, keeping only the bottom two halves of the product.
 struct ConvertMulI final : OpConversionPattern<arith::MulIOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::MulIOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op->getLoc();
     auto resultVecTy =
         getTypeConverter()->convertType<VectorType>(op.getType());
     if (!resultVecTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", op.getType()));

     auto [lhsLow, lhsHigh] =
         extractLastDimHalves(rewriter, loc, adaptor.getLhs());
     auto [rhsLow, rhsHigh] =
         extractLastDimHalves(rewriter, loc, adaptor.getRhs());

     // Standard (long) multiplication. The product of two i2N integers is (at
     // most) an i4N value, but only its bottom i2N part is needed, so the top
     // part is never computed:
     //   low  = low(lhs.low * rhs.low)
     //   high = high(lhs.low * rhs.low) + lhs.low * rhs.high
     //          + lhs.high * rhs.low
     auto lowProduct =
         rewriter.create<arith::MulUIExtendedOp>(loc, lhsLow, rhsLow);
     Value lowTimesHigh = rewriter.create<arith::MulIOp>(loc, lhsLow, rhsHigh);
     Value highTimesLow = rewriter.create<arith::MulIOp>(loc, lhsHigh, rhsLow);

     Value highPartial = rewriter.create<arith::AddIOp>(
         loc, lowProduct.getHigh(), lowTimesHigh);
     Value highResult =
         rewriter.create<arith::AddIOp>(loc, highPartial, highTimesLow);

     rewriter.replaceOp(op,
                        constructResultVector(rewriter, loc, resultVecTy,
                                              {lowProduct.getLow(), highResult}));
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertExtSI

 //===----------------------------------------------------------------------===//


 /// Emulates `arith::ExtSIOp` whose result type converts to a vector type. The
 /// low half is the input sign-extended to the half type; the high half is the
 /// sign-extension of the "low half is negative" comparison result.
 struct ConvertExtSI final : OpConversionPattern<arith::ExtSIOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::ExtSIOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op->getLoc();
     auto resultVecTy =
         getTypeConverter()->convertType<VectorType>(op.getType());
     if (!resultVecTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", op.getType()));

     // Type of one half of the result.
     Type halfTy = reduceInnermostDim(resultVecTy);

     // Low half: the input, sign-extended (or folded when no new op is
     // needed).
     Value reshapedInput = appendX1Dim(rewriter, loc, adaptor.getIn());
     Value lowHalf =
         rewriter.createOrFold<arith::ExtSIOp>(loc, halfTy, reshapedInput);

     // High half: sign-extension of `lowHalf < 0`.
     Value zero = createScalarOrSplatConstant(rewriter, loc, halfTy, 0);
     Value isNegative = rewriter.create<arith::CmpIOp>(
         loc, arith::CmpIPredicate::slt, lowHalf, zero);
     Value highHalf = rewriter.create<arith::ExtSIOp>(loc, halfTy, isNegative);

     rewriter.replaceOp(op, constructResultVector(rewriter, loc, resultVecTy,
                                                  {lowHalf, highHalf}));
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertExtUI

 //===----------------------------------------------------------------------===//


 /// Emulates `arith::ExtUIOp` whose result type converts to a vector type. The
 /// zero-extended input is inserted at offset 0 of an all-zero constant of the
 /// converted type, which leaves the high half zero.
 struct ConvertExtUI final : OpConversionPattern<arith::ExtUIOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::ExtUIOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op->getLoc();
     auto resultVecTy =
         getTypeConverter()->convertType<VectorType>(op.getType());
     if (!resultVecTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", op.getType()));

     // Type of one half of the result.
     Type halfTy = reduceInnermostDim(resultVecTy);

     // Low half: the input, zero-extended (or folded when no new op is
     // needed).
     Value reshapedInput = appendX1Dim(rewriter, loc, adaptor.getIn());
     Value lowHalf =
         rewriter.createOrFold<arith::ExtUIOp>(loc, halfTy, reshapedInput);

     // Start from all zeros and overwrite only the low half.
     Value allZeros = createScalarOrSplatConstant(rewriter, loc, resultVecTy, 0);
     rewriter.replaceOp(
         op, insertLastDimSlice(rewriter, loc, lowHalf, allZeros, 0));
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertMaxMin

 //===----------------------------------------------------------------------===//


 /// Conversion pattern template for integer max/min ops. `SourceOp` is
 /// replaced by an `arith::CmpIOp` using predicate `CmpPred` followed by an
 /// `arith::SelectOp`, both built over the original (unconverted) operands.
 template <typename SourceOp, arith::CmpIPredicate CmpPred>
 struct ConvertMaxMin final : OpConversionPattern<SourceOp> {
   using OpConversionPattern<SourceOp>::OpConversionPattern;

   LogicalResult
   matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op->getLoc();

     auto convertedTy =
         this->getTypeConverter()->template convertType<VectorType>(
             op.getType());
     if (!convertedTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", op.getType()));

     // Rewrite Max*I/Min*I as compare and select over original operands. Let
     // the CmpI and Select emulation patterns handle the final legalization.
     Value pick =
         rewriter.create<arith::CmpIOp>(loc, CmpPred, op.getLhs(), op.getRhs());
     rewriter.replaceOpWithNewOp<arith::SelectOp>(op, pick, op.getLhs(),
                                                  op.getRhs());
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // Convert IndexCast ops

 //===----------------------------------------------------------------------===//


 /// Checks whether `type` is `index` itself or a vector whose element type is
 /// `index`.
 static bool isIndexOrIndexVector(Type type) {
   // Look through a vector type to its element type; other types are checked
   // as they are.
   Type candidate = type;
   if (auto vectorTy = dyn_cast<VectorType>(type))
     candidate = vectorTy.getElementType();

   return isa<IndexType>(candidate);
 }


 /// Conversion pattern template for index casts from an emulated integer type
 /// to `index` (or a vector of `index`). Only the low half of the converted
 /// input is kept, and `CastOp` is re-created on it with the original result
 /// type. Ops whose result is not index-typed are not matched.
 template <typename CastOp>
 struct ConvertIndexCastIntToIndex final : OpConversionPattern<CastOp> {
   using OpConversionPattern<CastOp>::OpConversionPattern;

   LogicalResult
   matchAndRewrite(CastOp op, typename CastOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Type indexResultTy = op.getType();
     if (!isIndexOrIndexVector(indexResultTy))
       return failure();

     Location loc = op.getLoc();
     Type sourceTy = op.getIn().getType();
     auto convertedSourceTy =
         this->getTypeConverter()->template convertType<VectorType>(sourceTy);
     if (!convertedSourceTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", sourceTy));

     // Keep only the low half of the input; discarding the high half
     // truncates the original value.
     Value lowHalf = extractLastDimSlice(rewriter, loc, adaptor.getIn(), 0);
     Value castInput = dropTrailingX1Dim(rewriter, loc, lowHalf);
     rewriter.replaceOpWithNewOp<CastOp>(op, indexResultTy, castInput);
     return success();
   }
 };


 /// Conversion pattern template for index casts from `index` (or a vector of
 /// `index`) to an emulated integer type. `CastOp` is first emitted towards an
 /// integer type whose width is the converter's maximum target bit width, and
 /// the result is widened to the original result type with `ExtensionOp`.
 /// Ops whose input is not index-typed are not matched.
 template <typename CastOp, typename ExtensionOp>
 struct ConvertIndexCastIndexToInt final : OpConversionPattern<CastOp> {
   using OpConversionPattern<CastOp>::OpConversionPattern;

   LogicalResult
   matchAndRewrite(CastOp op, typename CastOp::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     if (!isIndexOrIndexVector(op.getIn().getType()))
       return failure();

     Location loc = op.getLoc();
     auto *converter =
         this->template getTypeConverter<arith::WideIntEmulationConverter>();

     Type wideResultTy = op.getType();
     auto convertedResultTy =
         converter->template convertType<VectorType>(wideResultTy);
     if (!convertedResultTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", wideResultTy));

     // Narrow type for the intermediate index cast; for vector results it
     // keeps the shape of the original result.
     Type intermediateTy =
         rewriter.getIntegerType(converter->getMaxTargetIntBitWidth());
     if (auto wideVecTy = dyn_cast<VectorType>(wideResultTy))
       intermediateTy = VectorType::get(wideVecTy.getShape(), intermediateTy);

     // Sign or zero-extend the result. Let the matching conversion pattern
     // legalize the extension op.
     Value narrowCast =
         rewriter.create<CastOp>(loc, intermediateTy, adaptor.getIn());
     rewriter.replaceOpWithNewOp<ExtensionOp>(op, wideResultTy, narrowCast);
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertSelect

 //===----------------------------------------------------------------------===//


 /// Emulates `arith::SelectOp` whose result type converts to a vector type.
 /// The same condition (with a trailing x1 dimension appended when it is a
 /// vector) selects between the low halves and between the high halves of the
 /// converted true/false values.
 struct ConvertSelect final : OpConversionPattern<arith::SelectOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::SelectOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op->getLoc();
     auto resultVecTy =
         getTypeConverter()->convertType<VectorType>(op.getType());
     if (!resultVecTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", op.getType()));

     auto [trueLow, trueHigh] =
         extractLastDimHalves(rewriter, loc, adaptor.getTrueValue());
     auto [falseLow, falseHigh] =
         extractLastDimHalves(rewriter, loc, adaptor.getFalseValue());
     Value condition = appendX1Dim(rewriter, loc, adaptor.getCondition());

     Value lowResult =
         rewriter.create<arith::SelectOp>(loc, condition, trueLow, falseLow);
     Value highResult =
         rewriter.create<arith::SelectOp>(loc, condition, trueHigh, falseHigh);

     rewriter.replaceOp(op, constructResultVector(rewriter, loc, resultVecTy,
                                                  {lowResult, highResult}));
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertShLI

 //===----------------------------------------------------------------------===//


 /// Emulates `arith::ShLIOp` on the converted representation, where the value

 /// to shift carries its low and high halves along the last vector dimension.

 /// Only the low half of the shift amount is read.

 struct ConvertShLI final : OpConversionPattern<arith::ShLIOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(arith::ShLIOp op, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     Location loc = op->getLoc();


     Type oldTy = op.getType();

     auto newTy = getTypeConverter()->convertType<VectorType>(oldTy);

     if (!newTy)

       return rewriter.notifyMatchFailure(

           loc, llvm::formatv("unsupported type: {0}", op.getType()));


     // Type of a single half (scalar, or vector with a trailing x1 dimension).

     Type newOperandTy = reduceInnermostDim(newTy);

     // `oldBitWidth` == `2 * newBitWidth`

     unsigned newBitWidth = newTy.getElementTypeBitWidth();


     auto [lhsElem0, lhsElem1] =

         extractLastDimHalves(rewriter, loc, adaptor.getLhs());

     Value rhsElem0 = extractLastDimSlice(rewriter, loc, adaptor.getRhs(), 0);


     // Assume that the shift amount is < 2 * newBitWidth. Calculate the low and

     // high halves of the results separately:

     //   1. low := LHS.low shli RHS

     //

     //   2. high := a or b or c, where:

     //     a) Bits from LHS.high, shifted by the RHS.

     //     b) Bits from LHS.low, shifted right. These come into play when

     //        RHS < newBitWidth, e.g.:

     //         [0000][llll] shli 3 --> [0lll][l000]

     //                                    ^

     //                                    |

     //                           [llll] shrui (4 - 3)

     //     c) Bits from LHS.low, shifted left. These matter when

     //        RHS >= newBitWidth, e.g.:

     //         [0000][llll] shli 7 --> [l000][0000]

     //                                   ^

     //                                   |

     //                          [llll] shli (7 - 4)

     //

     // Given the assumption above, only the low half of RHS is used as the

     // shift amount. 'Bounds checks' (selects on `illegalElemShift`) pick the

     // right contribution depending on whether RHS.low >= newBitWidth.

     //

     // TODO: Explore possible optimizations.

     Value zeroCst = createScalarOrSplatConstant(rewriter, loc, newOperandTy, 0);

     Value elemBitWidth =

         createScalarOrSplatConstant(rewriter, loc, newOperandTy, newBitWidth);


     // True when the shift amount does not fit within a single half.

     Value illegalElemShift = rewriter.create<arith::CmpIOp>(

         loc, arith::CmpIPredicate::uge, rhsElem0, elemBitWidth);


     // Low half of the result (1.): zero once the shift amount reaches

     // newBitWidth.

     Value shiftedElem0 =

         rewriter.create<arith::ShLIOp>(loc, lhsElem0, rhsElem0);

     Value resElem0 = rewriter.create<arith::SelectOp>(loc, illegalElemShift,

                                                       zeroCst, shiftedElem0);


     // Contribution (b): LHS.low shifted right by (newBitWidth - RHS.low), with

     // RHS.low capped at newBitWidth so that the subtraction does not wrap.

     // NOTE(review): when RHS.low is 0 this right-shift amount equals

     // newBitWidth, i.e. the full width of the narrow type -- confirm that the

     // narrow shrui is well-defined for that amount.

     Value cappedShiftAmount = rewriter.create<arith::SelectOp>(

         loc, illegalElemShift, elemBitWidth, rhsElem0);

     Value rightShiftAmount =

         rewriter.create<arith::SubIOp>(loc, elemBitWidth, cappedShiftAmount);

     Value shiftedRight =

         rewriter.create<arith::ShRUIOp>(loc, lhsElem0, rightShiftAmount);

     // Contribution (c): LHS.low shifted left by (RHS.low - newBitWidth). Only

     // selected below when `illegalElemShift` is true.

     Value overshotShiftAmount =

         rewriter.create<arith::SubIOp>(loc, rhsElem0, elemBitWidth);

     Value shiftedLeft =

         rewriter.create<arith::ShLIOp>(loc, lhsElem0, overshotShiftAmount);


     // Contribution (a): LHS.high shifted by RHS.low, zero once the shift

     // amount reaches newBitWidth.

     Value shiftedElem1 =

         rewriter.create<arith::ShLIOp>(loc, lhsElem1, rhsElem0);

     Value resElem1High = rewriter.create<arith::SelectOp>(

         loc, illegalElemShift, zeroCst, shiftedElem1);

     // Pick (c) for large shift amounts and (b) otherwise, then merge with (a).

     Value resElem1Low = rewriter.create<arith::SelectOp>(

         loc, illegalElemShift, shiftedLeft, shiftedRight);

     Value resElem1 =

         rewriter.create<arith::OrIOp>(loc, resElem1Low, resElem1High);


     Value resultVec =

         constructResultVector(rewriter, loc, newTy, {resElem0, resElem1});

     rewriter.replaceOp(op, resultVec);

     return success();

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertShRUI

 //===----------------------------------------------------------------------===//


 /// Emulates `arith::ShRUIOp` on the converted representation, where the value

 /// to shift carries its low and high halves along the last vector dimension.

 /// Only the low half of the shift amount is read.

 struct ConvertShRUI final : OpConversionPattern<arith::ShRUIOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(arith::ShRUIOp op, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     Location loc = op->getLoc();


     Type oldTy = op.getType();

     auto newTy = getTypeConverter()->convertType<VectorType>(oldTy);

     if (!newTy)

       return rewriter.notifyMatchFailure(

           loc, llvm::formatv("unsupported type: {0}", op.getType()));


     // Type of a single half (scalar, or vector with a trailing x1 dimension).

     Type newOperandTy = reduceInnermostDim(newTy);

     // `oldBitWidth` == `2 * newBitWidth`

     unsigned newBitWidth = newTy.getElementTypeBitWidth();


     auto [lhsElem0, lhsElem1] =

         extractLastDimHalves(rewriter, loc, adaptor.getLhs());

     Value rhsElem0 = extractLastDimSlice(rewriter, loc, adaptor.getRhs(), 0);


     // Assume that the shift amount is < 2 * newBitWidth. Calculate the low and

     // high halves of the results separately:

     //   1. low := a or b or c, where:

     //     a) Bits from LHS.low, shifted by the RHS.

     //     b) Bits from LHS.high, shifted left. These matter when

     //        RHS < newBitWidth, e.g.:

     //         [hhhh][0000] shrui 3 --> [000h][hhh0]

     //                                          ^

     //                                          |

     //                                 [hhhh] shli (4 - 3)

     //     c) Bits from LHS.high, shifted right. These come into play when

     //        RHS >= newBitWidth, e.g.:

     //         [hhhh][0000] shrui 7 --> [0000][000h]

     //                                          ^

     //                                          |

     //                                 [hhhh] shrui (7 - 4)

     //

     //   2. high := LHS.high shrui RHS

     //

     // Given the assumption above, only the low half of RHS is used as the

     // shift amount. 'Bounds checks' (selects on `illegalElemShift`) pick the

     // right contribution depending on whether RHS.low >= newBitWidth.

     //

     // TODO: Explore possible optimizations.

     Value zeroCst = createScalarOrSplatConstant(rewriter, loc, newOperandTy, 0);

     Value elemBitWidth =

         createScalarOrSplatConstant(rewriter, loc, newOperandTy, newBitWidth);


     // True when the shift amount does not fit within a single half.

     Value illegalElemShift = rewriter.create<arith::CmpIOp>(

         loc, arith::CmpIPredicate::uge, rhsElem0, elemBitWidth);


     // Contribution (a): LHS.low shifted by RHS.low, zero once the shift amount

     // reaches newBitWidth.

     Value shiftedElem0 =

         rewriter.create<arith::ShRUIOp>(loc, lhsElem0, rhsElem0);

     Value resElem0Low = rewriter.create<arith::SelectOp>(loc, illegalElemShift,

                                                          zeroCst, shiftedElem0);

     // High half of the result (2.): zero once the shift amount reaches

     // newBitWidth.

     Value shiftedElem1 =

         rewriter.create<arith::ShRUIOp>(loc, lhsElem1, rhsElem0);

     Value resElem1 = rewriter.create<arith::SelectOp>(loc, illegalElemShift,

                                                       zeroCst, shiftedElem1);


     // Contribution (b): LHS.high shifted left by (newBitWidth - RHS.low), with

     // RHS.low capped at newBitWidth so that the subtraction does not wrap.

     // NOTE(review): when RHS.low is 0 this left-shift amount equals

     // newBitWidth, i.e. the full width of the narrow type -- confirm that the

     // narrow shli is well-defined for that amount.

     Value cappedShiftAmount = rewriter.create<arith::SelectOp>(

         loc, illegalElemShift, elemBitWidth, rhsElem0);

     Value leftShiftAmount =

         rewriter.create<arith::SubIOp>(loc, elemBitWidth, cappedShiftAmount);

     Value shiftedLeft =

         rewriter.create<arith::ShLIOp>(loc, lhsElem1, leftShiftAmount);

     // Contribution (c): LHS.high shifted right by (RHS.low - newBitWidth). Only

     // selected below when `illegalElemShift` is true.

     Value overshotShiftAmount =

         rewriter.create<arith::SubIOp>(loc, rhsElem0, elemBitWidth);

     Value shiftedRight =

         rewriter.create<arith::ShRUIOp>(loc, lhsElem1, overshotShiftAmount);


     // Pick (c) for large shift amounts and (b) otherwise, then merge with (a).

     Value resElem0High = rewriter.create<arith::SelectOp>(

         loc, illegalElemShift, shiftedRight, shiftedLeft);

     Value resElem0 =

         rewriter.create<arith::OrIOp>(loc, resElem0Low, resElem0High);


     Value resultVec =

         constructResultVector(rewriter, loc, newTy, {resElem0, resElem1});

     rewriter.replaceOp(op, resultVec);

     return success();

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertShRSI

 //===----------------------------------------------------------------------===//


 /// Emulates a wide `arith.shrsi` as the bitwise OR of an `arith.shrui` on the
 /// original operands and the sign-extension bits that an arithmetic shift
 /// would have introduced. A shift amount of zero is special-cased with a
 /// select that returns the LHS unchanged.
 struct ConvertShRSI final : OpConversionPattern<arith::ShRSIOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::ShRSIOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op->getLoc();

     Type wideTy = op.getType();
     auto newTy = getTypeConverter()->convertType<VectorType>(wideTy);
     if (!newTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", wideTy));

     // Only the high half of the LHS (it carries the sign bit) and the low
     // half of the shift amount are needed in narrow form.
     Value lhsHigh = extractLastDimSlice(rewriter, loc, adaptor.getLhs(), 1);
     Value shiftLow = extractLastDimSlice(rewriter, loc, adaptor.getRhs(), 0);

     Type halfTy = shiftLow.getType();
     int64_t wideBitWidth = newTy.getElementTypeBitWidth() * 2;

     // Do as much work as possible on the narrow integer type; the ops created
     // on the wide type below are left for the other emulation patterns to
     // legalize.
     Value narrowZero = createScalarOrSplatConstant(rewriter, loc, halfTy, 0);
     Value isNegative = rewriter.create<arith::CmpIOp>(
         loc, arith::CmpIPredicate::slt, lhsHigh, narrowZero);
     isNegative = dropTrailingX1Dim(rewriter, loc, isNegative);

     // Sign-extending the i1 sign flag yields all ones or all zeros. Shifting
     // that pattern left by (wide bit width - shift amount) leaves exactly the
     // bits that the arithmetic right shift fills in with the sign.
     Value signFill = rewriter.create<arith::ExtSIOp>(loc, wideTy, isNegative);
     Value wideWidthCst =
         createScalarOrSplatConstant(rewriter, loc, halfTy, wideBitWidth);
     Value keptBits =
         rewriter.create<arith::SubIOp>(loc, wideWidthCst, shiftLow);
     keptBits = dropTrailingX1Dim(rewriter, loc, keptBits);
     keptBits = rewriter.create<arith::ExtUIOp>(loc, wideTy, keptBits);
     Value signExtBits =
         rewriter.create<arith::ShLIOp>(loc, signFill, keptBits);

     // The logical right shift is built from the original (wide) operands.
     Value logicalShift =
         rewriter.create<arith::ShRUIOp>(loc, op.getLhs(), op.getRhs());
     Value arithShift =
         rewriter.create<arith::OrIOp>(loc, logicalShift, signExtBits);

     // A zero shift amount makes the `signExtBits` shift invalid, so return
     // the LHS directly in that case.
     Value shiftIsZero = rewriter.create<arith::CmpIOp>(
         loc, arith::CmpIPredicate::eq, shiftLow, narrowZero);
     shiftIsZero = dropTrailingX1Dim(rewriter, loc, shiftIsZero);
     rewriter.replaceOpWithNewOp<arith::SelectOp>(op, shiftIsZero, op.getLhs(),
                                                  arithShift);
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertSubI

 //===----------------------------------------------------------------------===//


 /// Emulates a wide `arith.subi` with two narrow subtractions, propagating the
 /// borrow from the low half into the high half.
 struct ConvertSubI final : OpConversionPattern<arith::SubIOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::SubIOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op->getLoc();
     VectorType newTy =
         getTypeConverter()->convertType<VectorType>(op.getType());
     if (!newTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {}", op.getType()));

     Type halfTy = reduceInnermostDim(newTy);

     auto [lhsLow, lhsHigh] =
         extractLastDimHalves(rewriter, loc, adaptor.getLhs());
     auto [rhsLow, rhsHigh] =
         extractLastDimHalves(rewriter, loc, adaptor.getRhs());

     // LHS - RHS == [lhsLow - rhsLow, lhsHigh - BORROW - rhsHigh], where
     // BORROW is 1 when the low-half subtraction wraps around and 0 otherwise.
     Value lowDiff = rewriter.create<arith::SubIOp>(loc, lhsLow, rhsLow);

     // The low half borrows exactly when lhsLow < rhsLow (unsigned).
     Value borrowFlag = rewriter.create<arith::CmpIOp>(
         loc, arith::CmpIPredicate::ult, lhsLow, rhsLow);
     Value borrow = rewriter.create<arith::ExtUIOp>(loc, halfTy, borrowFlag);

     Value highMinusBorrow =
         rewriter.create<arith::SubIOp>(loc, lhsHigh, borrow);
     Value highDiff =
         rewriter.create<arith::SubIOp>(loc, highMinusBorrow, rhsHigh);

     rewriter.replaceOp(op, constructResultVector(rewriter, loc, newTy,
                                                  {lowDiff, highDiff}));
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertSIToFP

 //===----------------------------------------------------------------------===//


 /// Emulates `arith.sitofp` on a wide integer by converting the absolute value
 /// with `arith.uitofp` and negating the floating-point result when the input
 /// was negative.
 struct ConvertSIToFP final : OpConversionPattern<arith::SIToFPOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::SIToFPOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();

     Value input = op.getIn();
     Type wideTy = input.getType();
     if (!getTypeConverter()->convertType<VectorType>(wideTy))
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", wideTy));

     Value wideZero = createScalarOrSplatConstant(rewriter, loc, wideTy, 0);

     // Converting the magnitude avoids dealing with very large unsigned
     // numbers; the sign is re-applied afterwards. Negation is expressed as a
     // subtraction from zero. All ops below are created on the original wide
     // type, so this relies on the other conversion patterns to legalize them
     // and narrow the bit widths.
     Value isNegative = rewriter.create<arith::CmpIOp>(
         loc, arith::CmpIPredicate::slt, input, wideZero);
     Value negated = rewriter.create<arith::SubIOp>(loc, wideZero, input);
     Value magnitude =
         rewriter.create<arith::SelectOp>(loc, isNegative, negated, input);

     Value magnitudeFp =
         rewriter.create<arith::UIToFPOp>(loc, op.getType(), magnitude);
     Value negatedFp = rewriter.create<arith::NegFOp>(loc, magnitudeFp);
     rewriter.replaceOpWithNewOp<arith::SelectOp>(op, isNegative, negatedFp,
                                                  magnitudeFp);
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertUIToFP

 //===----------------------------------------------------------------------===//


 /// Emulates `arith.uitofp` on a wide integer by converting the low and high
 /// halves separately and recombining them in floating point.
 struct ConvertUIToFP final : OpConversionPattern<arith::UIToFPOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::UIToFPOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();

     Type oldTy = op.getIn().getType();
     auto newTy = getTypeConverter()->convertType<VectorType>(oldTy);
     if (!newTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {0}", oldTy));
     unsigned newBitWidth = newTy.getElementTypeBitWidth();

     auto [low, hi] = extractLastDimHalves(rewriter, loc, adaptor.getIn());
     Value lowInt = dropTrailingX1Dim(rewriter, loc, low);
     Value hiInt = dropTrailingX1Dim(rewriter, loc, hi);
     Value zeroCst =
         createScalarOrSplatConstant(rewriter, loc, hiInt.getType(), 0);

     // The final result has the following form:
     //   if (hi == 0) return uitofp(low)
     //   else         return uitofp(low) + uitofp(hi) * 2^BW
     //
     // where `BW` is the bitwidth of the narrowed integer type. We emit a
     // select to make it easier to fold away the `hi` part calculation when it
     // is known to be zero.
     //
     // Note 1: The emulation is precise only for input values that have an
     // exact integer representation in the result floating point type, and may
     // lead to loss of precision otherwise.
     //
     // Note 2: We do not strictly need the `hi == 0` case, but it makes
     // constant folding easier.
     Value hiEqZero = rewriter.create<arith::CmpIOp>(
         loc, arith::CmpIPredicate::eq, hiInt, zeroCst);

     Type resultTy = op.getType();
     Type resultElemTy = getElementTypeOrSelf(resultTy);
     Value lowFp = rewriter.create<arith::UIToFPOp>(loc, resultTy, lowInt);
     Value hiFp = rewriter.create<arith::UIToFPOp>(loc, resultTy, hiInt);

     // Compute 2^BW through a 2*BW-bit APInt instead of shifting a 64-bit
     // integer: with BW == 64 (i128 emulated on a 64-bit target) a plain
     // `int64_t(1) << BW` shifts by the full width of the type, which is
     // undefined behavior and cannot represent 2^64 in the first place.
     double pow2 = APInt(newBitWidth * 2, 1).shl(newBitWidth).roundToDouble();
     TypedAttr pow2Attr = rewriter.getFloatAttr(resultElemTy, pow2);
     if (auto vecTy = dyn_cast<VectorType>(resultTy))
       pow2Attr = SplatElementsAttr::get(vecTy, pow2Attr);

     Value pow2Val = rewriter.create<arith::ConstantOp>(loc, resultTy, pow2Attr);

     Value hiVal = rewriter.create<arith::MulFOp>(loc, hiFp, pow2Val);
     Value result = rewriter.create<arith::AddFOp>(loc, lowFp, hiVal);

     rewriter.replaceOpWithNewOp<arith::SelectOp>(op, hiEqZero, lowFp, result);
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertFPToSI

 //===----------------------------------------------------------------------===//


 /// Emulates `arith.fptosi` to a wide integer by converting the absolute value
 /// with `arith.fptoui` and negating the integer result when the input was
 /// negative.
 struct ConvertFPToSI final : OpConversionPattern<arith::FPToSIOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::FPToSIOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();

     // The (already converted) floating-point input and its type.
     Value input = adaptor.getIn();
     Type floatTy = input.getType();

     Type wideIntTy = op.getType();
     if (!getTypeConverter()->convertType<VectorType>(wideIntTy))
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {}", wideIntTy));

     // Strategy: take the absolute value, hand it to fptoui, then restore the
     // sign on the integer side. If minSInt < fp < maxSInt, i.e. the input is
     // representable in the signed wide integer type, this produces the
     // correct result; otherwise the result is UB.
     TypedAttr fpZeroAttr = rewriter.getZeroAttr(floatTy);
     Value fpZero = rewriter.create<arith::ConstantOp>(loc, fpZeroAttr);
     Value intZero = createScalarOrSplatConstant(rewriter, loc, wideIntTy, 0);

     // The absolute value is spelled as compare + negate + select. math.absf
     // would also work, but would introduce an extra dependency.
     Value isNegative = rewriter.create<arith::CmpFOp>(
         loc, arith::CmpFPredicate::OLT, input, fpZero);
     Value negatedInput = rewriter.create<arith::NegFOp>(loc, input);
     Value magnitude = rewriter.create<arith::SelectOp>(loc, isNegative,
                                                        negatedInput, input);

     // The wide fptoui created here is left to be legalized separately.
     Value unsignedRes =
         rewriter.create<arith::FPToUIOp>(loc, wideIntTy, magnitude);

     // Negate the integer result when the input was below zero.
     Value negatedRes =
         rewriter.create<arith::SubIOp>(loc, intZero, unsignedRes);
     rewriter.replaceOpWithNewOp<arith::SelectOp>(op, isNegative, negatedRes,
                                                  unsignedRes);
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertFPToUI

 //===----------------------------------------------------------------------===//


 /// Emulates `arith.fptoui` to a wide integer by computing the high half from
 /// `fp / 2^N` and the low half from the remainder of that division, where N
 /// is the bitwidth of the narrowed integer type.
 struct ConvertFPToUI final : OpConversionPattern<arith::FPToUIOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::FPToUIOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();

     // The (already converted) floating-point input and its type.
     Value input = adaptor.getIn();
     Type floatTy = input.getType();
     Type floatElemTy = getElementTypeOrSelf(floatTy);
     auto floatVecTy = dyn_cast<VectorType>(floatTy);

     Type wideIntTy = op.getType();
     auto newTy = getTypeConverter()->convertType<VectorType>(wideIntTy);
     if (!newTy)
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported type: {}", wideIntTy));
     unsigned newBitWidth = newTy.getElementTypeBitWidth();

     // Narrow integer type with the same shape as the float input.
     Type halfIntTy = IntegerType::get(input.getContext(), newBitWidth);
     if (floatVecTy)
       halfIntTy = VectorType::get(floatVecTy.getShape(), halfIntTy);

     // The result is interpreted as 2^N * high + low, where N is the narrow
     // bitwidth. Hence high = fptoui(fp / 2^N) and low = fptoui(fp - high *
     // 2^N). Overflows, including +-inf, NaNs and negative inputs, are UB.
     const llvm::fltSemantics &semantics =
         cast<FloatType>(floatElemTy).getFloatSemantics();

     // Build 2^N in the input's float semantics. If 2^N does not fit, use inf
     // instead so that the high part comes out as 0. Since the value is always
     // a power of two, an inexact status here only occurs on overflow.
     llvm::APFloat pow2(semantics);
     auto status =
         pow2.convertFromAPInt(APInt(newBitWidth * 2, 1).shl(newBitWidth),
                               false, llvm::RoundingMode::TowardZero);
     if (status == llvm::detail::opStatus::opInexact)
       pow2 = llvm::APFloat::getInf(semantics);

     TypedAttr pow2Attr = FloatAttr::get(floatElemTy, pow2);
     if (floatVecTy)
       pow2Attr = SplatElementsAttr::get(floatVecTy, pow2Attr);
     Value pow2Cst = rewriter.create<arith::ConstantOp>(loc, pow2Attr);

     Value quotient = rewriter.create<arith::DivFOp>(loc, input, pow2Cst);
     Value highInt =
         rewriter.create<arith::FPToUIOp>(loc, halfIntTy, quotient);

     // fp - high * 2^N is obtained as the remainder of the division.
     Value rem = rewriter.create<arith::RemFOp>(loc, input, pow2Cst);
     Value lowInt = rewriter.create<arith::FPToUIOp>(loc, halfIntTy, rem);

     Value highSlice = appendX1Dim(rewriter, loc, highInt);
     Value lowSlice = appendX1Dim(rewriter, loc, lowInt);

     rewriter.replaceOp(op, constructResultVector(rewriter, loc, newTy,
                                                  {lowSlice, highSlice}));
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertTruncI

 //===----------------------------------------------------------------------===//


 /// Emulates `arith.trunci` from a wide integer by dropping the high half and
 /// truncating the low half further if needed.
 struct ConvertTruncI final : OpConversionPattern<arith::TruncIOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(arith::TruncIOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     Location loc = op.getLoc();
     Type resultTy = op.getType();

     // Only result types that are already legal for the target are handled;
     // truncation to types wider than the target supports is not implemented.
     if (!getTypeConverter()->isLegal(resultTy))
       return rewriter.notifyMatchFailure(
           loc, llvm::formatv("unsupported truncation result type: {0}",
                              resultTy));

     // Keep the low half only, then truncate it when the result is narrower
     // (createOrFold folds the truncation away when it is not needed).
     Value lowHalf = dropTrailingX1Dim(
         rewriter, loc,
         extractLastDimSlice(rewriter, loc, adaptor.getIn(), 0));
     Value narrowed =
         rewriter.createOrFold<arith::TruncIOp>(loc, resultTy, lowHalf);
     rewriter.replaceOp(op, narrowed);
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // ConvertVectorPrint

 //===----------------------------------------------------------------------===//


 /// Rebuilds `vector.print` on top of the converted source operand.
 struct ConvertVectorPrint final : OpConversionPattern<vector::PrintOp> {
   using OpConversionPattern::OpConversionPattern;

   LogicalResult
   matchAndRewrite(vector::PrintOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     // The adaptor supplies the operand after type conversion.
     Value convertedSource = adaptor.getSource();
     rewriter.replaceOpWithNewOp<vector::PrintOp>(op, convertedSource);
     return success();
   }
 };


 //===----------------------------------------------------------------------===//

 // Pass Definition

 //===----------------------------------------------------------------------===//


 /// Pass that emulates integer operations that are too wide for the target
 /// using the wide-int emulation type converter and conversion patterns.
 struct EmulateWideIntPass final
     : arith::impl::ArithEmulateWideIntBase<EmulateWideIntPass> {
   using ArithEmulateWideIntBase::ArithEmulateWideIntBase;

   void runOnOperation() override {
     Operation *op = getOperation();

     // Reject invalid widths up front: the type converter asserts on them.
     // Emit a diagnostic so that the failure is not silent.
     if (!llvm::isPowerOf2_32(widestIntSupported) || widestIntSupported < 2) {
       unsigned requestedWidth = widestIntSupported;
       op->emitError()
           << "widest supported integer width must be a power of two and at "
              "least 2, but got "
           << requestedWidth;
       signalPassFailure();
       return;
     }

     MLIRContext *ctx = op->getContext();

     arith::WideIntEmulationConverter typeConverter(widestIntSupported);
     ConversionTarget target(*ctx);

     // A function is legal once its whole signature uses legal types.
     target.addDynamicallyLegalOp<func::FuncOp>([&typeConverter](Operation *op) {
       return typeConverter.isLegal(cast<func::FuncOp>(op).getFunctionType());
     });

     // Other ops are legal when the type converter considers the op legal.
     auto opLegalCallback = [&typeConverter](Operation *op) {
       return typeConverter.isLegal(op);
     };
     target.addDynamicallyLegalOp<func::CallOp, func::ReturnOp>(opLegalCallback);
     target.addDynamicallyLegalOp<vector::PrintOp>(opLegalCallback);
     target.addDynamicallyLegalDialect<arith::ArithDialect>(opLegalCallback);
     target.addLegalDialect<vector::VectorDialect>();

     RewritePatternSet patterns(ctx);
     arith::populateArithWideIntEmulationPatterns(typeConverter, patterns);

     // Populate `func.*` conversion patterns.
     populateFunctionOpInterfaceTypeConversionPattern<func::FuncOp>(
         patterns, typeConverter);
     populateCallOpTypeConversionPattern(patterns, typeConverter);
     populateReturnOpTypeConversionPattern(patterns, typeConverter);

     if (failed(applyPartialConversion(op, target, std::move(patterns))))
       signalPassFailure();
   }
 };

 } // end anonymous namespace


 //===----------------------------------------------------------------------===//

 // Public Interface Definition

 //===----------------------------------------------------------------------===//


 /// Builds a type converter that keeps integer types of at most
 /// `widestIntSupportedByTarget` bits unchanged and splits integers of exactly
 /// twice that width into a trailing vector dimension of two narrow elements.
 /// Wider integer types are rejected. Function types are converted by
 /// converting their inputs and results.
 arith::WideIntEmulationConverter::WideIntEmulationConverter(
     unsigned widestIntSupportedByTarget)
     : maxIntWidth(widestIntSupportedByTarget) {
   assert(llvm::isPowerOf2_32(widestIntSupportedByTarget) &&
          "Only power-of-two integer widths are supported");
   assert(widestIntSupportedByTarget >= 2 && "Integer type too narrow");

   // Allow unknown types.
   addConversion([](Type ty) -> std::optional<Type> { return ty; });

   // Scalar case.
   addConversion([this](IntegerType ty) -> std::optional<Type> {
     unsigned width = ty.getWidth();
     if (width <= maxIntWidth)
       return ty;

     // i2N --> vector<2xiN>
     if (width == 2 * maxIntWidth)
       return VectorType::get(2, IntegerType::get(ty.getContext(), maxIntWidth));

     // Any other width is not supported.
     return nullptr;
   });

   // Vector case.
   addConversion([this](VectorType ty) -> std::optional<Type> {
     // Vectors of non-integer elements are left untouched.
     auto intTy = dyn_cast<IntegerType>(ty.getElementType());
     if (!intTy)
       return ty;

     unsigned width = intTy.getWidth();
     if (width <= maxIntWidth)
       return ty;

     // vector<...xi2N> --> vector<...x2xiN>
     if (width == 2 * maxIntWidth) {
       auto newShape = to_vector(ty.getShape());
       newShape.push_back(2);
       Type narrowElemTy = IntegerType::get(ty.getContext(), maxIntWidth);
       return VectorType::get(newShape, narrowElemTy);
     }

     // Any other width is not supported.
     return nullptr;
   });

   // Function case.
   addConversion([this](FunctionType ty) -> std::optional<Type> {
     // Convert inputs and results, e.g.:
     //   (i2N, i2N) -> i2N --> (vector<2xiN>, vector<2xiN>) -> vector<2xiN>
     SmallVector<Type> inputs;
     if (failed(convertTypes(ty.getInputs(), inputs)))
       return nullptr;

     SmallVector<Type> results;
     if (failed(convertTypes(ty.getResults(), results)))
       return nullptr;

     return FunctionType::get(ty.getContext(), inputs, results);
   });
 }


 void arith::populateArithWideIntEmulationPatterns(

     const WideIntEmulationConverter &typeConverter,

     RewritePatternSet &patterns) {

   // Populate `arith.*` conversion patterns.

   patterns.add<

       // Misc ops.

       ConvertConstant, ConvertCmpI, ConvertSelect, ConvertVectorPrint,

       // Binary ops.

       ConvertAddI, ConvertMulI, ConvertShLI, ConvertShRSI, ConvertShRUI,

       ConvertMaxMin<arith::MaxUIOp, arith::CmpIPredicate::ugt>,

       ConvertMaxMin<arith::MaxSIOp, arith::CmpIPredicate::sgt>,

       ConvertMaxMin<arith::MinUIOp, arith::CmpIPredicate::ult>,

       ConvertMaxMin<arith::MinSIOp, arith::CmpIPredicate::slt>, ConvertSubI,

       // Bitwise binary ops.

       ConvertBitwiseBinary<arith::AndIOp>, ConvertBitwiseBinary<arith::OrIOp>,

       ConvertBitwiseBinary<arith::XOrIOp>,

       // Extension and truncation ops.

       ConvertExtSI, ConvertExtUI, ConvertTruncI,

       // Cast ops.

       ConvertIndexCastIntToIndex<arith::IndexCastOp>,

       ConvertIndexCastIntToIndex<arith::IndexCastUIOp>,

       ConvertIndexCastIndexToInt<arith::IndexCastOp, arith::ExtSIOp>,

       ConvertIndexCastIndexToInt<arith::IndexCastUIOp, arith::ExtUIOp>,

       ConvertSIToFP, ConvertUIToFP, ConvertFPToUI, ConvertFPToSI>(

       typeConverter, patterns.getContext());

 }

insertLastDimSlice
static Value insertLastDimSlice(ConversionPatternRewriter &rewriter, Location loc, Value source, Value dest, int64_t lastOffset)
Inserts the source vector slice into the dest vector at offset lastOffset in the last dimension.
Definition: EmulateWideInt.cpp:131

getHalves
static std::pair< APInt, APInt > getHalves(const APInt &value, unsigned newBitWidth)
Returns N bottom and N top bits from value, where N = newBitWidth.
Definition: EmulateWideInt.cpp:41

appendX1Dim
static Value appendX1Dim(ConversionPatternRewriter &rewriter, Location loc, Value input)
Performs a vector shape cast to append an x1 dimension.
Definition: EmulateWideInt.cpp:115

extractLastDimHalves
static std::pair< Value, Value > extractLastDimHalves(ConversionPatternRewriter &rewriter, Location loc, Value input)
Extracts two vector slices from the input whose type is vector<...x2T>, with the first element at off...
Definition: EmulateWideInt.cpp:90

reduceInnermostDim
static Type reduceInnermostDim(VectorType type)
Returns the type with the last (innermost) dimension reduced to x1.
Definition: EmulateWideInt.cpp:53

constructResultVector
static Value constructResultVector(ConversionPatternRewriter &rewriter, Location loc, VectorType resultType, ValueRange resultComponents)
Constructs a new vector of type resultType by creating a series of insertions of resultComponents,...
Definition: EmulateWideInt.cpp:154

dropTrailingX1Dim
static Value dropTrailingX1Dim(ConversionPatternRewriter &rewriter, Location loc, Value input)
Definition: EmulateWideInt.cpp:98

extractLastDimSlice
static Value extractLastDimSlice(ConversionPatternRewriter &rewriter, Location loc, Value input, int64_t lastOffset)
Extracts the input vector slice with elements at the last dimension offset by lastOffset.
Definition: EmulateWideInt.cpp:67

DialectConversion.h

Passes.h

Utils.h

FuncConversions.h

FuncOps.h

TypeUtilities.h

VectorOps.h

WideIntEmulationConverter.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::SmallVector
Definition: LLVM.h:72

mlir::Attribute
Attributes are known-constant values of operations.
Definition: Attributes.h:25

mlir::Builder::getFloatAttr
FloatAttr getFloatAttr(Type type, double value)
Definition: Builders.cpp:250

mlir::Builder::getIntegerType
IntegerType getIntegerType(unsigned width)
Definition: Builders.cpp:67

mlir::Builder::getZeroAttr
TypedAttr getZeroAttr(Type type)
Definition: Builders.cpp:320

mlir::ConversionPatternRewriter
This class implements a pattern rewriter for use with ConversionPatterns.
Definition: DialectConversion.h:726

mlir::ConversionPatternRewriter::replaceOp
void replaceOp(Operation *op, ValueRange newValues) override
Replace the given operation with the new values.
Definition: DialectConversion.cpp:1655

mlir::ConversionTarget
This class describes a specific conversion target.
Definition: DialectConversion.h:868

mlir::DenseElementsAttr::get
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
Definition: BuiltinAttributes.cpp:911

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60

mlir::OpBuilder::createOrFold
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition: Builders.h:517

mlir::OpBuilder::create
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:453

mlir::OpConversionPattern
OpConversionPattern is a wrapper around ConversionPattern that allows for matching and rewriting agai...
Definition: DialectConversion.h:583

mlir::OpConversionPattern::OpAdaptor
typename SourceOp::Adaptor OpAdaptor
Definition: DialectConversion.h:585

mlir::OpConversionPattern::OpConversionPattern
OpConversionPattern(MLIRContext *context, PatternBenefit benefit=1)
Definition: DialectConversion.h:589

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Operation::getContext
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216

mlir::RewritePatternSet
Definition: PatternMatch.h:772

mlir::RewriterBase::notifyMatchFailure
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:682

mlir::RewriterBase::replaceOpWithNewOp
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:500

mlir::TypeConverter::addConversion
void addConversion(FnT &&callback)
Register a conversion function.
Definition: DialectConversion.h:161

mlir::TypeConverter::convertTypes
LogicalResult convertTypes(TypeRange types, SmallVectorImpl< Type > &results) const
Convert the given set of types, filling 'results' as necessary.
Definition: DialectConversion.cpp:2913

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::Type::getContext
MLIRContext * getContext() const
Return the MLIRContext in which this type was uniqued.
Definition: Types.cpp:35

mlir::ValueRange
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

mlir::Value::getContext
MLIRContext * getContext() const
Utility to get the associated MLIRContext that this value is defined in.
Definition: Value.h:108

mlir::Value::getType
Type getType() const
Return the type of this value.
Definition: Value.h:105

mlir::arith::WideIntEmulationConverter
Converts integer types that are too wide for the target by splitting them in two halves and thus turn...
Definition: WideIntEmulationConverter.h:23

mlir::arith::WideIntEmulationConverter::WideIntEmulationConverter
WideIntEmulationConverter(unsigned widestIntSupportedByTarget)
Definition: EmulateWideInt.cpp:1227

Arith.h

BuiltinTypes.h

mlir::arith
Definition: AttrToLLVMConverter.h:20

mlir::arith::populateArithWideIntEmulationPatterns
void populateArithWideIntEmulationPatterns(const WideIntEmulationConverter &typeConverter, RewritePatternSet &patterns)
Adds patterns to emulate wide Arith and Function ops over integer types into supported ones.
Definition: EmulateWideInt.cpp:1287

mlir::detail::enumerate
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344

mlir::presburger::abs
Fraction abs(const Fraction &f)
Definition: Fraction.h:107

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::createScalarOrSplatConstant
Value createScalarOrSplatConstant(OpBuilder &builder, Location loc, Type type, const APInt &value)
Create a constant of type type at location loc whose value is value (an APInt or APFloat whose type m...
Definition: Utils.cpp:271

mlir::getElementTypeOrSelf
Type getElementTypeOrSelf(Type type)
Return the element type or return the type itself.
Definition: TypeUtilities.cpp:23

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition: GreedyPatternRewriteDriver.h:283

mlir::populateCallOpTypeConversionPattern
void populateCallOpTypeConversionPattern(RewritePatternSet &patterns, const TypeConverter &converter)
Add a pattern to the given pattern list to convert the operand and result types of a CallOp with the ...
Definition: FuncConversions.cpp:67

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition: BytecodeImplementation.h:509

mlir::populateReturnOpTypeConversionPattern
void populateReturnOpTypeConversionPattern(RewritePatternSet &patterns, const TypeConverter &converter)
Add a pattern to the given pattern list to rewrite return ops to use operands that have been legalize...
Definition: FuncConversions.cpp:159

mlir::applyPartialConversion
LogicalResult applyPartialConversion(ArrayRef< Operation * > ops, const ConversionTarget &target, const FrozenRewritePatternSet &patterns, ConversionConfig config=ConversionConfig())
Below we define several entry points for operation conversion.
Definition: DialectConversion.cpp:3383