doxygen/MemRef_2Transforms_2EmulateNarrowType_8cpp_source.html

 //===- EmulateNarrowType.cpp - Narrow type emulation ----*- C++

 //-*-===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Dialect/Affine/IR/AffineOps.h"

 #include "mlir/Dialect/Arith/IR/Arith.h"

 #include "mlir/Dialect/Arith/Transforms/NarrowTypeEmulationConverter.h"

 #include "mlir/Dialect/Arith/Transforms/Passes.h"

 #include "mlir/Dialect/Arith/Utils/Utils.h"

 #include "mlir/Dialect/MemRef/IR/MemRef.h"

 #include "mlir/Dialect/MemRef/Transforms/Transforms.h"

 #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"

 #include "mlir/Dialect/Vector/IR/VectorOps.h"

 #include "mlir/IR/Builders.h"

 #include "mlir/IR/BuiltinTypes.h"

 #include "mlir/IR/OpDefinition.h"

 #include "mlir/Transforms/DialectConversion.h"

 #include "llvm/Support/FormatVariadic.h"

 #include "llvm/Support/MathExtras.h"

 #include <cassert>

 #include <type_traits>


 using namespace mlir;


 //===----------------------------------------------------------------------===//

 // Utility functions

 //===----------------------------------------------------------------------===//


 /// Converts a memref::ReinterpretCastOp to the converted type. The result

 /// MemRefType of the old op must have a rank and stride of 1, with static

 /// offset and size. The number of bits in the offset must evenly divide the

 /// bitwidth of the new converted type.

 static LogicalResult

 convertCastingOp(ConversionPatternRewriter &rewriter,

                  memref::ReinterpretCastOp::Adaptor adaptor,

                  memref::ReinterpretCastOp op, MemRefType newTy) {

   auto convertedElementType = newTy.getElementType();

   auto oldElementType = op.getType().getElementType();

   int srcBits = oldElementType.getIntOrFloatBitWidth();

   int dstBits = convertedElementType.getIntOrFloatBitWidth();

   if (dstBits % srcBits != 0) {

     return rewriter.notifyMatchFailure(op,

                                        "only dstBits % srcBits == 0 supported");

   }


   // Only support stride of 1.

   if (llvm::any_of(op.getStaticStrides(),

                    [](int64_t stride) { return stride != 1; })) {

     return rewriter.notifyMatchFailure(op->getLoc(),

                                        "stride != 1 is not supported");

   }


   auto sizes = op.getStaticSizes();

   int64_t offset = op.getStaticOffset(0);

   // Only support static sizes and offsets.

   if (llvm::is_contained(sizes, ShapedType::kDynamic) ||

       offset == ShapedType::kDynamic) {

     return rewriter.notifyMatchFailure(

         op, "dynamic size or offset is not supported");

   }


   int elementsPerByte = dstBits / srcBits;

   if (offset % elementsPerByte != 0) {

     return rewriter.notifyMatchFailure(

         op, "offset not multiple of elementsPerByte is not supported");

   }


   SmallVector<int64_t> size;

   if (sizes.size())

     size.push_back(llvm::divideCeilSigned(sizes[0], elementsPerByte));

   offset = offset / elementsPerByte;


   rewriter.replaceOpWithNewOp<memref::ReinterpretCastOp>(

       op, newTy, adaptor.getSource(), offset, size, op.getStaticStrides());

   return success();

 }


 /// When data is loaded/stored in `targetBits` granularity, but is used in

 /// `sourceBits` granularity (`sourceBits` < `targetBits`), the `targetBits` is

 /// treated as an array of elements of width `sourceBits`.

 /// Return the bit offset of the value at position `srcIdx`. For example, if

 /// `sourceBits` equals to 4 and `targetBits` equals to 8, the x-th element is

 /// located at (x % 2) * 4. Because there are two elements in one i8, and one

 /// element has 4 bits.

 static Value getOffsetForBitwidth(Location loc, OpFoldResult srcIdx,

                                   int sourceBits, int targetBits,

                                   OpBuilder &builder) {

   assert(targetBits % sourceBits == 0);

   AffineExpr s0;

   bindSymbols(builder.getContext(), s0);

   int scaleFactor = targetBits / sourceBits;

   AffineExpr offsetExpr = (s0 % scaleFactor) * sourceBits;

   OpFoldResult offsetVal =

       affine::makeComposedFoldedAffineApply(builder, loc, offsetExpr, {srcIdx});

   Value bitOffset = getValueOrCreateConstantIndexOp(builder, loc, offsetVal);

   IntegerType dstType = builder.getIntegerType(targetBits);

   return builder.create<arith::IndexCastOp>(loc, dstType, bitOffset);

 }


 /// When writing a subbyte size, masked bitwise operations are used to only

 /// modify the relevant bits. This function returns an and mask for clearing

 /// the destination bits in a subbyte write. E.g., when writing to the second

 /// i4 in an i32, 0xFFFFFF0F is created.

 static Value getSubByteWriteMask(Location loc, OpFoldResult linearizedIndices,

                                  int64_t srcBits, int64_t dstBits,

                                  Value bitwidthOffset, OpBuilder &builder) {

   auto dstIntegerType = builder.getIntegerType(dstBits);

   auto maskRightAlignedAttr =

       builder.getIntegerAttr(dstIntegerType, (1 << srcBits) - 1);

   Value maskRightAligned = builder.create<arith::ConstantOp>(

       loc, dstIntegerType, maskRightAlignedAttr);

   Value writeMaskInverse =

       builder.create<arith::ShLIOp>(loc, maskRightAligned, bitwidthOffset);

   auto flipValAttr = builder.getIntegerAttr(dstIntegerType, -1);

   Value flipVal =

       builder.create<arith::ConstantOp>(loc, dstIntegerType, flipValAttr);

   return builder.create<arith::XOrIOp>(loc, writeMaskInverse, flipVal);

 }


 /// Returns the scaled linearized index based on the `srcBits` and `dstBits`

 /// sizes. The input `linearizedIndex` has the granularity of `srcBits`, and

 /// the returned index has the granularity of `dstBits`

 static Value getIndicesForLoadOrStore(OpBuilder &builder, Location loc,

                                       OpFoldResult linearizedIndex,

                                       int64_t srcBits, int64_t dstBits) {

   AffineExpr s0;

   bindSymbols(builder.getContext(), s0);

   int64_t scaler = dstBits / srcBits;

   OpFoldResult scaledLinearizedIndices = affine::makeComposedFoldedAffineApply(

       builder, loc, s0.floorDiv(scaler), {linearizedIndex});

   return getValueOrCreateConstantIndexOp(builder, loc, scaledLinearizedIndices);

 }


 static OpFoldResult

 getLinearizedSrcIndices(OpBuilder &builder, Location loc, int64_t srcBits,

                         const SmallVector<OpFoldResult> &indices,

                         Value memref) {

   auto stridedMetadata =

       builder.create<memref::ExtractStridedMetadataOp>(loc, memref);

   OpFoldResult linearizedIndices;

   std::tie(std::ignore, linearizedIndices) =

       memref::getLinearizedMemRefOffsetAndSize(

           builder, loc, srcBits, srcBits,

           stridedMetadata.getConstifiedMixedOffset(),

           stridedMetadata.getConstifiedMixedSizes(),

           stridedMetadata.getConstifiedMixedStrides(), indices);

   return linearizedIndices;

 }


 namespace {


 //===----------------------------------------------------------------------===//

 // ConvertMemRefAllocation

 //===----------------------------------------------------------------------===//


 template <typename OpTy>

 struct ConvertMemRefAllocation final : OpConversionPattern<OpTy> {

   using OpConversionPattern<OpTy>::OpConversionPattern;


   LogicalResult

   matchAndRewrite(OpTy op, typename OpTy::Adaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     static_assert(std::is_same<OpTy, memref::AllocOp>() ||

                       std::is_same<OpTy, memref::AllocaOp>(),

                   "expected only memref::AllocOp or memref::AllocaOp");

     auto currentType = cast<MemRefType>(op.getMemref().getType());

     auto newResultType =

         this->getTypeConverter()->template convertType<MemRefType>(

             op.getType());

     if (!newResultType) {

       return rewriter.notifyMatchFailure(

           op->getLoc(),

           llvm::formatv("failed to convert memref type: {0}", op.getType()));

     }


     // Special case zero-rank memrefs.

     if (currentType.getRank() == 0) {

       rewriter.replaceOpWithNewOp<OpTy>(op, newResultType, ValueRange{},

                                         adaptor.getSymbolOperands(),

                                         adaptor.getAlignmentAttr());

       return success();

     }


     Location loc = op.getLoc();

     OpFoldResult zero = rewriter.getIndexAttr(0);


     // Get linearized type.

     int srcBits = currentType.getElementType().getIntOrFloatBitWidth();

     int dstBits = newResultType.getElementType().getIntOrFloatBitWidth();

     SmallVector<OpFoldResult> sizes = op.getMixedSizes();


     memref::LinearizedMemRefInfo linearizedMemRefInfo =

         memref::getLinearizedMemRefOffsetAndSize(

             rewriter, loc, srcBits, dstBits, /*offset =*/zero, sizes);

     SmallVector<Value> dynamicLinearizedSize;

     if (!newResultType.hasStaticShape()) {

       dynamicLinearizedSize.push_back(getValueOrCreateConstantIndexOp(

           rewriter, loc, linearizedMemRefInfo.linearizedSize));

     }


     rewriter.replaceOpWithNewOp<OpTy>(op, newResultType, dynamicLinearizedSize,

                                       adaptor.getSymbolOperands(),

                                       adaptor.getAlignmentAttr());

     return success();

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertMemRefAssumeAlignment

 //===----------------------------------------------------------------------===//


 struct ConvertMemRefAssumeAlignment final

     : OpConversionPattern<memref::AssumeAlignmentOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(memref::AssumeAlignmentOp op, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     Type newTy = getTypeConverter()->convertType(op.getMemref().getType());

     if (!newTy) {

       return rewriter.notifyMatchFailure(

           op->getLoc(), llvm::formatv("failed to convert memref type: {0}",

                                       op.getMemref().getType()));

     }


     rewriter.replaceOpWithNewOp<memref::AssumeAlignmentOp>(

         op, newTy, adaptor.getMemref(), adaptor.getAlignmentAttr());

     return success();

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertMemRefCopy

 //===----------------------------------------------------------------------===//


 struct ConvertMemRefCopy final : OpConversionPattern<memref::CopyOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(memref::CopyOp op, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     auto maybeRankedSource = dyn_cast<MemRefType>(op.getSource().getType());

     auto maybeRankedDest = dyn_cast<MemRefType>(op.getTarget().getType());

     if (maybeRankedSource && maybeRankedDest &&

         maybeRankedSource.getLayout() != maybeRankedDest.getLayout())

       return rewriter.notifyMatchFailure(

           op, llvm::formatv("memref.copy emulation with distinct layouts ({0} "

                             "and {1}) is currently unimplemented",

                             maybeRankedSource.getLayout(),

                             maybeRankedDest.getLayout()));

     rewriter.replaceOpWithNewOp<memref::CopyOp>(op, adaptor.getSource(),

                                                 adaptor.getTarget());

     return success();

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertMemRefDealloc

 //===----------------------------------------------------------------------===//


 struct ConvertMemRefDealloc final : OpConversionPattern<memref::DeallocOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(memref::DeallocOp op, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     rewriter.replaceOpWithNewOp<memref::DeallocOp>(op, adaptor.getMemref());

     return success();

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertMemRefLoad

 //===----------------------------------------------------------------------===//


 struct ConvertMemRefLoad final : OpConversionPattern<memref::LoadOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(memref::LoadOp op, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     auto convertedType = cast<MemRefType>(adaptor.getMemref().getType());

     auto convertedElementType = convertedType.getElementType();

     auto oldElementType = op.getMemRefType().getElementType();

     int srcBits = oldElementType.getIntOrFloatBitWidth();

     int dstBits = convertedElementType.getIntOrFloatBitWidth();

     if (dstBits % srcBits != 0) {

       return rewriter.notifyMatchFailure(

           op, "only dstBits % srcBits == 0 supported");

     }


     Location loc = op.getLoc();

     // Special case 0-rank memref loads.

     Value bitsLoad;

     if (convertedType.getRank() == 0) {

       bitsLoad = rewriter.create<memref::LoadOp>(loc, adaptor.getMemref(),

                                                  ValueRange{});

     } else {

       // Linearize the indices of the original load instruction. Do not account

       // for the scaling yet. This will be accounted for later.

       OpFoldResult linearizedIndices = getLinearizedSrcIndices(

           rewriter, loc, srcBits, adaptor.getIndices(), op.getMemRef());


       Value newLoad = rewriter.create<memref::LoadOp>(

           loc, adaptor.getMemref(),

           getIndicesForLoadOrStore(rewriter, loc, linearizedIndices, srcBits,

                                    dstBits));


       // Get the offset and shift the bits to the rightmost.

       // Note, currently only the big-endian is supported.

       Value bitwidthOffset = getOffsetForBitwidth(loc, linearizedIndices,

                                                   srcBits, dstBits, rewriter);

       bitsLoad = rewriter.create<arith::ShRSIOp>(loc, newLoad, bitwidthOffset);

     }


     // Get the corresponding bits. If the arith computation bitwidth equals

     // to the emulated bitwidth, we apply a mask to extract the low bits.

     // It is not clear if this case actually happens in practice, but we keep

     // the operations just in case. Otherwise, if the arith computation bitwidth

     // is different from the emulated bitwidth we truncate the result.

     Operation *result;

     auto resultTy = getTypeConverter()->convertType(oldElementType);

     if (resultTy == convertedElementType) {

       auto mask = rewriter.create<arith::ConstantOp>(

           loc, convertedElementType,

           rewriter.getIntegerAttr(convertedElementType, (1 << srcBits) - 1));


       result = rewriter.create<arith::AndIOp>(loc, bitsLoad, mask);

     } else {

       result = rewriter.create<arith::TruncIOp>(loc, resultTy, bitsLoad);

     }


     rewriter.replaceOp(op, result->getResult(0));

     return success();

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertMemRefMemorySpaceCast

 //===----------------------------------------------------------------------===//


 struct ConvertMemRefMemorySpaceCast final

     : OpConversionPattern<memref::MemorySpaceCastOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(memref::MemorySpaceCastOp op, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     Type newTy = getTypeConverter()->convertType(op.getDest().getType());

     if (!newTy) {

       return rewriter.notifyMatchFailure(

           op->getLoc(), llvm::formatv("failed to convert memref type: {0}",

                                       op.getDest().getType()));

     }


     rewriter.replaceOpWithNewOp<memref::MemorySpaceCastOp>(op, newTy,

                                                            adaptor.getSource());

     return success();

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertMemRefReinterpretCast

 //===----------------------------------------------------------------------===//


 /// Output types should be at most one dimensional, so only the 0 or 1

 /// dimensional cases are supported.

 struct ConvertMemRefReinterpretCast final

     : OpConversionPattern<memref::ReinterpretCastOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(memref::ReinterpretCastOp op, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     MemRefType newTy =

         getTypeConverter()->convertType<MemRefType>(op.getType());

     if (!newTy) {

       return rewriter.notifyMatchFailure(

           op->getLoc(),

           llvm::formatv("failed to convert memref type: {0}", op.getType()));

     }


     // Only support for 0 or 1 dimensional cases.

     if (op.getType().getRank() > 1) {

       return rewriter.notifyMatchFailure(

           op->getLoc(), "subview with rank > 1 is not supported");

     }


     return convertCastingOp(rewriter, adaptor, op, newTy);

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertMemrefStore

 //===----------------------------------------------------------------------===//


 struct ConvertMemrefStore final : OpConversionPattern<memref::StoreOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(memref::StoreOp op, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     auto convertedType = cast<MemRefType>(adaptor.getMemref().getType());

     int srcBits = op.getMemRefType().getElementTypeBitWidth();

     int dstBits = convertedType.getElementTypeBitWidth();

     auto dstIntegerType = rewriter.getIntegerType(dstBits);

     if (dstBits % srcBits != 0) {

       return rewriter.notifyMatchFailure(

           op, "only dstBits % srcBits == 0 supported");

     }


     Location loc = op.getLoc();

     Value extendedInput = rewriter.create<arith::ExtUIOp>(loc, dstIntegerType,

                                                           adaptor.getValue());


     // Special case 0-rank memref stores. No need for masking.

     if (convertedType.getRank() == 0) {

       rewriter.create<memref::AtomicRMWOp>(loc, arith::AtomicRMWKind::assign,

                                            extendedInput, adaptor.getMemref(),

                                            ValueRange{});

       rewriter.eraseOp(op);

       return success();

     }


     OpFoldResult linearizedIndices = getLinearizedSrcIndices(

         rewriter, loc, srcBits, adaptor.getIndices(), op.getMemRef());

     Value storeIndices = getIndicesForLoadOrStore(

         rewriter, loc, linearizedIndices, srcBits, dstBits);

     Value bitwidthOffset = getOffsetForBitwidth(loc, linearizedIndices, srcBits,

                                                 dstBits, rewriter);

     Value writeMask = getSubByteWriteMask(loc, linearizedIndices, srcBits,

                                           dstBits, bitwidthOffset, rewriter);

     // Align the value to write with the destination bits

     Value alignedVal =

         rewriter.create<arith::ShLIOp>(loc, extendedInput, bitwidthOffset);


     // Clear destination bits

     rewriter.create<memref::AtomicRMWOp>(loc, arith::AtomicRMWKind::andi,

                                          writeMask, adaptor.getMemref(),

                                          storeIndices);

     // Write srcs bits to destination

     rewriter.create<memref::AtomicRMWOp>(loc, arith::AtomicRMWKind::ori,

                                          alignedVal, adaptor.getMemref(),

                                          storeIndices);

     rewriter.eraseOp(op);

     return success();

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertMemRefSubview

 //===----------------------------------------------------------------------===//


 /// Emulating narrow ints on subview have limited support, supporting only

 /// static offset and size and stride of 1. Ideally, the subview should be

 /// folded away before running narrow type emulation, and this pattern should

 /// only run for cases that can't be folded.

 struct ConvertMemRefSubview final : OpConversionPattern<memref::SubViewOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(memref::SubViewOp subViewOp, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     MemRefType newTy =

         getTypeConverter()->convertType<MemRefType>(subViewOp.getType());

     if (!newTy) {

       return rewriter.notifyMatchFailure(

           subViewOp->getLoc(),

           llvm::formatv("failed to convert memref type: {0}",

                         subViewOp.getType()));

     }


     Location loc = subViewOp.getLoc();

     Type convertedElementType = newTy.getElementType();

     Type oldElementType = subViewOp.getType().getElementType();

     int srcBits = oldElementType.getIntOrFloatBitWidth();

     int dstBits = convertedElementType.getIntOrFloatBitWidth();

     if (dstBits % srcBits != 0)

       return rewriter.notifyMatchFailure(

           subViewOp, "only dstBits % srcBits == 0 supported");


     // Only support stride of 1.

     if (llvm::any_of(subViewOp.getStaticStrides(),

                      [](int64_t stride) { return stride != 1; })) {

       return rewriter.notifyMatchFailure(subViewOp->getLoc(),

                                          "stride != 1 is not supported");

     }


     if (!memref::isStaticShapeAndContiguousRowMajor(subViewOp.getType())) {

       return rewriter.notifyMatchFailure(

           subViewOp, "the result memref type is not contiguous");

     }


     auto sizes = subViewOp.getStaticSizes();

     int64_t lastOffset = subViewOp.getStaticOffsets().back();

     // Only support static sizes and offsets.

     if (llvm::is_contained(sizes, ShapedType::kDynamic) ||

         lastOffset == ShapedType::kDynamic) {

       return rewriter.notifyMatchFailure(

           subViewOp->getLoc(), "dynamic size or offset is not supported");

     }


     // Transform the offsets, sizes and strides according to the emulation.

     auto stridedMetadata = rewriter.create<memref::ExtractStridedMetadataOp>(

         loc, subViewOp.getViewSource());


     OpFoldResult linearizedIndices;

     auto strides = stridedMetadata.getConstifiedMixedStrides();

     memref::LinearizedMemRefInfo linearizedInfo;

     std::tie(linearizedInfo, linearizedIndices) =

         memref::getLinearizedMemRefOffsetAndSize(

             rewriter, loc, srcBits, dstBits,

             stridedMetadata.getConstifiedMixedOffset(),

             subViewOp.getMixedSizes(), strides,

             getMixedValues(adaptor.getStaticOffsets(), adaptor.getOffsets(),

                            rewriter));


     rewriter.replaceOpWithNewOp<memref::SubViewOp>(

         subViewOp, newTy, adaptor.getSource(), linearizedIndices,

         linearizedInfo.linearizedSize, strides.back());

     return success();

   }

 };


 //===----------------------------------------------------------------------===//

 // ConvertMemRefCollapseShape

 //===----------------------------------------------------------------------===//


 /// Emulating a `memref.collapse_shape` becomes a no-op after emulation given

 /// that we flatten memrefs to a single dimension as part of the emulation and

 /// there is no dimension to collapse any further.

 struct ConvertMemRefCollapseShape final

     : OpConversionPattern<memref::CollapseShapeOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(memref::CollapseShapeOp collapseShapeOp, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     Value srcVal = adaptor.getSrc();

     auto newTy = dyn_cast<MemRefType>(srcVal.getType());

     if (!newTy)

       return failure();


     if (newTy.getRank() != 1)

       return failure();


     rewriter.replaceOp(collapseShapeOp, srcVal);

     return success();

   }

 };


 /// Emulating a `memref.expand_shape` becomes a no-op after emulation given

 /// that we flatten memrefs to a single dimension as part of the emulation and

 /// the expansion would just have been undone.

 struct ConvertMemRefExpandShape final

     : OpConversionPattern<memref::ExpandShapeOp> {

   using OpConversionPattern::OpConversionPattern;


   LogicalResult

   matchAndRewrite(memref::ExpandShapeOp expandShapeOp, OpAdaptor adaptor,

                   ConversionPatternRewriter &rewriter) const override {

     Value srcVal = adaptor.getSrc();

     auto newTy = dyn_cast<MemRefType>(srcVal.getType());

     if (!newTy)

       return failure();


     if (newTy.getRank() != 1)

       return failure();


     rewriter.replaceOp(expandShapeOp, srcVal);

     return success();

   }

 };

 } // end anonymous namespace


 //===----------------------------------------------------------------------===//

 // Public Interface Definition

 //===----------------------------------------------------------------------===//


 void memref::populateMemRefNarrowTypeEmulationPatterns(

     const arith::NarrowTypeEmulationConverter &typeConverter,

     RewritePatternSet &patterns) {


   // Populate `memref.*` conversion patterns.

   patterns.add<ConvertMemRefAllocation<memref::AllocOp>,

                ConvertMemRefAllocation<memref::AllocaOp>, ConvertMemRefCopy,

                ConvertMemRefDealloc, ConvertMemRefCollapseShape,

                ConvertMemRefExpandShape, ConvertMemRefLoad, ConvertMemrefStore,

                ConvertMemRefAssumeAlignment, ConvertMemRefMemorySpaceCast,

                ConvertMemRefSubview, ConvertMemRefReinterpretCast>(

       typeConverter, patterns.getContext());

   memref::populateResolveExtractStridedMetadataPatterns(patterns);

 }


 static SmallVector<int64_t> getLinearizedShape(MemRefType ty, int srcBits,

                                                int dstBits) {

   if (ty.getRank() == 0)

     return {};


   int64_t linearizedShape = 1;

   for (auto shape : ty.getShape()) {

     if (shape == ShapedType::kDynamic)

       return {ShapedType::kDynamic};

     linearizedShape *= shape;

   }

   int scale = dstBits / srcBits;

   // Scale the size to the ceilDiv(linearizedShape, scale)

   // to accomodate all the values.

   linearizedShape = (linearizedShape + scale - 1) / scale;

   return {linearizedShape};

 }


 void memref::populateMemRefNarrowTypeEmulationConversions(

     arith::NarrowTypeEmulationConverter &typeConverter) {

   typeConverter.addConversion(

       [&typeConverter](MemRefType ty) -> std::optional<Type> {

         auto intTy = dyn_cast<IntegerType>(ty.getElementType());

         if (!intTy)

           return ty;


         unsigned width = intTy.getWidth();

         unsigned loadStoreWidth = typeConverter.getLoadStoreBitwidth();

         if (width >= loadStoreWidth)

           return ty;


         // Currently only handle innermost stride being 1, checking

         SmallVector<int64_t> strides;

         int64_t offset;

         if (failed(ty.getStridesAndOffset(strides, offset)))

           return nullptr;

         if (!strides.empty() && strides.back() != 1)

           return nullptr;


         auto newElemTy = IntegerType::get(ty.getContext(), loadStoreWidth,

                                           intTy.getSignedness());

         if (!newElemTy)

           return nullptr;


         StridedLayoutAttr layoutAttr;

         // If the offset is 0, we do not need a strided layout as the stride is

         // 1, so we only use the strided layout if the offset is not 0.

         if (offset != 0) {

           if (offset == ShapedType::kDynamic) {

             layoutAttr = StridedLayoutAttr::get(ty.getContext(), offset,

                                                 ArrayRef<int64_t>{1});

           } else {

             // Check if the number of bytes are a multiple of the loadStoreWidth

             // and if so, divide it by the loadStoreWidth to get the offset.

             if ((offset * width) % loadStoreWidth != 0)

               return std::nullopt;

             offset = (offset * width) / loadStoreWidth;


             layoutAttr = StridedLayoutAttr::get(ty.getContext(), offset,

                                                 ArrayRef<int64_t>{1});

           }

         }


         return MemRefType::get(getLinearizedShape(ty, width, loadStoreWidth),

                                newElemTy, layoutAttr, ty.getMemorySpace());

       });

 }

AffineOps.h

Builders.h

DialectConversion.h

Passes.h

Utils.h

MemRefUtils.h

getOffsetForBitwidth
static Value getOffsetForBitwidth(Location loc, OpFoldResult srcIdx, int sourceBits, int targetBits, OpBuilder &builder)
When data is loaded/stored in targetBits granularity, but is used in sourceBits granularity (sourceBi...
Definition: EmulateNarrowType.cpp:90

getIndicesForLoadOrStore
static Value getIndicesForLoadOrStore(OpBuilder &builder, Location loc, OpFoldResult linearizedIndex, int64_t srcBits, int64_t dstBits)
Returns the scaled linearized index based on the srcBits and dstBits sizes.
Definition: EmulateNarrowType.cpp:128

getLinearizedShape
static SmallVector< int64_t > getLinearizedShape(MemRefType ty, int srcBits, int dstBits)
Definition: EmulateNarrowType.cpp:600

convertCastingOp
static LogicalResult convertCastingOp(ConversionPatternRewriter &rewriter, memref::ReinterpretCastOp::Adaptor adaptor, memref::ReinterpretCastOp op, MemRefType newTy)
Converts a memref::ReinterpretCastOp to the converted type.
Definition: EmulateNarrowType.cpp:39

getLinearizedSrcIndices
static OpFoldResult getLinearizedSrcIndices(OpBuilder &builder, Location loc, int64_t srcBits, const SmallVector< OpFoldResult > &indices, Value memref)
Definition: EmulateNarrowType.cpp:140

getSubByteWriteMask
static Value getSubByteWriteMask(Location loc, OpFoldResult linearizedIndices, int64_t srcBits, int64_t dstBits, Value bitwidthOffset, OpBuilder &builder)
When writing a subbyte size, masked bitwise operations are used to only modify the relevant bits.
Definition: EmulateNarrowType.cpp:109

NarrowTypeEmulationConverter.h

OpDefinition.h

VectorOps.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::SmallVector
Definition: LLVM.h:72

mlir::AffineExpr
Base type for affine expression.
Definition: AffineExpr.h:68

mlir::AffineExpr::floorDiv
AffineExpr floorDiv(uint64_t v) const
Definition: AffineExpr.cpp:921

mlir::Builder::getIndexAttr
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:106

mlir::Builder::getIntegerAttr
IntegerAttr getIntegerAttr(Type type, int64_t value)
Definition: Builders.cpp:226

mlir::Builder::getIntegerType
IntegerType getIntegerType(unsigned width)
Definition: Builders.cpp:69

mlir::Builder::getContext
MLIRContext * getContext() const
Definition: Builders.h:55

mlir::ConversionPatternRewriter
This class implements a pattern rewriter for use with ConversionPatterns.
Definition: DialectConversion.h:726

mlir::ConversionPatternRewriter::replaceOp
void replaceOp(Operation *op, ValueRange newValues) override
Replace the given operation with the new values.
Definition: DialectConversion.cpp:1702

mlir::ConversionPatternRewriter::eraseOp
void eraseOp(Operation *op) override
PatternRewriter hook for erasing a dead operation.
Definition: DialectConversion.cpp:1727

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::OpBuilder
This class helps build Operations.
Definition: Builders.h:205

mlir::OpBuilder::create
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:455

mlir::OpConversionPattern
OpConversionPattern is a wrapper around ConversionPattern that allows for matching and rewriting agai...
Definition: DialectConversion.h:583

mlir::OpConversionPattern::OpConversionPattern
OpConversionPattern(MLIRContext *context, PatternBenefit benefit=1)
Definition: DialectConversion.h:589

mlir::OpFoldResult
This class represents a single result from folding an operation.
Definition: OpDefinition.h:271

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Operation::getResult
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407

mlir::RewritePatternSet
Definition: PatternMatch.h:771

mlir::RewriterBase::notifyMatchFailure
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition: PatternMatch.h:681

mlir::RewriterBase::replaceOpWithNewOp
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:500

mlir::TypeConverter::addConversion
void addConversion(FnT &&callback)
Register a conversion function.
Definition: DialectConversion.h:161

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::Type::getIntOrFloatBitWidth
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
Definition: Types.cpp:122

mlir::ValueRange
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

mlir::Value::getType
Type getType() const
Return the type of this value.
Definition: Value.h:105

mlir::arith::NarrowTypeEmulationConverter
Converts narrow integer or float types that are not supported by the target hardware to wider types.
Definition: NarrowTypeEmulationConverter.h:20

mlir::arith::NarrowTypeEmulationConverter::getLoadStoreBitwidth
unsigned getLoadStoreBitwidth() const
Definition: NarrowTypeEmulationConverter.h:24

Arith.h

MemRef.h

Transforms.h

BuiltinTypes.h

mlir::affine::makeComposedFoldedAffineApply
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
Definition: AffineOps.cpp:1322

mlir::memref::populateMemRefNarrowTypeEmulationConversions
void populateMemRefNarrowTypeEmulationConversions(arith::NarrowTypeEmulationConverter &typeConverter)
Appends type conversions for emulating memref operations over narrow types with ops over wider types.
Definition: EmulateNarrowType.cpp:618

mlir::memref::getLinearizedMemRefOffsetAndSize
std::pair< LinearizedMemRefInfo, OpFoldResult > getLinearizedMemRefOffsetAndSize(OpBuilder &builder, Location loc, int srcBits, int dstBits, OpFoldResult offset, ArrayRef< OpFoldResult > sizes, ArrayRef< OpFoldResult > strides, ArrayRef< OpFoldResult > indices={})
Definition: MemRefUtils.cpp:52

mlir::memref::isStaticShapeAndContiguousRowMajor
bool isStaticShapeAndContiguousRowMajor(MemRefType type)
Returns true, if the memref type has static shapes and represents a contiguous chunk of memory.
Definition: MemRefUtils.cpp:24

mlir::memref::populateResolveExtractStridedMetadataPatterns
void populateResolveExtractStridedMetadataPatterns(RewritePatternSet &patterns)
Appends patterns for resolving memref.extract_strided_metadata into memref.extract_strided_metadata o...
Definition: ExpandStridedMetadata.cpp:1222

mlir::memref::populateMemRefNarrowTypeEmulationPatterns
void populateMemRefNarrowTypeEmulationPatterns(const arith::NarrowTypeEmulationConverter &typeConverter, RewritePatternSet &patterns)
Appends patterns for emulating memref operations over narrow types with ops over wider types.
Definition: EmulateNarrowType.cpp:585

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition: GreedyPatternRewriteDriver.h:283

mlir::bindSymbols
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
Definition: AffineExpr.h:325

mlir::getValueOrCreateConstantIndexOp
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:112

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition: BytecodeImplementation.h:509

mlir::getMixedValues
SmallVector< OpFoldResult > getMixedValues(ArrayRef< int64_t > staticValues, ValueRange dynamicValues, MLIRContext *context)
Return a vector of OpFoldResults with the same size a staticValues, but all elements for which Shaped...
Definition: StaticValueUtils.cpp:187

mlir::memref::LinearizedMemRefInfo
For a memref with offset, sizes and strides, returns the offset, size, and potentially the size padde...
Definition: MemRefUtils.h:50

mlir::memref::LinearizedMemRefInfo::linearizedSize
OpFoldResult linearizedSize
Definition: MemRefUtils.h:52