doxygen/ConvertToDestinationStyle%5F8cpp%5Fsource.html

 //===- ConvertToDestinationStyle.cpp - Convert non-DPS to DPS ops ---------===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // This file contains patterns to convert non-DPS ops to DPS ops. New

 // tensor.empty ops are inserted as a destination. Such tensor.empty can be

 // eliminated with "empty tensor elimination", allowing them to bufferize

 // without an allocation (assuming there are no further conflicts).

 //

 //===----------------------------------------------------------------------===//

 //

 #include "mlir/Dialect/Arith/IR/Arith.h"

 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"

 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"

 #include "mlir/Dialect/Linalg/IR/Linalg.h"

 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"

 #include "mlir/Dialect/Tensor/IR/Tensor.h"

 #include "mlir/Dialect/Utils/StaticValueUtils.h"

 #include "mlir/IR/Matchers.h"

 #include "mlir/IR/PatternMatch.h"

 #include "llvm/ADT/STLExtras.h"


 using namespace mlir;

 using namespace mlir::tensor;


 /// Recursively emits one tensor.insert per element, walking the index space
 /// described by `shape` with the outermost dimension varying slowest.
 ///
 /// `dim` is the dimension handled by this invocation, `indices` is scratch
 /// storage holding the index currently selected for each dimension, and
 /// `constants[i]` is used as the index value for position `i`. `elementIt` is
 /// advanced once per emitted insert. Returns the tensor produced by the last
 /// insert, or `destination` itself if no insert was emitted.
 static Value createInserts(RewriterBase &rewriter, Location loc, int dim,
                            Value destination, ArrayRef<int64_t> shape,
                            ArrayRef<Value> constants,
                            OperandRange::iterator &elementIt,
                            SmallVectorImpl<Value> &indices) {
   const int innermostDim = static_cast<int>(shape.size()) - 1;

   if (dim != innermostDim) {
     // Outer dimension: pin its index and let the recursion enumerate the
     // remaining (inner) dimensions.
     for (int pos = 0; pos < shape[dim]; ++pos) {
       indices[dim] = constants[pos];
       destination = createInserts(rewriter, loc, dim + 1, destination, shape,
                                   constants, elementIt, indices);
     }
     return destination;
   }

   // Innermost dimension: consume one element per position and chain the
   // inserts through `destination`.
   for (int pos = 0; pos < shape.back(); ++pos) {
     indices.back() = constants[pos];
     Value element = *elementIt;
     destination =
         tensor::InsertOp::create(rewriter, loc, element, destination, indices);
     ++elementIt;
   }
   return destination;
 }


 /// Emits a copy of the ranked tensor `tensorSource` into the memref
 /// `memrefDest`. The op used for the copy is selected by `options.memcpyOp`.
 static void createMemcpy(OpBuilder &b, Location loc, Value tensorSource,
                          Value memrefDest,
                          const linalg::BufferizeToAllocationOptions &options) {
   using CopyKind = linalg::BufferizeToAllocationOptions::MemcpyOp;

   auto tensorType = dyn_cast<RankedTensorType>(tensorSource.getType());
   assert(tensorType && "expected ranked tensor");
   assert(isa<MemRefType>(memrefDest.getType()) && "expected ranked memref");

   // Shared by the memref.copy and linalg.copy paths: view the source tensor as
   // a read-only buffer.
   // TODO: Support custom memory space on source.
   // The layout map of the source is not known yet, so a fully dynamic layout
   // is used for best compatibility.
   auto createSourceBuffer = [&]() -> Value {
     return bufferization::ToBufferOp::create(
         b, loc, bufferization::getMemRefTypeWithFullyDynamicLayout(tensorType),
         tensorSource, /*readOnly=*/true);
   };

   switch (options.memcpyOp) {
   case CopyKind::MaterializeInDestination: {
     // Note: This is the preferred way of memcpy'ing because no layout map
     // and/or memory space must be specified for the source.
     auto materializeOp = bufferization::MaterializeInDestinationOp::create(
         b, loc, tensorSource, memrefDest);
     materializeOp.setWritable(true);
     break;
   }
   case CopyKind::MemrefCopy: {
     Value sourceBuffer = createSourceBuffer();
     memref::CopyOp::create(b, loc, sourceBuffer, memrefDest);
     break;
   }
   case CopyKind::LinalgCopy: {
     Value sourceBuffer = createSourceBuffer();
     linalg::CopyOp::create(b, loc, sourceBuffer, memrefDest);
     break;
   }
   }
 }


 /// Emits the op that writes the padding value of `padOp` into `dest` and
 /// returns it:
 ///   * a linalg.fill if the padding body yields a constant, or a value that is
 ///     defined outside of `padOp`;
 ///   * otherwise a linalg.generic into which the body of `padOp` is merged,
 ///     with the body's block arguments replaced by linalg.index ops.
 /// The rewriter's insertion point is restored on return.
 static Operation *movePaddingToFillOrGenericOp(RewriterBase &rewriter,
                                                Location loc, PadOp padOp,
                                                Value dest) {
   OpBuilder::InsertionGuard guard(rewriter);
   Operation *padOperation = padOp.getOperation();

   // The value yielded by the padding body decides whether a linalg.fill is
   // sufficient or a linalg.generic is needed.
   auto padTerminator = cast<tensor::YieldOp>(padOp.getBody()->getTerminator());
   Value yieldedValue = padTerminator.getValue();

   // Padding with a constant: re-materialize the constant and fill with it.
   Attribute constYieldedValue;
   if (matchPattern(yieldedValue, m_Constant(&constYieldedValue))) {
     Dialect *arithDialect =
         rewriter.getContext()->getLoadedDialect<arith::ArithDialect>();
     Operation *constantOp = arithDialect->materializeConstant(
         rewriter, constYieldedValue, yieldedValue.getType(),
         yieldedValue.getLoc());
     Value fillValue = constantOp->getResult(0);
     return linalg::FillOp::create(rewriter, loc, ValueRange(fillValue),
                                   ValueRange(dest));
   }

   // Padding with an invariant value: the yielded value is either a block
   // argument or an op result whose parent op is not the tensor.pad itself.
   bool definedOutsideOfPad;
   if (auto bbArg = dyn_cast<BlockArgument>(yieldedValue))
     definedOutsideOfPad = bbArg.getOwner()->getParentOp() != padOperation;
   else
     definedOutsideOfPad =
         yieldedValue.getDefiningOp()->getParentOp() != padOperation;
   if (definedOutsideOfPad)
     return linalg::FillOp::create(rewriter, loc, ValueRange(yieldedValue),
                                   ValueRange(dest));

   // General case: build an all-parallel linalg.generic over `dest` with an
   // identity indexing map.
   RankedTensorType resultType = padOp.getResultType();
   const int64_t rank = resultType.getRank();
   SmallVector<utils::IteratorType> iteratorTypes(rank,
                                                  utils::IteratorType::parallel);
   SmallVector<AffineMap> indexingMaps(1,
                                       rewriter.getMultiDimIdentityMap(rank));
   auto genericOp = linalg::GenericOp::create(
       rewriter, loc, resultType, /*inputs=*/ValueRange(),
       /*outputs=*/ValueRange{dest}, /*indexingMaps=*/indexingMaps,
       iteratorTypes);
   Block *genericBody = rewriter.createBlock(
       &genericOp->getRegion(0), {}, resultType.getElementType(), loc);

   // Move the padding body into the generic op; one linalg.index per dimension
   // stands in for the corresponding block argument of the padding body.
   rewriter.setInsertionPointToStart(genericBody);
   SmallVector<Value> indexValues;
   for (int64_t d = 0; d < rank; ++d)
     indexValues.push_back(linalg::IndexOp::create(rewriter, loc, d));
   rewriter.mergeBlocks(padOp.getBody(), genericBody, indexValues);

   // The merged block still ends in tensor.yield; swap it for linalg.yield.
   auto mergedYield = cast<tensor::YieldOp>(genericBody->getTerminator());
   rewriter.replaceOpWithNewOp<linalg::YieldOp>(mergedYield,
                                                mergedYield.getValue());
   return genericOp;
 }


 /// Returns one size value per dynamic dimension of the ranked tensor `value`,
 /// in dimension order. Statically shaped tensors yield an empty vector.
 ///
 /// If `value` is an op result and the result shapes of its defining op can be
 /// reified, the reified sizes are used. Otherwise a tensor.dim op is created
 /// for every dynamic dimension.
 static SmallVector<Value> reifyOrComputeDynamicSizes(OpBuilder &b,
                                                      Value value) {
   auto tensorType = cast<RankedTensorType>(value.getType());
   SmallVector<Value> dynSizes;
   if (tensorType.hasStaticShape())
     return dynSizes;

   const int64_t rank = tensorType.getRank();

   // First choice: reify the sizes from the defining op.
   if (auto opResult = dyn_cast<OpResult>(value)) {
     ReifiedRankedShapedTypeDims reifiedShape;
     if (succeeded(reifyResultShapes(b, opResult.getOwner(), reifiedShape))) {
       const auto &resultDims = reifiedShape[opResult.getResultNumber()];
       for (int64_t d = 0; d < rank; ++d) {
         if (!tensorType.isDynamicDim(d))
           continue;
         dynSizes.push_back(cast<Value>(resultDims[d]));
       }
       return dynSizes;
     }
   }

   // Fallback: query each dynamic dimension with tensor.dim.
   Location valueLoc = value.getLoc();
   for (int64_t d = 0; d < rank; ++d) {
     if (!tensorType.isDynamicDim(d))
       continue;
     Value dimIndex = arith::ConstantIndexOp::create(b, valueLoc, d);
     dynSizes.push_back(DimOp::create(b, valueLoc, value, dimIndex));
   }
   return dynSizes;
 }


 /// Creates a buffer allocation matching the ranked tensor `value` (static
 /// identity layout, given `memorySpace`) at the rewriter's current insertion
 /// point and returns it.
 ///
 /// `options.allocOp` selects memref.alloc or memref.alloca; for any other
 /// setting a null Value is returned. For memref.alloc with
 /// `options.emitDealloc`, a memref.dealloc is additionally placed right before
 /// the terminator of the insertion block. The insertion point is restored on
 /// return.
 static Value
 createAllocationForTensor(RewriterBase &rewriter, Location loc, Value value,
                           const linalg::BufferizeToAllocationOptions &options,
                           Attribute memorySpace = {}) {
   using AllocKind = linalg::BufferizeToAllocationOptions::AllocOp;

   OpBuilder::InsertionGuard guard(rewriter);

   // Derive the buffer type and the sizes of its dynamic dimensions.
   auto tensorType = cast<RankedTensorType>(value.getType());
   auto memrefType =
       cast<MemRefType>(bufferization::getMemRefTypeWithStaticIdentityLayout(
           tensorType, memorySpace));
   SmallVector<Value> dynamicSizes = reifyOrComputeDynamicSizes(rewriter, value);

   if (options.allocOp == AllocKind::MemrefAlloc) {
     Value heapBuffer =
         memref::AllocOp::create(rewriter, loc, memrefType, dynamicSizes);
     if (options.emitDealloc) {
       // Place deallocation at the end of the block.
       Operation *terminator = rewriter.getInsertionBlock()->getTerminator();
       rewriter.setInsertionPoint(terminator);
       memref::DeallocOp::create(rewriter, loc, heapBuffer);
     }
     return heapBuffer;
   }

   if (options.allocOp == AllocKind::MemrefAlloca) {
     // No dealloc is needed.
     Value stackBuffer =
         memref::AllocaOp::create(rewriter, loc, memrefType, dynamicSizes);
     return stackBuffer;
   }

   return Value();
 }


 /// Materializes the result of `padOp` in a new buffer allocation and returns
 /// that allocation.
 ///
 /// The allocation is created before `insertionPoint` if given, otherwise
 /// before `padOp`. All remaining ops are created before `padOp`: the padding
 /// fill (skipped when there is no padding), a copy of the source into a
 /// subview of the allocation, and a bufferization.to_tensor that replaces
 /// `padOp`.
 Value linalg::bufferizeToAllocation(
     RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
     PadOp padOp, Attribute memorySpace, Operation *insertionPoint) {
   // tensor.pad does not have a destination operand.
   assert(!options.bufferizeDestinationOnly && "invalid options");

   OpBuilder::InsertionGuard guard(rewriter);
   Location loc = padOp.getLoc();

   // Create buffer allocation.
   if (insertionPoint)
     rewriter.setInsertionPoint(insertionPoint);
   else
     rewriter.setInsertionPoint(padOp);
   Value alloc = createAllocationForTensor(rewriter, loc, padOp.getResult(),
                                           options, memorySpace);

   // Everything else goes right before the tensor.pad.
   rewriter.setInsertionPoint(padOp);

   // Create linalg.fill or linalg.generic. Not needed if there is no padding.
   const bool hasNoPadding = padOp.hasZeroLowPad() && padOp.hasZeroHighPad();
   if (!hasNoPadding) {
     Operation *paddingOp =
         movePaddingToFillOrGenericOp(rewriter, loc, padOp, alloc);
     rewriter.setInsertionPointAfter(paddingOp);
   }

   // Create memcpy: copy the source into the region of the allocation that
   // starts at the low padding and has the size of the source.
   Value source = padOp.getSource();
   SmallVector<OpFoldResult> sourceSizes = getMixedSizes(rewriter, loc, source);
   SmallVector<OpFoldResult> unitStrides(padOp.getResultType().getRank(),
                                         rewriter.getIndexAttr(1));
   Value destView = memref::SubViewOp::create(rewriter, loc, alloc,
                                              /*offsets=*/padOp.getMixedLowPad(),
                                              sourceSizes, unitStrides);
   createMemcpy(rewriter, loc, source, destView, options);

   // Create bufferization.to_tensor with "restrict" and "writable". The returned
   // tensor is a new buffer allocation, so it does not alias with any buffer.
   Type resultTensorType = padOp.getResult().getType();
   Value replacement = bufferization::ToTensorOp::create(
       rewriter, loc, resultTensorType, alloc, /*restrict=*/true,
       /*writable=*/true);
   rewriter.replaceOp(padOp, replacement);
   return alloc;
 }


 /// Bufferizes the op masked by `maskOp` to a new allocation, then bufferizes
 /// the terminator of the mask region and the vector.mask op itself.
 ///
 /// The allocation is placed before `insertionPoint` if given, otherwise before
 /// `maskOp`. Returns the allocation obtained for the masked op; with
 /// `options.bufferizeDestinationOnly` it is returned without bufferizing the
 /// terminator or the mask op. Returns a null Value if bufferizing the
 /// terminator or the mask op fails.
 Value linalg::bufferizeToAllocation(
     RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
     vector::MaskOp maskOp, Attribute memorySpace, Operation *insertionPoint) {
   assert(llvm::range_size(maskOp.getMaskBlock()->without_terminator()) == 1 &&
          "expected single masked op");
   OpBuilder::InsertionGuard guard(rewriter);

   // Should the bufferization options and state be function arguments?
   bufferization::BufferizationOptions bufferizationOptions;
   bufferization::BufferizationState bufferizationState;

   Operation *terminator = maskOp.getMaskRegion().front().getTerminator();
   assert(isa<vector::YieldOp>(terminator) && "expected yield op terminator");

   // Bufferize maskable op. By default, place the buffer allocation right before
   // the mask op.
   Operation *allocInsertionPoint =
       insertionPoint ? insertionPoint : maskOp.getOperation();
   Value alloc =
       bufferizeToAllocation(rewriter, options, maskOp.getMaskableOp(),
                             memorySpace, allocInsertionPoint);

   if (options.bufferizeDestinationOnly)
     return alloc;

   // Bufferize terminator.
   rewriter.setInsertionPoint(terminator);
   auto bufferizableTerminator =
       cast<bufferization::BufferizableOpInterface>(terminator);
   if (failed(bufferizableTerminator.bufferize(rewriter, bufferizationOptions,
                                               bufferizationState)))
     return nullptr;

   // Erase dead to_tensor ops inside of the mask op. This is necessary because
   // there can only be one op (apart from the terminator) inside the mask op.
   // The ops are collected first and erased after the walk has finished.
   // TODO: Remove dead to_tensor ops more aggressively during bufferization.
   SmallVector<Operation *> deadToTensorOps;
   maskOp.walk([&](bufferization::ToTensorOp toTensorOp) {
     if (toTensorOp->use_empty())
       deadToTensorOps.push_back(toTensorOp.getOperation());
   });
   for (Operation *deadOp : deadToTensorOps)
     rewriter.eraseOp(deadOp);

   // Record the uses of all tensor results before the mask op is bufferized;
   // they are inspected again after bufferization.
   SmallVector<OpOperand *> tensorResultUses;
   for (Value maskResult : maskOp.getResults()) {
     if (!isa<TensorType>(maskResult.getType()))
       continue;
     for (OpOperand &use : maskResult.getUses())
       tensorResultUses.push_back(&use);
   }

   // Bufferize mask op.
   rewriter.setInsertionPoint(maskOp);
   auto bufferizableMask =
       cast<bufferization::BufferizableOpInterface>(maskOp.getOperation());
   if (failed(bufferizableMask.bufferize(rewriter, bufferizationOptions,
                                         bufferizationState)))
     return nullptr;

   // Set "restrict" attribute, indicating that no other tensor aliases with
   // this tensor. That is because we just allocated a new buffer for the tensor.
   for (OpOperand *use : tensorResultUses) {
     auto toTensorOp = use->get().getDefiningOp<bufferization::ToTensorOp>();
     assert(toTensorOp && "expected to_tensor op");
     rewriter.modifyOpInPlace(toTensorOp, [&]() {
       toTensorOp.setRestrict(true);
       toTensorOp.setWritable(true);
     });
   }

   return alloc;
 }


 /// Replaces `allocTensorOp` with a new buffer allocation wrapped in a
 /// bufferization.to_tensor op and returns the allocation.
 ///
 /// The new ops are created before `insertionPoint` if given, otherwise before
 /// `allocTensorOp`. The rewriter's insertion point is restored on return.
 Value linalg::bufferizeToAllocation(
     RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
     bufferization::AllocTensorOp allocTensorOp, Attribute memorySpace,
     Operation *insertionPoint) {
   Location loc = allocTensorOp.getLoc();
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(insertionPoint ? insertionPoint : allocTensorOp);

   // Create buffer allocation.
   Value alloc = createAllocationForTensor(
       rewriter, loc, allocTensorOp.getResult(), options, memorySpace);

   // Create bufferization.to_tensor with "restrict" and "writable". The returned
   // tensor is a new buffer allocation, so it does not alias with any buffer.
   Value toTensorOp = bufferization::ToTensorOp::create(
       rewriter, loc, allocTensorOp.getResult().getType(), alloc,
       /*restrict=*/true,
       /*writable=*/true);
   rewriter.replaceOp(allocTensorOp, toTensorOp);
   return alloc;
 }


 /// Lower tensor.from_elements to a sequence of chained tensor.insert.
 ///
 /// A tensor.empty of the result type serves as the initial destination.
 /// Returns the op that defines the replacement value: the last tensor.insert,
 /// or the tensor.empty itself when the tensor has no elements.
 FailureOr<Operation *> mlir::linalg::rewriteInDestinationPassingStyle(
     RewriterBase &rewriter, tensor::FromElementsOp fromElementsOp) {
   Location loc = fromElementsOp.getLoc();
   RankedTensorType tensorType =
       cast<RankedTensorType>(fromElementsOp.getType());
   auto shape = tensorType.getShape();

   // Create tensor.empty.
   auto emptyOp = EmptyOp::create(rewriter, loc, tensorType, ValueRange());

   // Case: tensor<elem_type>.
   if (shape.empty()) {
     Operation *res = rewriter.replaceOpWithNewOp<tensor::InsertOp>(
         fromElementsOp, fromElementsOp.getElements().front(),
         emptyOp.getResult(), ValueRange());
     return res;
   }

   // Case: at least one dimension has extent 0, i.e., there is nothing to
   // insert. Handle it up front: if every extent is 0, no index constant would
   // be created below and `constants[0]` would be an out-of-bounds access.
   if (llvm::is_contained(shape, int64_t{0})) {
     rewriter.replaceOp(fromElementsOp, emptyOp.getResult());
     return emptyOp.getOperation();
   }

   // Create constants for the range of possible indices [0, max{shape_i}).
   auto maxDim = *llvm::max_element(shape);
   SmallVector<Value, 2> constants;
   constants.reserve(maxDim);
   for (int i = 0; i < maxDim; ++i)
     constants.push_back(arith::ConstantIndexOp::create(rewriter, loc, i));

   // Traverse all elements and create tensor.insert ops. `indices` is scratch
   // storage that createInserts overwrites as it walks the index space.
   auto elementIt = fromElementsOp.getElements().begin();
   SmallVector<Value, 2> indices(tensorType.getRank(), constants[0]);
   Value result = createInserts(rewriter, loc, /*dim=*/0, emptyOp.getResult(),
                                shape, constants, elementIt, indices);

   // Replace tensor.from_elements.
   rewriter.replaceOp(fromElementsOp, result);
   return result.getDefiningOp();
 }


 /// Lower tensor.generate to linalg.generic.
 ///
 /// The linalg.generic writes into a new tensor.empty that is sized with the
 /// dynamic extents of the tensor.generate. The body of the tensor.generate is
 /// merged into the linalg.generic, with its block arguments replaced by
 /// linalg.index ops. Fails if the body does not consist of exactly one block.
 FailureOr<Operation *>
 mlir::linalg::rewriteInDestinationPassingStyle(RewriterBase &rewriter,
                                                tensor::GenerateOp generateOp) {
   // Only ops with exactly one block are supported.
   Region &generateBody = generateOp.getBody();
   if (!generateBody.hasOneBlock())
     return failure();

   Location loc = generateOp.getLoc();
   auto tensorType = cast<RankedTensorType>(generateOp.getType());
   const int64_t rank = tensorType.getRank();

   // Create tensor.empty as the destination.
   auto destination = EmptyOp::create(rewriter, loc, tensorType,
                                      generateOp.getDynamicExtents());

   // Create an all-parallel linalg.generic with an identity indexing map.
   SmallVector<utils::IteratorType> iteratorTypes(rank,
                                                  utils::IteratorType::parallel);
   SmallVector<AffineMap> indexingMaps(1,
                                       rewriter.getMultiDimIdentityMap(rank));
   auto genericOp = linalg::GenericOp::create(
       rewriter, loc, tensorType, /*inputs=*/ValueRange(),
       /*outputs=*/ValueRange{destination.getResult()},
       /*indexingMaps=*/indexingMaps, iteratorTypes);
   Block *genericBody = rewriter.createBlock(
       &genericOp->getRegion(0), {}, tensorType.getElementType(), loc);

   // Merge the tensor.generate body into the linalg.generic body; one
   // linalg.index per dimension replaces the corresponding block argument.
   rewriter.setInsertionPointToStart(genericBody);
   SmallVector<Value> indexValues;
   for (int64_t d = 0; d < rank; ++d)
     indexValues.push_back(linalg::IndexOp::create(rewriter, loc, d));
   rewriter.mergeBlocks(&generateBody.front(), genericBody, indexValues);

   // Update terminator: tensor.yield becomes linalg.yield.
   auto mergedYield = cast<tensor::YieldOp>(genericBody->getTerminator());
   rewriter.replaceOpWithNewOp<linalg::YieldOp>(mergedYield,
                                                mergedYield.getValue());

   // Replace tensor.generate.
   rewriter.replaceOp(generateOp, genericOp->getResult(0));
   return genericOp.getOperation();
 }


 /// Lower tensor.pad to linalg.fill/linalg.generic + tensor.insert_slice.
 ///
 /// Special case: a tensor.pad with a nofold attribute whose low and high
 /// paddings are all known to be zero is replaced by a linalg.copy into a
 /// bufferization.alloc_tensor.
 ///
 /// Fails if the body region does not consist of exactly one block or if the
 /// result shape cannot be reified. Returns the op that replaces the
 /// tensor.pad.
 FailureOr<Operation *>
 mlir::linalg::rewriteInDestinationPassingStyle(RewriterBase &rewriter,
                                                tensor::PadOp padOp) {
   // Only ops with exactly one block are supported.
   if (!padOp.getBodyRegion().hasOneBlock())
     return failure();

   Location loc = padOp.getLoc();
   RankedTensorType resultType = padOp.getResultType();
   const int64_t rank = resultType.getRank();

   // Collect the sizes of the dynamic result dimensions; they are needed to
   // create the destination tensor.
   ReifiedRankedShapedTypeDims reifiedShape;
   if (failed(reifyResultShapes(rewriter, padOp, reifiedShape)))
     return rewriter.notifyMatchFailure(
         padOp, "failed to reify tensor.pad op result shape");
   SmallVector<Value> dynamicSizes;
   for (int64_t d = 0; d < rank; ++d) {
     if (!resultType.isDynamicDim(d))
       continue;
     dynamicSizes.push_back(cast<Value>(reifiedShape[0][d]));
   }

   // If the `padOp` has a nofold attribute and all paddings are known to be 0,
   // explicitly insert a `linalg.copy`.
   const bool isNofoldWithoutPadding =
       padOp.getNofoldAttr() &&
       llvm::all_of(padOp.getMixedLowPad(), isZeroInteger) &&
       llvm::all_of(padOp.getMixedHighPad(), isZeroInteger);
   if (isNofoldWithoutPadding) {
     Value allocated = bufferization::AllocTensorOp::create(
         rewriter, loc, resultType, dynamicSizes);
     auto copyOp = rewriter.replaceOpWithNewOp<linalg::CopyOp>(
         padOp, padOp.getSource(), allocated);
     return copyOp.getOperation();
   }

   // Create tensor.empty and write the padding value into it with a
   // linalg.fill or linalg.generic.
   Value empty = EmptyOp::create(rewriter, loc, resultType, dynamicSizes);
   Operation *paddingOp =
       movePaddingToFillOrGenericOp(rewriter, loc, padOp, empty);
   rewriter.setInsertionPointAfter(paddingOp);

   // Create tensor::InsertSliceOp: insert the source into the padded tensor at
   // the low-padding offsets, with unit strides.
   SmallVector<OpFoldResult> sourceSizes =
       getMixedSizes(rewriter, loc, padOp.getSource());
   SmallVector<OpFoldResult> unitStrides(rank, rewriter.getIndexAttr(1));
   auto insertSliceOp = rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
       padOp, padOp.getSource(), paddingOp->getResult(0),
       /*offsets=*/padOp.getMixedLowPad(), sourceSizes, unitStrides);
   return insertSliceOp.getOperation();
 }


 /// Bufferizes `op` so that it operates on a new buffer allocation and returns
 /// that allocation.
 ///
 /// tensor.pad, vector.mask and bufferization.alloc_tensor are dispatched to
 /// their specialized overloads. For any other op, exactly one operand must
 /// require a new buffer (an operand aliasing a tensor result, or a ranked
 /// tensor operand that bufferizes to a memory write). That operand is replaced
 /// by a bufferization.to_tensor of the new allocation, which is initialized
 /// with a copy of the operand data when the operand has a definition.
 ///
 /// With `options.bufferizeDestinationOnly` only the operand is rewritten;
 /// otherwise `op` itself is bufferized as well. The allocation is created
 /// before `insertionPoint` if given, otherwise before `op`.
 ///
 /// Returns a null Value if `op` is not bufferizable, has an unranked tensor
 /// result, has a result that bufferizes to an allocation, does not have
 /// exactly one out-of-place operand, or if bufferizing `op` fails.
 Value linalg::bufferizeToAllocation(
     RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
     Operation *op, Attribute memorySpace, Operation *insertionPoint) {
   using namespace bufferization;

   // Call specialized overload for certain ops. `insertionPoint` is forwarded
   // so that a caller-provided position for the allocation is honored on these
   // paths as well.
   if (auto padOp = dyn_cast<tensor::PadOp>(op))
     return bufferizeToAllocation(rewriter, options, padOp, memorySpace,
                                  insertionPoint);
   if (auto maskOp = dyn_cast<vector::MaskOp>(op))
     return bufferizeToAllocation(rewriter, options, maskOp, memorySpace,
                                  insertionPoint);
   if (auto allocTensorOp = dyn_cast<bufferization::AllocTensorOp>(op))
     return bufferizeToAllocation(rewriter, options, allocTensorOp, memorySpace,
                                  insertionPoint);

   // Only bufferizable ops are supported.
   auto bufferizableOp = dyn_cast<BufferizableOpInterface>(op);
   if (!bufferizableOp)
     return nullptr;

   // Should the bufferization options and states be function arguments?
   BufferizationOptions bufferizationOptions;
   AnalysisState analysisState(bufferizationOptions);
   BufferizationState bufferizationState;

 #ifndef NDEBUG
   if (!options.bufferizeDestinationOnly) {
     // Ops with nested tensor ops are not supported yet. At the moment, this
     // function just bufferizes the given op itself, but not its body.
     op->walk([&](Operation *nestedOp) {
       if (op == nestedOp)
         return;
       if (llvm::any_of(nestedOp->getOperands(),
                        [](Value v) { return isa<TensorType>(v.getType()); }))
         llvm_unreachable("ops with nested tensor ops are not supported yet");
       if (llvm::any_of(nestedOp->getResults(),
                        [](Value v) { return isa<TensorType>(v.getType()); }))
         llvm_unreachable("ops with nested tensor ops are not supported yet");
     });
   }
 #endif // NDEBUG

   // Gather tensor results.
   SmallVector<OpResult> tensorResults;
   for (OpResult result : op->getResults()) {
     if (!isa<TensorType>(result.getType()))
       continue;
     // Unranked tensors are not supported
     if (!isa<RankedTensorType>(result.getType()))
       return nullptr;
     // Ops that bufferize to an allocation are not supported.
     if (bufferizableOp.bufferizesToAllocation(result))
       return nullptr;
     tensorResults.push_back(result);
   }

   // Gather all operands that should bufferize to a new allocation. I.e.,
   // bufferize out-of-place. Each operand is recorded at most once.
   SmallVector<OpOperand *> outOfPlaceOperands, resultUses;
   auto addOutOfPlaceOperand = [&](OpOperand *operand) {
     if (!llvm::is_contained(outOfPlaceOperands, operand))
       outOfPlaceOperands.push_back(operand);
   };
   for (OpResult result : tensorResults) {
     AliasingOpOperandList aliasingOperands =
         analysisState.getAliasingOpOperands(result);
     for (const AliasingOpOperand &operand : aliasingOperands) {
       addOutOfPlaceOperand(operand.opOperand);
       // Remember the uses of the result; they are inspected again after `op`
       // has been bufferized.
       for (OpOperand &resultUse : result.getUses())
         resultUses.push_back(&resultUse);
     }
   }
   for (OpOperand &operand : op->getOpOperands()) {
     if (!analysisState.bufferizesToMemoryWrite(operand))
       continue;
     if (!isa<RankedTensorType>(operand.get().getType()))
       continue;
     addOutOfPlaceOperand(&operand);
   }
   // TODO: Support multiple buffers.
   if (outOfPlaceOperands.size() != 1)
     return nullptr;

   // Allocate buffers.
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(insertionPoint ? insertionPoint : op);
   SmallVector<Value> allocs;
   for (OpOperand *operand : outOfPlaceOperands) {
     Value alloc = createAllocationForTensor(
         rewriter, op->getLoc(), operand->get(), options, memorySpace);
     allocs.push_back(alloc);
     if (!analysisState.findDefinitions(operand).empty()) {
       // Initialize buffer with a copy of the operand data. Not needed if the
       // tensor is uninitialized.
       createMemcpy(rewriter, op->getLoc(), operand->get(), alloc, options);
     }
     // Redirect the operand to a to_tensor of the new allocation.
     rewriter.modifyOpInPlace(op, [&]() {
       auto toTensorOp = ToTensorOp::create(rewriter, op->getLoc(),
                                            operand->get().getType(), alloc);
       operand->set(toTensorOp);
       if (options.bufferizeDestinationOnly) {
         rewriter.modifyOpInPlace(toTensorOp, [&]() {
           toTensorOp.setRestrict(true);
           toTensorOp.setWritable(true);
         });
       }
     });
   }

   if (options.bufferizeDestinationOnly)
     return allocs.front();

   // Bufferize the op.
   rewriter.setInsertionPoint(op);
   if (failed(bufferizableOp.bufferize(rewriter, bufferizationOptions,
                                       bufferizationState)))
     return nullptr;

   // Set "restrict" attribute, indicating that no other tensor aliases with
   // this tensor. That is because we just allocated a new buffer for the tensor.
   for (OpOperand *resultUse : resultUses) {
     auto toTensorOp = resultUse->get().getDefiningOp<ToTensorOp>();
     assert(toTensorOp && "expected to_tensor op");
     rewriter.modifyOpInPlace(toTensorOp, [&]() {
       toTensorOp.setRestrict(true);
       toTensorOp.setWritable(true);
     });
   }
   return allocs.front();
 }


 namespace {

 /// Adapts linalg::rewriteInDestinationPassingStyle to the
 /// `LogicalResult(OpTy, PatternRewriter &)` signature: the op produced by the
 /// rewrite is dropped and only success or failure is reported.
 template <typename OpTy>
 LogicalResult rewriteOpInDestinationPassingStyle(OpTy op,
                                                  PatternRewriter &rewriter) {
   FailureOr<Operation *> rewritten =
       linalg::rewriteInDestinationPassingStyle(rewriter, op);
   return success(succeeded(rewritten));
 }

 } // namespace


 /// Adds the destination-passing-style rewrites for tensor.from_elements,
 /// tensor.generate and tensor.pad to `patterns`, in that order.
 void linalg::populateConvertToDestinationStylePatterns(
     RewritePatternSet &patterns) {
   patterns.add(rewriteOpInDestinationPassingStyle<tensor::FromElementsOp>)
       .add(rewriteOpInDestinationPassingStyle<tensor::GenerateOp>)
       .add(rewriteOpInDestinationPassingStyle<tensor::PadOp>);
 }

BufferizableOpInterface.h

Bufferization.h

movePaddingToFillOrGenericOp
static Operation * movePaddingToFillOrGenericOp(RewriterBase &rewriter, Location loc, PadOp padOp, Value dest)
Definition: ConvertToDestinationStyle.cpp:93

createAllocationForTensor
static Value createAllocationForTensor(RewriterBase &rewriter, Location loc, Value value, const linalg::BufferizeToAllocationOptions &options, Attribute memorySpace={})
Definition: ConvertToDestinationStyle.cpp:189

createMemcpy
static void createMemcpy(OpBuilder &b, Location loc, Value tensorSource, Value memrefDest, const linalg::BufferizeToAllocationOptions &options)
Create a memcpy from the given source tensor to the given destination memref.
Definition: ConvertToDestinationStyle.cpp:56

reifyOrComputeDynamicSizes
static SmallVector< Value > reifyOrComputeDynamicSizes(OpBuilder &b, Value value)
Definition: ConvertToDestinationStyle.cpp:158

createInserts
static Value createInserts(RewriterBase &rewriter, Location loc, int dim, Value destination, ArrayRef< int64_t > shape, ArrayRef< Value > constants, OperandRange::iterator &elementIt, SmallVectorImpl< Value > &indices)
Definition: ConvertToDestinationStyle.cpp:32

Matchers.h

options
static llvm::ManagedStatic< PassManagerOptions > options
Definition: PassManagerOptions.cpp:89

PatternMatch.h

StaticValueUtils.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::SmallVectorImpl
Definition: LLVM.h:74

llvm::SmallVector
Definition: LLVM.h:72

mlir::AnalysisState
Base class for generic analysis states.
Definition: DataFlowFramework.h:488

mlir::Attribute
Attributes are known-constant values of operations.
Definition: Attributes.h:25

mlir::Block
Block represents an ordered list of Operations.
Definition: Block.h:33

mlir::Block::getTerminator
Operation * getTerminator()
Get the terminator operation of this block.
Definition: Block.cpp:244

mlir::Builder::getIndexAttr
IntegerAttr getIndexAttr(int64_t value)
Definition: Builders.cpp:103

mlir::Builder::getMultiDimIdentityMap
AffineMap getMultiDimIdentityMap(unsigned rank)
Definition: Builders.cpp:382

mlir::Builder::getContext
MLIRContext * getContext() const
Definition: Builders.h:55

mlir::Dialect
Dialects are groups of MLIR operations, types and attributes, as well as behavior associated with the...
Definition: Dialect.h:38

mlir::Dialect::materializeConstant
virtual Operation * materializeConstant(OpBuilder &builder, Attribute value, Type type, Location loc)
Registered hook to materialize a single constant operation from a given attribute value with the desi...
Definition: Dialect.h:83

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::MLIRContext::getLoadedDialect
Dialect * getLoadedDialect(StringRef name)
Get a registered IR dialect with the given namespace.
Definition: MLIRContext.cpp:432

mlir::OpBuilder::InsertionGuard
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:346

mlir::OpBuilder
This class helps build Operations.
Definition: Builders.h:205

mlir::OpBuilder::createBlock
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes={}, ArrayRef< Location > locs={})
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Definition: Builders.cpp:425

mlir::OpBuilder::setInsertionPointToStart
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:429

mlir::OpBuilder::setInsertionPoint
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396

mlir::OpBuilder::setInsertionPointAfter
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent insertions to go right after it.
Definition: Builders.h:410

mlir::OpBuilder::getInsertionBlock
Block * getInsertionBlock() const
Return the block the current insertion point belongs to.
Definition: Builders.h:440

mlir::OpOperand
This class represents an operand of an operation.
Definition: Value.h:257

mlir::OpResult
This is a value defined by a result of an operation.
Definition: Value.h:447

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Operation::getResult
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition: Operation.h:407

mlir::Operation::walk
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one), block or region, depending on the callback provided.
Definition: Operation.h:797

mlir::Operation::getLoc
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223

mlir::Operation::getParentOp
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-level operation.
Definition: Operation.h:234

mlir::Operation::getOpOperands
MutableArrayRef< OpOperand > getOpOperands()
Definition: Operation.h:383

mlir::Operation::getOperands
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:378

mlir::Operation::getResults
result_range getResults()
Definition: Operation.h:415

mlir::PatternRewriter
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current IR being matched, providing a way to keep track of any mutations made.
Definition: PatternMatch.h:783

mlir::RewritePatternSet
Definition: PatternMatch.h:806

mlir::RewriterBase
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to track mutations and create new operations.
Definition: PatternMatch.h:358

mlir::RewriterBase::notifyMatchFailure
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure, and provide a callback to populate a diagnostic with the reason why the failure occurred.
Definition: PatternMatch.h:716

mlir::RewriterBase::replaceOp
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements).
Definition: PatternMatch.cpp:127

mlir::RewriterBase::eraseOp
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
Definition: PatternMatch.cpp:155

mlir::RewriterBase::mergeBlocks
void mergeBlocks(Block *source, Block *dest, ValueRange argValues={})
Inline the operations of block 'source' into the end of block 'dest'.
Definition: PatternMatch.cpp:343

mlir::RewriterBase::modifyOpInPlace
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:628

mlir::RewriterBase::replaceOpWithNewOp
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition: PatternMatch.h:519

mlir::ValueRange
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
Definition: Value.h:96

mlir::Value::getType
Type getType() const
Return the type of this value.
Definition: Value.h:105

mlir::Value::getLoc
Location getLoc() const
Return the location of this value.
Definition: Value.cpp:24

mlir::Value::getDefiningOp
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:18

mlir::arith::ConstantIndexOp::create
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition: ArithOps.cpp:359

mlir::bufferization::BufferizationState
BufferizationState provides information about the state of the IR during the bufferization process.
Definition: BufferizableOpInterface.h:584

Arith.h

Linalg.h

Transforms.h

Tensor.h

mlir::bufferization::getMemRefTypeWithStaticIdentityLayout
BaseMemRefType getMemRefTypeWithStaticIdentityLayout(TensorType tensorType, Attribute memorySpace=nullptr)
Return a MemRef type with a static identity layout (i.e., no layout map).
Definition: BufferizableOpInterface.cpp:843

mlir::bufferization::AliasingOpOperandList
AliasList< AliasingOpOperand > AliasingOpOperandList
A list of possible aliasing OpOperands.
Definition: BufferizableOpInterface.h:95

mlir::bufferization::getMemRefTypeWithFullyDynamicLayout
BaseMemRefType getMemRefTypeWithFullyDynamicLayout(TensorType tensorType, Attribute memorySpace=nullptr)
Return a MemRef type with fully dynamic layout.
Definition: BufferizableOpInterface.cpp:819

mlir::linalg::bufferizeToAllocation
Value bufferizeToAllocation(RewriterBase &rewriter, const BufferizeToAllocationOptions &options, tensor::PadOp padOp, Attribute memorySpace={}, Operation *insertionPoint=nullptr)
Materialize a buffer allocation for the given tensor.pad op and lower the op to linalg.fill/linalg.generic + bufferization.materialize_in_destination.

mlir::linalg::rewriteInDestinationPassingStyle
FailureOr< Operation * > rewriteInDestinationPassingStyle(RewriterBase &rewriter, tensor::FromElementsOp fromElementsOp)
Rewrite tensor.from_elements to linalg.generic.
Definition: ConvertToDestinationStyle.cpp:350

mlir::linalg::populateConvertToDestinationStylePatterns
void populateConvertToDestinationStylePatterns(RewritePatternSet &patterns)
Populate patterns that convert non-destination-style ops to destination style ops.
Definition: ConvertToDestinationStyle.cpp:615

mlir::tensor
Definition: BufferizationTransformOps.h:19

mlir::tensor::getMixedSizes
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
Definition: TensorOps.cpp:70

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::matchPattern
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:490

mlir::reifyResultShapes
LogicalResult reifyResultShapes(OpBuilder &b, Operation *op, ReifiedRankedShapedTypeDims &reifiedReturnShapes)
Reify the shape of the result of an operation (typically in terms of the shape of its operands).
Definition: InferTypeOpInterface.cpp:27

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition: GreedyPatternRewriteDriver.h:283

mlir::isZeroInteger
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
Definition: StaticValueUtils.cpp:18

mlir::m_Constant
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:369

mlir::bufferization::BufferizationOptions
Options for BufferizableOpInterface-based bufferization.
Definition: BufferizableOpInterface.h:253

mlir::linalg::BufferizeToAllocationOptions
Definition: Transforms.h:53

mlir::linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy
@ MemrefCopy

mlir::linalg::BufferizeToAllocationOptions::MemcpyOp::LinalgCopy
@ LinalgCopy

mlir::linalg::BufferizeToAllocationOptions::MemcpyOp::MaterializeInDestination
@ MaterializeInDestination

mlir::linalg::BufferizeToAllocationOptions::AllocOp::MemrefAlloc
@ MemrefAlloc

mlir::linalg::BufferizeToAllocationOptions::AllocOp::MemrefAlloca
@ MemrefAlloca