MLIR  16.0.0git
KernelOutlining.cpp
Go to the documentation of this file.
1 //===- KernelOutlining.cpp - Implementation of GPU kernel outlining -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the GPU dialect kernel outlining pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 
18 #include "mlir/Dialect/DLTI/DLTI.h"
24 #include "mlir/IR/Builders.h"
25 #include "mlir/IR/Matchers.h"
26 #include "mlir/IR/SymbolTable.h"
27 #include "mlir/Support/LLVM.h"
29 
30 namespace mlir {
31 #define GEN_PASS_DEF_GPULAUNCHSINKINDEXCOMPUTATIONS
32 #define GEN_PASS_DEF_GPUKERNELOUTLINING
33 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
34 } // namespace mlir
35 
36 using namespace mlir;
37 
38 template <typename OpTy>
39 static void createForAllDimensions(OpBuilder &builder, Location loc,
40  SmallVectorImpl<Value> &values) {
41  for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z})
42  values.push_back(builder.create<OpTy>(loc, builder.getIndexType(), dim));
43 }
44 
45 /// Adds operations generating block/thread ids and grid/block dimensions at the
46 /// beginning of the `launchFuncOpBody` region. Add mapping from argument in
47 /// entry block of `launchOpBody`, to the corresponding result value of the
48 /// added operations.
49 static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody,
50  Region &launchOpBody,
51  BlockAndValueMapping &map) {
52  OpBuilder builder(loc->getContext());
53  Block &firstBlock = launchOpBody.front();
54  builder.setInsertionPointToStart(&launchFuncOpBody.front());
55  SmallVector<Value, 12> indexOps;
56  createForAllDimensions<gpu::BlockIdOp>(builder, loc, indexOps);
57  createForAllDimensions<gpu::ThreadIdOp>(builder, loc, indexOps);
58  createForAllDimensions<gpu::GridDimOp>(builder, loc, indexOps);
59  createForAllDimensions<gpu::BlockDimOp>(builder, loc, indexOps);
60  // Replace the leading 12 function args with the respective thread/block index
61  // operations. Iterate backwards since args are erased and indices change.
62  for (const auto &indexOp : enumerate(indexOps))
63  map.map(firstBlock.getArgument(indexOp.index()), indexOp.value());
64 }
65 
66 /// Identifies operations that are beneficial to sink into kernels. These
67 /// operations may not have side-effects, as otherwise sinking (and hence
68 /// duplicating them) is not legal.
70  return matchPattern(op, m_Constant()) ||
71  isa<memref::DimOp, arith::SelectOp, arith::CmpIOp>(op);
72 }
73 
74 /// For a given operation `op`, computes whether it is beneficial to sink the
75 /// operation into the kernel. An operation can be sunk if doing so does not
76 /// introduce new kernel arguments. Whether a value is already available in the
77 /// kernel (and hence does not introduce new arguments) is checked by
78 /// querying `existingDependencies` and `availableValues`.
79 /// If an operand is not yet available, we recursively check whether it can be
80 /// made available by siking its defining op.
81 /// Operations that are indentified for sinking are added to `beneficiaryOps` in
82 /// the order they should appear in the kernel. Furthermore, `availableValues`
83 /// is updated with results that will be available after sinking the identified
84 /// ops.
86  Operation *op, const SetVector<Value> &existingDependencies,
87  SetVector<Operation *> &beneficiaryOps,
88  llvm::SmallPtrSetImpl<Value> &availableValues,
89  llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
90  if (beneficiaryOps.count(op))
91  return true;
92 
93  if (!isSinkingBeneficiary(op))
94  return false;
95 
96  for (Value operand : op->getOperands()) {
97  // It is already visible in the kernel, keep going.
98  if (availableValues.count(operand))
99  continue;
100  // Else check whether it can be made available via sinking or already is a
101  // dependency.
102  Operation *definingOp = operand.getDefiningOp();
103  if ((!definingOp || !extractBeneficiaryOps(definingOp, existingDependencies,
104  beneficiaryOps, availableValues,
105  isSinkingBeneficiary)) &&
106  !existingDependencies.count(operand))
107  return false;
108  }
109  // We will sink the operation, mark its results as now available.
110  beneficiaryOps.insert(op);
111  for (Value result : op->getResults())
112  availableValues.insert(result);
113  return true;
114 }
115 
117  gpu::LaunchOp launchOp,
118  llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
119  assert(isSinkingBeneficiary);
120  Region &launchOpBody = launchOp.getBody();
121 
122  // Identify uses from values defined outside of the scope of the launch
123  // operation.
124  SetVector<Value> sinkCandidates;
125  getUsedValuesDefinedAbove(launchOpBody, sinkCandidates);
126 
127  SetVector<Operation *> toBeSunk;
128  llvm::SmallPtrSet<Value, 4> availableValues;
129  for (Value operand : sinkCandidates) {
130  Operation *operandOp = operand.getDefiningOp();
131  if (!operandOp)
132  continue;
133  extractBeneficiaryOps(operandOp, sinkCandidates, toBeSunk, availableValues,
134  isSinkingBeneficiary);
135  }
136 
137  // Insert operations so that the defs get cloned before uses.
139  OpBuilder builder(launchOpBody);
140  for (Operation *op : toBeSunk) {
141  Operation *clonedOp = builder.clone(*op, map);
142  // Only replace uses within the launch op.
143  for (auto pair : llvm::zip(op->getResults(), clonedOp->getResults()))
144  replaceAllUsesInRegionWith(std::get<0>(pair), std::get<1>(pair),
145  launchOp.getBody());
146  }
147  return success();
148 }
149 
150 /// Outline the `gpu.launch` operation body into a kernel function. Replace
151 /// `gpu.terminator` operations by `gpu.return` in the generated function.
152 static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
153  StringRef kernelFnName,
154  SetVector<Value> &operands) {
155  Location loc = launchOp.getLoc();
156  // Create a builder with no insertion point, insertion will happen separately
157  // due to symbol table manipulation.
158  OpBuilder builder(launchOp.getContext());
159  Region &launchOpBody = launchOp.getBody();
160 
161  // Identify uses from values defined outside of the scope of the launch
162  // operation.
163  getUsedValuesDefinedAbove(launchOpBody, operands);
164 
165  // Create the gpu.func operation.
166  SmallVector<Type, 4> kernelOperandTypes;
167  kernelOperandTypes.reserve(operands.size());
168  for (Value operand : operands) {
169  kernelOperandTypes.push_back(operand.getType());
170  }
171  FunctionType type =
172  FunctionType::get(launchOp.getContext(), kernelOperandTypes, {});
173  auto outlinedFunc = builder.create<gpu::GPUFuncOp>(loc, kernelFnName, type);
174  outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
175  builder.getUnitAttr());
177 
178  // Map the arguments corresponding to the launch parameters like blockIdx,
179  // threadIdx, etc.
180  Region &outlinedFuncBody = outlinedFunc.getBody();
181  injectGpuIndexOperations(loc, outlinedFuncBody, launchOpBody, map);
182 
183  // Map arguments from gpu.launch region to the arguments of the gpu.func
184  // operation.
185  Block &entryBlock = outlinedFuncBody.front();
186  for (const auto &operand : enumerate(operands))
187  map.map(operand.value(), entryBlock.getArgument(operand.index()));
188 
189  // Clone the region of the gpu.launch operation into the gpu.func operation.
190  // TODO: If cloneInto can be modified such that if a mapping for
191  // a block exists, that block will be used to clone operations into (at the
192  // end of the block), instead of creating a new block, this would be much
193  // cleaner.
194  launchOpBody.cloneInto(&outlinedFuncBody, map);
195 
196  // Branch from entry of the gpu.func operation to the block that is cloned
197  // from the entry block of the gpu.launch operation.
198  Block &launchOpEntry = launchOpBody.front();
199  Block *clonedLaunchOpEntry = map.lookup(&launchOpEntry);
200  builder.setInsertionPointToEnd(&entryBlock);
201  builder.create<cf::BranchOp>(loc, clonedLaunchOpEntry);
202 
203  outlinedFunc.walk([](gpu::TerminatorOp op) {
204  OpBuilder replacer(op);
205  replacer.create<gpu::ReturnOp>(op.getLoc());
206  op.erase();
207  });
208  return outlinedFunc;
209 }
210 
211 gpu::GPUFuncOp mlir::outlineKernelFunc(gpu::LaunchOp launchOp,
212  StringRef kernelFnName,
213  llvm::SmallVectorImpl<Value> &operands) {
214  DenseSet<Value> inputOperandSet;
215  inputOperandSet.insert(operands.begin(), operands.end());
216  SetVector<Value> operandSet(operands.begin(), operands.end());
217  auto funcOp = outlineKernelFuncImpl(launchOp, kernelFnName, operandSet);
218  for (auto operand : operandSet) {
219  if (!inputOperandSet.count(operand))
220  operands.push_back(operand);
221  }
222  return funcOp;
223 }
224 
225 /// Replace `gpu.launch` operations with an `gpu.launch_func` operation
226 /// launching `kernelFunc`. The kernel func contains the body of the
227 /// `gpu.launch` with constant region arguments inlined.
228 static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
229  gpu::GPUFuncOp kernelFunc,
230  ValueRange operands) {
231  OpBuilder builder(launchOp);
232  // The launch op has an optional dynamic shared memory size. If it doesn't
233  // exist, we use zero.
234  Value asyncToken = launchOp.getAsyncToken();
235  auto launchFunc = builder.create<gpu::LaunchFuncOp>(
236  launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
237  launchOp.getBlockSizeOperandValues(),
238  launchOp.getDynamicSharedMemorySize(), operands,
239  asyncToken ? asyncToken.getType() : nullptr,
240  launchOp.getAsyncDependencies());
241  launchOp.replaceAllUsesWith(launchFunc);
242  launchOp.erase();
243 }
244 
245 namespace {
246 /// Pass that moves ops which are likely an index computation into gpu.launch
247 /// body.
248 class GpuLaunchSinkIndexComputationsPass
249  : public impl::GpuLaunchSinkIndexComputationsBase<
250  GpuLaunchSinkIndexComputationsPass> {
251 public:
252  void runOnOperation() override {
253  Operation *op = getOperation();
254  if (op->walk([](gpu::LaunchOp launch) {
255  // Pull in instructions that can be sunk
256  if (failed(sinkOperationsIntoLaunchOp(launch,
257  isLikelyAnIndexComputation)))
258  return WalkResult::interrupt();
259 
260  return WalkResult::advance();
261  }).wasInterrupted())
262  signalPassFailure();
263  }
264 };
265 
/// Pass that moves the kernel of each LaunchOp into its separate nested module.
///
/// This pass moves the kernel code of each LaunchOp into a function created
/// inside a nested module. It also creates an external function of the same
/// name in the parent module.
///
/// The gpu.modules are intended to be compiled to a cubin blob independently in
/// a separate pass. The external functions can then be annotated with the
/// symbol of the cubin accessor function.
class GpuKernelOutliningPass
    : public impl::GpuKernelOutliningBase<GpuKernelOutliningPass> {
public:
  /// Records `dlStr` as the data layout string unless the `data-layout-str`
  /// pass option already carries a value.
  GpuKernelOutliningPass(StringRef dlStr) {
    if (!dlStr.empty() && !dataLayoutStr.hasValue())
      dataLayoutStr = dlStr.str();
  }

  /// Copy constructor: pass options are not copied by the base class, so the
  /// data layout string (and the spec parsed from it) is forwarded explicitly.
  GpuKernelOutliningPass(const GpuKernelOutliningPass &other)
      : GpuKernelOutliningBase(other), dataLayoutSpec(other.dataLayoutSpec) {
    dataLayoutStr = other.dataLayoutStr.getValue();
  }

  /// Parses `dataLayoutStr` into `dataLayoutSpec`. Fails if the string does
  /// not parse to an attribute, or parses to an attribute that does not
  /// implement DataLayoutSpecInterface.
  LogicalResult initialize(MLIRContext *context) override {
    // Initialize the data layout specification from the data layout string.
    if (!dataLayoutStr.empty()) {
      Attribute resultAttr = mlir::parseAttribute(dataLayoutStr, context);
      if (!resultAttr)
        return failure();

      dataLayoutSpec = resultAttr.dyn_cast<DataLayoutSpecInterface>();
      if (!dataLayoutSpec)
        return failure();
    }

    return success();
  }

  /// Outlines every gpu.launch nested in the module's functions into a
  /// gpu.func inside a fresh gpu.module, and rewrites the launch into a
  /// gpu.launch_func targeting it.
  void runOnOperation() override {
    SymbolTable symbolTable(getOperation());
    bool modified = false;
    for (auto func : getOperation().getOps<func::FuncOp>()) {
      // Insert just after the function.
      Block::iterator insertPt(func->getNextNode());
      auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
        SetVector<Value> operands;
        // Kernel name is derived from the enclosing function's name.
        std::string kernelFnName =
            Twine(op->getParentOfType<func::FuncOp>().getName(), "_kernel")
                .str();

        gpu::GPUFuncOp outlinedFunc =
            outlineKernelFuncImpl(op, kernelFnName, operands);

        // Create nested module and insert outlinedFunc. The module will
        // originally get the same name as the function, but may be renamed on
        // insertion into the parent module.
        auto kernelModule = createKernelModule(outlinedFunc, symbolTable);
        symbolTable.insert(kernelModule, insertPt);

        // Potentially changes signature, pulling in constants.
        convertToLaunchFuncOp(op, outlinedFunc, operands.getArrayRef());
        modified = true;
        return WalkResult::advance();
      });
      if (funcWalkResult.wasInterrupted())
        return signalPassFailure();
    }

    // If any new module was inserted in this module, annotate this module as
    // a container module.
    if (modified)
      getOperation()->setAttr(gpu::GPUDialect::getContainerModuleAttrName(),
                              UnitAttr::get(&getContext()));
  }

private:
  /// Returns a gpu.module containing kernelFunc and all callees (recursive).
  gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
                                      const SymbolTable &parentSymbolTable) {
    // TODO: This code cannot use an OpBuilder because it must be inserted into
    // a SymbolTable by the caller. SymbolTable needs to be refactored to
    // prevent manual building of Ops with symbols in code using SymbolTables
    // and then this needs to use the OpBuilder.
    auto *context = getOperation().getContext();
    OpBuilder builder(context);
    auto kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
                                                         kernelFunc.getName());

    // If a valid data layout spec was provided, attach it to the kernel module.
    // Otherwise, the default data layout will be used.
    if (dataLayoutSpec)
      kernelModule->setAttr(DLTIDialect::kDataLayoutAttrName, dataLayoutSpec);

    SymbolTable symbolTable(kernelModule);
    symbolTable.insert(kernelFunc);

    // Clone every symbol transitively referenced from the kernel function into
    // the nested module so it is self-contained. Worklist-driven: each cloned
    // symbol's own symbol uses are scanned in turn.
    SmallVector<Operation *, 8> symbolDefWorklist = {kernelFunc};
    while (!symbolDefWorklist.empty()) {
      if (Optional<SymbolTable::UseRange> symbolUses =
              SymbolTable::getSymbolUses(symbolDefWorklist.pop_back_val())) {
        for (SymbolTable::SymbolUse symbolUse : *symbolUses) {
          StringRef symbolName =
              symbolUse.getSymbolRef().cast<FlatSymbolRefAttr>().getValue();
          if (symbolTable.lookup(symbolName))
            continue;

          Operation *symbolDefClone =
              parentSymbolTable.lookup(symbolName)->clone();
          symbolDefWorklist.push_back(symbolDefClone);
          symbolTable.insert(symbolDefClone);
        }
      }
    }

    return kernelModule;
  }

  /// Textual data layout specification to attach to created kernel modules.
  Option<std::string> dataLayoutStr{
      *this, "data-layout-str",
      llvm::cl::desc("String containing the data layout specification to be "
                     "attached to the GPU kernel module")};

  /// Parsed form of `dataLayoutStr`; null when no layout was provided.
  DataLayoutSpecInterface dataLayoutSpec;
};
389 
390 } // namespace
391 
393  return std::make_unique<GpuLaunchSinkIndexComputationsPass>();
394 }
395 
396 std::unique_ptr<OperationPass<ModuleOp>>
397 mlir::createGpuKernelOutliningPass(StringRef dataLayoutStr) {
398  return std::make_unique<GpuKernelOutliningPass>(dataLayoutStr);
399 }
static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp, StringRef kernelFnName, SetVector< Value > &operands)
Outline the gpu.launch operation body into a kernel function.
static bool isLikelyAnIndexComputation(Operation *op)
Identifies operations that are beneficial to sink into kernels.
static void convertToLaunchFuncOp(gpu::LaunchOp launchOp, gpu::GPUFuncOp kernelFunc, ValueRange operands)
Replace gpu.launch operations with an gpu.launch_func operation launching kernelFunc.
static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody, Region &launchOpBody, BlockAndValueMapping &map)
Adds operations generating block/thread ids and grid/block dimensions at the beginning of the launchFuncOpBody region.
static void createForAllDimensions(OpBuilder &builder, Location loc, SmallVectorImpl< Value > &values)
static bool extractBeneficiaryOps(Operation *op, const SetVector< Value > &existingDependencies, SetVector< Operation * > &beneficiaryOps, llvm::SmallPtrSetImpl< Value > &availableValues, llvm::function_ref< bool(Operation *)> isSinkingBeneficiary)
For a given operation op, computes whether it is beneficial to sink the operation into the kernel.
Attributes are known-constant values of operations.
Definition: Attributes.h:25
U dyn_cast() const
Definition: Attributes.h:127
MLIRContext * getContext() const
Return the context this attribute belongs to.
Definition: Attributes.cpp:20
Block * lookup(Block *from) const
Lookup a mapped value within the map.
void map(Block *from, Block *to)
Inserts a new mapping for 'from' to 'to'.
Block represents an ordered list of Operations.
Definition: Block.h:30
OpListType::iterator iterator
Definition: Block.h:129
BlockArgument getArgument(unsigned i)
Definition: Block.h:118
UnitAttr getUnitAttr()
Definition: Builders.cpp:99
IndexType getIndexType()
Definition: Builders.cpp:56
A symbol reference with a reference path containing a single element.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:64
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:56
This class helps build Operations.
Definition: Builders.h:198
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:383
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition: Builders.h:388
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:422
Operation * clone(Operation &op, BlockAndValueMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:510
Operation is a basic unit of execution within MLIR.
Definition: Operation.h:31
Operation * clone(BlockAndValueMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
Definition: Operation.cpp:558
void setAttr(StringAttr name, Attribute value)
If the an attribute exists with the specified name, change it to the new value.
Definition: Operation.h:395
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:295
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition: Operation.h:203
result_range getResults()
Definition: Operation.h:332
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:574
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:418
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
void cloneInto(Region *dest, BlockAndValueMapping &mapper)
Clone the internal blocks from this region into dest.
Definition: Region.cpp:70
Block & front()
Definition: Region.h:65
This class represents a specific symbol use.
Definition: SymbolTable.h:147
This class allows for representing and managing the symbol table used by operations with the 'SymbolT...
Definition: SymbolTable.h:23
Operation * lookup(StringRef name) const
Look up a symbol with the specified name, returning null if no such name exists.
static Optional< UseRange > getSymbolUses(Operation *from)
Get an iterator range for all of the uses, for any symbol, that are nested within the given operation...
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:349
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:85
Type getType() const
Return the type of this value.
Definition: Value.h:114
static WalkResult advance()
Definition: Visitors.h:51
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:230
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:329
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Definition: RegionUtils.cpp:24
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
std::unique_ptr< Pass > createGpuLauchSinkIndexComputationsPass()
Pass that moves ops which are likely an index computation into gpu.launch body.
void getUsedValuesDefinedAbove(Region &region, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Definition: RegionUtils.cpp:59
LogicalResult sinkOperationsIntoLaunchOp(gpu::LaunchOp launchOp, llvm::function_ref< bool(Operation *)> isSinkingBeneficiary)
Sink operations into the launchOp to reduce the number of values that are used within the region of t...
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:255
std::unique_ptr< OperationPass< ModuleOp > > createGpuKernelOutliningPass(StringRef dataLayoutStr=StringRef())
Replaces gpu.launch with gpu.launch_func by moving the region into a separate kernel function.
gpu::GPUFuncOp outlineKernelFunc(gpu::LaunchOp launchOp, StringRef kernelFnName, SmallVectorImpl< Value > &operands)
Get a gpu.func created from outlining the region of a gpu.launch op with the given kernelFnName.
Attribute parseAttribute(llvm::StringRef attrStr, MLIRContext *context)
This parses a single MLIR attribute to an MLIR context if it was valid.
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26