//===- KernelOutlining.cpp - Implementation of GPU kernel outlining -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the GPU dialect kernel outlining pass.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/Transforms/Passes.h"

#include "mlir/AsmParser/AsmParser.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/RegionUtils.h"

#include <limits>

namespace mlir {
#define GEN_PASS_DEF_GPULAUNCHSINKINDEXCOMPUTATIONSPASS
#define GEN_PASS_DEF_GPUKERNELOUTLININGPASS
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
} // namespace mlir

using namespace mlir;

template <typename OpTy>
static void createForAllDimensions(OpBuilder &builder, Location loc,
                                   SmallVectorImpl<Value> &values) {
  for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z})
    values.push_back(OpTy::create(builder, loc, builder.getIndexType(), dim));
}

/// Adds operations generating block/thread ids and grid/block dimensions at
/// the beginning of the `launchFuncOpBody` region. Also adds a mapping from
/// each argument in the entry block of `launchOpBody` to the corresponding
/// result value of the added operations.
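/// For reference, the entry-block argument layout this mapping assumes (a
/// sketch derived from the creation order below, not a normative spec):
///
///   args[0..2]   <- gpu.block_id  x, y, z
///   args[3..5]   <- gpu.thread_id x, y, z
///   args[6..8]   <- gpu.grid_dim  x, y, z
///   args[9..11]  <- gpu.block_dim x, y, z
///   args[12..17] <- gpu.cluster_id / gpu.cluster_dim x, y, z (hasCluster only)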
static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody,
                                     Region &launchOpBody, IRMapping &map,
                                     bool hasCluster = false) {
  OpBuilder builder(loc->getContext());
  Block &firstBlock = launchOpBody.front();
  builder.setInsertionPointToStart(&launchFuncOpBody.front());
  SmallVector<Value> indexOps;
  // The order is important here: it must match the order of the entry block
  // arguments of the launch op's region.
  createForAllDimensions<gpu::BlockIdOp>(builder, loc, indexOps);
  createForAllDimensions<gpu::ThreadIdOp>(builder, loc, indexOps);
  createForAllDimensions<gpu::GridDimOp>(builder, loc, indexOps);
  createForAllDimensions<gpu::BlockDimOp>(builder, loc, indexOps);
  if (hasCluster) {
    createForAllDimensions<gpu::ClusterIdOp>(builder, loc, indexOps);
    createForAllDimensions<gpu::ClusterDimOp>(builder, loc, indexOps);
  }
  // Map the leading region arguments (12, or 18 when a cluster is present) to
  // the corresponding index operations created above.
  for (const auto &indexOp : enumerate(indexOps))
    map.map(firstBlock.getArgument(indexOp.index()), indexOp.value());
}

/// Identifies operations that are beneficial to sink into kernels. These
/// operations must not have side effects, since otherwise sinking (and hence
/// duplicating) them is not legal.
static bool isLikelyAnIndexComputation(Operation *op) {
  return matchPattern(op, m_Constant()) ||
         isa<memref::DimOp, arith::SelectOp, arith::CmpIOp>(op);
}

/// For a given operation `op`, computes whether it is beneficial to sink the
/// operation into the kernel. An operation can be sunk if doing so does not
/// introduce new kernel arguments. Whether a value is already available in the
/// kernel (and hence does not introduce new arguments) is checked by
/// querying `existingDependencies` and `availableValues`.
/// If an operand is not yet available, we recursively check whether it can be
/// made available by sinking its defining op.
/// Operations that are identified for sinking are added to `beneficiaryOps` in
/// the order they should appear in the kernel. Furthermore, `availableValues`
/// is updated with results that will be available after sinking the identified
/// ops.
static bool extractBeneficiaryOps(
    Operation *op, const SetVector<Value> &existingDependencies,
    SetVector<Operation *> &beneficiaryOps,
    llvm::SmallPtrSetImpl<Value> &availableValues,
    llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
  if (beneficiaryOps.count(op))
    return true;

  if (!isSinkingBeneficiary(op))
    return false;

  for (Value operand : op->getOperands()) {
    // It is already visible in the kernel, keep going.
    if (availableValues.count(operand))
      continue;
    // Else check whether it can be made available via sinking or already is a
    // dependency.
    Operation *definingOp = operand.getDefiningOp();
    if ((!definingOp || !extractBeneficiaryOps(definingOp, existingDependencies,
                                               beneficiaryOps, availableValues,
                                               isSinkingBeneficiary)) &&
        !existingDependencies.count(operand))
      return false;
  }
  // We will sink the operation, mark its results as now available.
  beneficiaryOps.insert(op);
  for (Value result : op->getResults())
    availableValues.insert(result);
  return true;
}
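
// A worked illustration of the recursion above (hypothetical values): to sink
// `%dim = memref.dim %mem, %c0`, the operand %c0 is neither available nor an
// existing dependency, so its defining arith.constant is visited first and
// accepted; %mem has no sinkable defining op but is an existing dependency.
// The resulting `beneficiaryOps` order is [arith.constant, memref.dim],
// i.e. defs before their uses.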

LogicalResult mlir::sinkOperationsIntoLaunchOp(
    gpu::LaunchOp launchOp,
    llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
  assert(isSinkingBeneficiary);
  Region &launchOpBody = launchOp.getBody();

  // Identify uses from values defined outside of the scope of the launch
  // operation.
  SetVector<Value> sinkCandidates;
  getUsedValuesDefinedAbove(launchOpBody, sinkCandidates);

  SetVector<Operation *> toBeSunk;
  llvm::SmallPtrSet<Value, 4> availableValues;
  for (Value operand : sinkCandidates) {
    Operation *operandOp = operand.getDefiningOp();
    if (!operandOp)
      continue;
    extractBeneficiaryOps(operandOp, sinkCandidates, toBeSunk, availableValues,
                          isSinkingBeneficiary);
  }

  // Insert operations so that the defs get cloned before uses.
  IRMapping map;
  OpBuilder builder(launchOpBody);
  for (Operation *op : toBeSunk) {
    Operation *clonedOp = builder.clone(*op, map);
    // Only replace uses within the launch op.
    for (auto pair : llvm::zip(op->getResults(), clonedOp->getResults()))
      replaceAllUsesInRegionWith(std::get<0>(pair), std::get<1>(pair),
                                 launchOp.getBody());
  }
  return success();
}
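
// Schematic effect of the rewrite above (an illustrative sketch; %mem and the
// body are hypothetical):
//
//   %c0  = arith.constant 0 : index
//   %dim = memref.dim %mem, %c0 : memref<?xf32>
//   gpu.launch ... {
//     ... uses of %dim ...
//   }
//
// becomes
//
//   %c0  = arith.constant 0 : index
//   %dim = memref.dim %mem, %c0 : memref<?xf32>
//   gpu.launch ... {
//     %c0_0  = arith.constant 0 : index                 // cloned def
//     %dim_0 = memref.dim %mem, %c0_0 : memref<?xf32>   // cloned def
//     ... uses of %dim_0 ...   // only uses inside the region are replaced
//   }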

/// Return the provided KernelDim3 as an array of i32 constants if possible.
static DenseI32ArrayAttr maybeConstantDimsAttr(gpu::KernelDim3 dims) {
  SmallVector<int32_t, 3> constants;
  MLIRContext *ctx = dims.x.getContext();
  for (Value v : {dims.x, dims.y, dims.z}) {
    APInt constValue;
    if (!matchPattern(v, m_ConstantInt(&constValue)))
      return nullptr;
    // In the event someone called for a too-large block or grid dimension,
    // don't set bounds as it is likely to cause more confusing behavior.
    if (constValue.ugt(std::numeric_limits<uint32_t>::max()))
      return nullptr;
    constants.push_back(
        constValue.getLimitedValue(std::numeric_limits<uint32_t>::max()));
  }
  return DenseI32ArrayAttr::get(ctx, constants);
}
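
// For example (schematic): block sizes (%c32, %c4, %c1) that are all
// arith.constant index values fold to `array<i32: 32, 4, 1>`; any
// non-constant dimension, or one exceeding the uint32_t range, yields a null
// attribute so that no bound is attached.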

/// Outline the `gpu.launch` operation body into a kernel function. Replace
/// `gpu.terminator` operations by `gpu.return` in the generated function.
/// Set block and grid size bounds if known.
static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
                                            StringRef kernelFnName,
                                            SetVector<Value> &operands) {
  Location loc = launchOp.getLoc();
  // Create a builder with no insertion point; insertion will happen separately
  // due to symbol table manipulation.
  OpBuilder builder(launchOp.getContext());
  Region &launchOpBody = launchOp.getBody();

  // Identify uses from values defined outside of the scope of the launch
  // operation.
  getUsedValuesDefinedAbove(launchOpBody, operands);

  // Create the gpu.func operation.
  SmallVector<Type, 4> kernelOperandTypes;
  kernelOperandTypes.reserve(operands.size());
  for (Value operand : operands) {
    kernelOperandTypes.push_back(operand.getType());
  }
  FunctionType type =
      FunctionType::get(launchOp.getContext(), kernelOperandTypes, {});
  auto outlinedFunc = gpu::GPUFuncOp::create(
      builder, loc, kernelFnName, type,
      TypeRange(ValueRange(launchOp.getWorkgroupAttributions())),
      TypeRange(ValueRange(launchOp.getPrivateAttributions())));
  outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
                        builder.getUnitAttr());

  // If we can infer bounds on the grid and/or block sizes from the arguments
  // to the launch op, propagate them to the generated kernel. This is safe
  // because multiple launches with the same body are not deduplicated.
  if (auto blockBounds =
          maybeConstantDimsAttr(launchOp.getBlockSizeOperandValues()))
    outlinedFunc.setKnownBlockSizeAttr(blockBounds);
  if (auto gridBounds =
          maybeConstantDimsAttr(launchOp.getGridSizeOperandValues()))
    outlinedFunc.setKnownGridSizeAttr(gridBounds);

  IRMapping map;

  // Map the arguments corresponding to the launch parameters like blockIdx,
  // threadIdx, etc. If a cluster is present, also generate clusterIdx and
  // clusterDim.
  Region &outlinedFuncBody = outlinedFunc.getBody();
  injectGpuIndexOperations(loc, outlinedFuncBody, launchOpBody, map,
                           launchOp.hasClusterSize());

  // Map memory attributions from the launch op to the GPUFuncOp attributions.
  for (const auto &[launchArg, funcArg] :
       llvm::zip(launchOp.getWorkgroupAttributions(),
                 outlinedFunc.getWorkgroupAttributions()))
    map.map(launchArg, funcArg);
  for (const auto &[launchArg, funcArg] :
       llvm::zip(launchOp.getPrivateAttributions(),
                 outlinedFunc.getPrivateAttributions()))
    map.map(launchArg, funcArg);

  // Map arguments from gpu.launch region to the arguments of the gpu.func
  // operation.
  Block &entryBlock = outlinedFuncBody.front();
  for (const auto &operand : enumerate(operands))
    map.map(operand.value(), entryBlock.getArgument(operand.index()));

  // Clone the region of the gpu.launch operation into the gpu.func operation.
  launchOpBody.cloneInto(&outlinedFuncBody, map);

  // Replace the terminator op with returns.
  for (Block &block : launchOpBody) {
    Block *clonedBlock = map.lookup(&block);
    auto terminator = dyn_cast<gpu::TerminatorOp>(clonedBlock->getTerminator());
    if (!terminator)
      continue;
    OpBuilder replacer(terminator);
    gpu::ReturnOp::create(replacer, terminator->getLoc());
    terminator->erase();
  }

  // Now splice the entry block of the gpu.launch operation into the end of the
  // gpu.func entry block and erase the redundant block.
  Block *clonedLaunchOpEntry = map.lookup(&launchOpBody.front());
  entryBlock.getOperations().splice(entryBlock.getOperations().end(),
                                    clonedLaunchOpEntry->getOperations());
  clonedLaunchOpEntry->erase();

  return outlinedFunc;
}
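
// Schematic overall effect of the outlining (an illustrative sketch; names
// and types are hypothetical). Given
//
//   gpu.launch blocks(%bx, %by, %bz) in (...) threads(%tx, %ty, %tz) in (...) {
//     ... body using %tx and a captured value %v : f32 ...
//     gpu.terminator
//   }
//
// the generated kernel has the form
//
//   gpu.func @foo_kernel(%arg0: f32) kernel {
//     %tx = gpu.thread_id x
//     ... body using %tx and %arg0 ...
//     gpu.return
//   }
//
// The launch itself is rewritten into a gpu.launch_func by
// convertToLaunchFuncOp below.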

gpu::GPUFuncOp mlir::outlineKernelFunc(gpu::LaunchOp launchOp,
                                       StringRef kernelFnName,
                                       llvm::SmallVectorImpl<Value> &operands) {
  DenseSet<Value> inputOperandSet;
  inputOperandSet.insert_range(operands);
  SetVector<Value> operandSet(llvm::from_range, operands);
  auto funcOp = outlineKernelFuncImpl(launchOp, kernelFnName, operandSet);
  for (auto operand : operandSet) {
    if (!inputOperandSet.count(operand))
      operands.push_back(operand);
  }
  return funcOp;
}
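
// Minimal usage sketch for the public entry point above (assumes the caller
// already holds a gpu::LaunchOp `launch`; the kernel name is arbitrary):
//
//   SmallVector<Value> operands;   // may be pre-seeded with known operands
//   gpu::GPUFuncOp kernel =
//       outlineKernelFunc(launch, "my_kernel", operands);
//   // `operands` has been extended with every additional value the outlined
//   // kernel captures from above the launch.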

/// Replace `gpu.launch` operations with a `gpu.launch_func` operation
/// launching `kernelFunc`. The kernel func contains the body of the
/// `gpu.launch` with constant region arguments inlined.
static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
                                  gpu::GPUFuncOp kernelFunc,
                                  ValueRange operands) {
  OpBuilder builder(launchOp);
  // The launch op has an optional dynamic shared memory size. If it doesn't
  // exist, we use zero.
  Value asyncToken = launchOp.getAsyncToken();
  std::optional<gpu::KernelDim3> clusterSize =
      launchOp.getClusterSizeOperandValues();
  auto launchFunc = gpu::LaunchFuncOp::create(
      builder, launchOp.getLoc(), kernelFunc,
      launchOp.getGridSizeOperandValues(), launchOp.getBlockSizeOperandValues(),
      launchOp.getDynamicSharedMemorySize(), operands,
      asyncToken ? asyncToken.getType() : nullptr,
      launchOp.getAsyncDependencies(), clusterSize);
  launchOp.replaceAllUsesWith(launchFunc);
  launchOp.erase();
}

namespace {
/// Pass that moves ops which are likely an index computation into the
/// gpu.launch body.
class GpuLaunchSinkIndexComputationsPass
    : public impl::GpuLaunchSinkIndexComputationsPassBase<
          GpuLaunchSinkIndexComputationsPass> {
public:
  void runOnOperation() override {
    Operation *op = getOperation();
    if (op->walk([](gpu::LaunchOp launch) {
            // Pull in operations that can be sunk.
            if (failed(sinkOperationsIntoLaunchOp(launch,
                                                  isLikelyAnIndexComputation)))
              return WalkResult::interrupt();

            return WalkResult::advance();
          }).wasInterrupted())
      signalPassFailure();
  }
};

/// Pass that moves the kernel of each LaunchOp into its separate nested
/// module.
///
/// This pass moves the kernel code of each LaunchOp into a function created
/// inside a nested module. It also creates an external function of the same
/// name in the parent module.
///
/// The gpu.modules are intended to be compiled to a cubin blob independently
/// in a separate pass. The external functions can then be annotated with the
/// symbol of the cubin accessor function.
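///
/// Schematically (an illustrative sketch, not verbatim output), outlining a
/// launch inside `func.func @foo` yields:
///
///   module attributes {gpu.container_module} {
///     func.func @foo(...) {
///       gpu.launch_func @foo_kernel::@foo_kernel
///           blocks in (...) threads in (...) args(...)
///     }
///     gpu.module @foo_kernel {
///       gpu.func @foo_kernel(...) kernel { ... }
///     }
///   }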
class GpuKernelOutliningPass
    : public impl::GpuKernelOutliningPassBase<GpuKernelOutliningPass> {
public:
  using Base::Base;

  LogicalResult initialize(MLIRContext *context) override {
    // Initialize the data layout specification from the data layout string.
    if (!dataLayoutStr.empty()) {
      Attribute resultAttr = mlir::parseAttribute(dataLayoutStr, context);
      if (!resultAttr)
        return failure();

      dataLayoutSpec = dyn_cast<DataLayoutSpecInterface>(resultAttr);
      if (!dataLayoutSpec)
        return failure();
    }

    return success();
  }
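
  // As an illustration (a sketch; the exact textual option name comes from
  // the pass's TableGen definition), `dataLayoutStr` might hold a DLTI spec
  // such as
  //
  //   #dlti.dl_spec<#dlti.dl_entry<index, 32 : i32>>
  //
  // which parses into a DataLayoutSpecInterface here and is attached to each
  // created gpu.module in createKernelModule below.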

  void runOnOperation() override {
    SymbolTable symbolTable(getOperation());
    bool modified = false;
    for (auto func : getOperation().getOps<SymbolOpInterface>()) {
      // Insert just after the function.
      Block::iterator insertPt(func->getNextNode());
      auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
        SetVector<Value> operands;
        std::string kernelFnName;
        if (op.getFunction()) {
          kernelFnName = op.getFunction()->str();
        } else {
          kernelFnName =
              Twine(op->getParentOfType<SymbolOpInterface>().getName(),
                    "_kernel")
                  .str();
        }

        gpu::GPUFuncOp outlinedFunc =
            outlineKernelFuncImpl(op, kernelFnName, operands);

        // Create a nested module and insert outlinedFunc. The module will
        // originally get the same name as the function, but may be renamed on
        // insertion into the parent module.
        auto kernelModule = createKernelModule(op, outlinedFunc, symbolTable);
        symbolTable.insert(kernelModule, insertPt);

        // Potentially changes signature, pulling in constants.
        convertToLaunchFuncOp(op, outlinedFunc, operands.getArrayRef());
        modified = true;
        return WalkResult::advance();
      });
      if (funcWalkResult.wasInterrupted())
        return signalPassFailure();
    }

    // If any new module was inserted in this module, annotate this module as
    // a container module.
    if (modified)
      getOperation()->setAttr(gpu::GPUDialect::getContainerModuleAttrName(),
                              UnitAttr::get(&getContext()));
  }

private:
  /// Returns a gpu.module containing kernelFunc and all callees (recursive).
  gpu::GPUModuleOp createKernelModule(gpu::LaunchOp gpuLaunchOp,
                                      gpu::GPUFuncOp kernelFunc,
                                      const SymbolTable &parentSymbolTable) {
    // TODO: This code cannot use an OpBuilder because it must be inserted into
    // a SymbolTable by the caller. SymbolTable needs to be refactored to
    // prevent manual building of Ops with symbols in code using SymbolTables
    // and then this needs to use the OpBuilder.
    auto *context = getOperation().getContext();
    OpBuilder builder(context);
    std::string kernelModuleName;
    gpu::GPUModuleOp kernelModule;
    if (gpuLaunchOp.getModule()) {
      kernelModuleName = gpuLaunchOp.getModule()->str();
      kernelModule =
          parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName);
    } else {
      kernelModuleName = kernelFunc.getName();
    }

    // Check if the module already exists in the symbol table.
    if (!kernelModule) {
      // If not found, create a new GPU module.
      kernelModule = gpu::GPUModuleOp::create(builder, kernelFunc.getLoc(),
                                              kernelModuleName);
    }

    // If a valid data layout spec was provided, attach it to the kernel
    // module. Otherwise, the default data layout will be used.
    if (dataLayoutSpec)
      kernelModule->setAttr(DLTIDialect::kDataLayoutAttrName, dataLayoutSpec);

    SymbolTable symbolTable(kernelModule);
    symbolTable.insert(kernelFunc);

    SmallVector<Operation *, 8> symbolDefWorklist = {kernelFunc};
    while (!symbolDefWorklist.empty()) {
      if (std::optional<SymbolTable::UseRange> symbolUses =
              SymbolTable::getSymbolUses(symbolDefWorklist.pop_back_val())) {
        for (SymbolTable::SymbolUse symbolUse : *symbolUses) {
          StringAttr symbolName = symbolUse.getSymbolRef().getLeafReference();
          if (symbolTable.lookup(symbolName))
            continue;

          Operation *symbolDefClone =
              parentSymbolTable.lookup(symbolName)->clone();
          symbolDefWorklist.push_back(symbolDefClone);
          symbolTable.insert(symbolDefClone);
        }
      }
    }

    return kernelModule;
  }

  DataLayoutSpecInterface dataLayoutSpec;
};

} // namespace