MLIR  19.0.0git
KernelOutlining.cpp
Go to the documentation of this file.
1 //===- KernelOutlining.cpp - Implementation of GPU kernel outlining -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the GPU dialect kernel outlining pass.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 
18 #include "mlir/Dialect/DLTI/DLTI.h"
23 #include "mlir/IR/Builders.h"
25 #include "mlir/IR/IRMapping.h"
26 #include "mlir/IR/Matchers.h"
27 #include "mlir/IR/SymbolTable.h"
28 #include "mlir/Support/LLVM.h"
30 #include <limits>
31 
32 namespace mlir {
33 #define GEN_PASS_DEF_GPULAUNCHSINKINDEXCOMPUTATIONS
34 #define GEN_PASS_DEF_GPUKERNELOUTLINING
35 #include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
36 } // namespace mlir
37 
38 using namespace mlir;
39 
40 template <typename OpTy>
41 static void createForAllDimensions(OpBuilder &builder, Location loc,
42  SmallVectorImpl<Value> &values) {
43  for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z})
44  values.push_back(builder.create<OpTy>(loc, builder.getIndexType(), dim));
45 }
46 
47 /// Adds operations generating block/thread ids and grid/block dimensions at the
48 /// beginning of the `launchFuncOpBody` region. Add mapping from argument in
49 /// entry block of `launchOpBody`, to the corresponding result value of the
50 /// added operations.
51 static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody,
52  Region &launchOpBody, IRMapping &map,
53  bool hasCluster = false) {
54  OpBuilder builder(loc->getContext());
55  Block &firstBlock = launchOpBody.front();
56  builder.setInsertionPointToStart(&launchFuncOpBody.front());
57  SmallVector<Value> indexOps;
58  // The order is important here, as it must match the order of the arguments
59  createForAllDimensions<gpu::BlockIdOp>(builder, loc, indexOps);
60  createForAllDimensions<gpu::ThreadIdOp>(builder, loc, indexOps);
61  createForAllDimensions<gpu::GridDimOp>(builder, loc, indexOps);
62  createForAllDimensions<gpu::BlockDimOp>(builder, loc, indexOps);
63  if (hasCluster) {
64  createForAllDimensions<gpu::ClusterIdOp>(builder, loc, indexOps);
65  createForAllDimensions<gpu::ClusterDimOp>(builder, loc, indexOps);
66  }
67  // Replace the leading 12 function args with the respective thread/block index
68  // operations. Iterate backwards since args are erased and indices change.
69  for (const auto &indexOp : enumerate(indexOps))
70  map.map(firstBlock.getArgument(indexOp.index()), indexOp.value());
71 }
72 
73 /// Identifies operations that are beneficial to sink into kernels. These
74 /// operations may not have side-effects, as otherwise sinking (and hence
75 /// duplicating them) is not legal.
77  return matchPattern(op, m_Constant()) ||
78  isa<memref::DimOp, arith::SelectOp, arith::CmpIOp>(op);
79 }
80 
81 /// For a given operation `op`, computes whether it is beneficial to sink the
82 /// operation into the kernel. An operation can be sunk if doing so does not
83 /// introduce new kernel arguments. Whether a value is already available in the
84 /// kernel (and hence does not introduce new arguments) is checked by
85 /// querying `existingDependencies` and `availableValues`.
86 /// If an operand is not yet available, we recursively check whether it can be
87 /// made available by siking its defining op.
88 /// Operations that are indentified for sinking are added to `beneficiaryOps` in
89 /// the order they should appear in the kernel. Furthermore, `availableValues`
90 /// is updated with results that will be available after sinking the identified
91 /// ops.
93  Operation *op, const SetVector<Value> &existingDependencies,
94  SetVector<Operation *> &beneficiaryOps,
95  llvm::SmallPtrSetImpl<Value> &availableValues,
96  llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
97  if (beneficiaryOps.count(op))
98  return true;
99 
100  if (!isSinkingBeneficiary(op))
101  return false;
102 
103  for (Value operand : op->getOperands()) {
104  // It is already visible in the kernel, keep going.
105  if (availableValues.count(operand))
106  continue;
107  // Else check whether it can be made available via sinking or already is a
108  // dependency.
109  Operation *definingOp = operand.getDefiningOp();
110  if ((!definingOp || !extractBeneficiaryOps(definingOp, existingDependencies,
111  beneficiaryOps, availableValues,
112  isSinkingBeneficiary)) &&
113  !existingDependencies.count(operand))
114  return false;
115  }
116  // We will sink the operation, mark its results as now available.
117  beneficiaryOps.insert(op);
118  for (Value result : op->getResults())
119  availableValues.insert(result);
120  return true;
121 }
122 
124  gpu::LaunchOp launchOp,
125  llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
126  assert(isSinkingBeneficiary);
127  Region &launchOpBody = launchOp.getBody();
128 
129  // Identify uses from values defined outside of the scope of the launch
130  // operation.
131  SetVector<Value> sinkCandidates;
132  getUsedValuesDefinedAbove(launchOpBody, sinkCandidates);
133 
134  SetVector<Operation *> toBeSunk;
135  llvm::SmallPtrSet<Value, 4> availableValues;
136  for (Value operand : sinkCandidates) {
137  Operation *operandOp = operand.getDefiningOp();
138  if (!operandOp)
139  continue;
140  extractBeneficiaryOps(operandOp, sinkCandidates, toBeSunk, availableValues,
141  isSinkingBeneficiary);
142  }
143 
144  // Insert operations so that the defs get cloned before uses.
145  IRMapping map;
146  OpBuilder builder(launchOpBody);
147  for (Operation *op : toBeSunk) {
148  Operation *clonedOp = builder.clone(*op, map);
149  // Only replace uses within the launch op.
150  for (auto pair : llvm::zip(op->getResults(), clonedOp->getResults()))
151  replaceAllUsesInRegionWith(std::get<0>(pair), std::get<1>(pair),
152  launchOp.getBody());
153  }
154  return success();
155 }
156 
157 /// Return the provided KernelDim3 as an array of i32 constants if possible.
159  SmallVector<int32_t, 3> constants;
160  MLIRContext *ctx = dims.x.getContext();
161  for (Value v : {dims.x, dims.y, dims.z}) {
162  APInt constValue;
163  if (!matchPattern(v, m_ConstantInt(&constValue)))
164  return nullptr;
165  // In the event someone called for a too-large block or grid dimension,
166  // don't set bounds as it is likely to cause more confusing behavior.
167  if (constValue.ugt(std::numeric_limits<uint32_t>::max()))
168  return nullptr;
169  constants.push_back(
170  constValue.getLimitedValue(std::numeric_limits<uint32_t>::max()));
171  }
172  return DenseI32ArrayAttr::get(ctx, constants);
173 }
174 
175 /// Outline the `gpu.launch` operation body into a kernel function. Replace
176 /// `gpu.terminator` operations by `gpu.return` in the generated function.
177 /// Set block and grid size bounds if known.
178 static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
179  StringRef kernelFnName,
180  SetVector<Value> &operands) {
181  Location loc = launchOp.getLoc();
182  // Create a builder with no insertion point, insertion will happen separately
183  // due to symbol table manipulation.
184  OpBuilder builder(launchOp.getContext());
185  Region &launchOpBody = launchOp.getBody();
186 
187  // Identify uses from values defined outside of the scope of the launch
188  // operation.
189  getUsedValuesDefinedAbove(launchOpBody, operands);
190 
191  // Create the gpu.func operation.
192  SmallVector<Type, 4> kernelOperandTypes;
193  kernelOperandTypes.reserve(operands.size());
194  for (Value operand : operands) {
195  kernelOperandTypes.push_back(operand.getType());
196  }
197  FunctionType type =
198  FunctionType::get(launchOp.getContext(), kernelOperandTypes, {});
199  auto outlinedFunc = builder.create<gpu::GPUFuncOp>(
200  loc, kernelFnName, type,
201  TypeRange(ValueRange(launchOp.getWorkgroupAttributions())),
202  TypeRange(ValueRange(launchOp.getPrivateAttributions())));
203  outlinedFunc->setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
204  builder.getUnitAttr());
205 
206  // If we can infer bounds on the grid and/or block sizes from the arguments
207  // to the launch op, propagate them to the generated kernel. This is safe
208  // because multiple launches with the same body are not deduplicated.
209  if (auto blockBounds =
210  maybeConstantDimsAttr(launchOp.getBlockSizeOperandValues()))
211  outlinedFunc->setAttr(gpu::GPUFuncOp::getKnownBlockSizeAttrName(),
212  blockBounds);
213  if (auto gridBounds =
214  maybeConstantDimsAttr(launchOp.getGridSizeOperandValues()))
215  outlinedFunc->setAttr(gpu::GPUFuncOp::getKnownGridSizeAttrName(),
216  gridBounds);
217 
218  IRMapping map;
219 
220  // Map the arguments corresponding to the launch parameters like blockIdx,
221  // threadIdx, etc. If cluster is present, then we also generate clusterIdx and
222  // clusterDim.
223  Region &outlinedFuncBody = outlinedFunc.getBody();
224  injectGpuIndexOperations(loc, outlinedFuncBody, launchOpBody, map,
225  launchOp.hasClusterSize());
226 
227  // Map memory attributions from the LaunOp op to the GPUFuncOp attributions.
228  for (const auto &[launchArg, funcArg] :
229  llvm::zip(launchOp.getWorkgroupAttributions(),
230  outlinedFunc.getWorkgroupAttributions()))
231  map.map(launchArg, funcArg);
232  for (const auto &[launchArg, funcArg] :
233  llvm::zip(launchOp.getPrivateAttributions(),
234  outlinedFunc.getPrivateAttributions()))
235  map.map(launchArg, funcArg);
236 
237  // Map arguments from gpu.launch region to the arguments of the gpu.func
238  // operation.
239  Block &entryBlock = outlinedFuncBody.front();
240  for (const auto &operand : enumerate(operands))
241  map.map(operand.value(), entryBlock.getArgument(operand.index()));
242 
243  // Clone the region of the gpu.launch operation into the gpu.func operation.
244  // TODO: If cloneInto can be modified such that if a mapping for
245  // a block exists, that block will be used to clone operations into (at the
246  // end of the block), instead of creating a new block, this would be much
247  // cleaner.
248  launchOpBody.cloneInto(&outlinedFuncBody, map);
249 
250  // Branch from entry of the gpu.func operation to the block that is cloned
251  // from the entry block of the gpu.launch operation.
252  Block &launchOpEntry = launchOpBody.front();
253  Block *clonedLaunchOpEntry = map.lookup(&launchOpEntry);
254  builder.setInsertionPointToEnd(&entryBlock);
255  builder.create<cf::BranchOp>(loc, clonedLaunchOpEntry);
256 
257  outlinedFunc.walk([](gpu::TerminatorOp op) {
258  OpBuilder replacer(op);
259  replacer.create<gpu::ReturnOp>(op.getLoc());
260  op.erase();
261  });
262  return outlinedFunc;
263 }
264 
265 gpu::GPUFuncOp mlir::outlineKernelFunc(gpu::LaunchOp launchOp,
266  StringRef kernelFnName,
267  llvm::SmallVectorImpl<Value> &operands) {
268  DenseSet<Value> inputOperandSet;
269  inputOperandSet.insert(operands.begin(), operands.end());
270  SetVector<Value> operandSet(operands.begin(), operands.end());
271  auto funcOp = outlineKernelFuncImpl(launchOp, kernelFnName, operandSet);
272  for (auto operand : operandSet) {
273  if (!inputOperandSet.count(operand))
274  operands.push_back(operand);
275  }
276  return funcOp;
277 }
278 
279 /// Replace `gpu.launch` operations with an `gpu.launch_func` operation
280 /// launching `kernelFunc`. The kernel func contains the body of the
281 /// `gpu.launch` with constant region arguments inlined.
282 static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
283  gpu::GPUFuncOp kernelFunc,
284  ValueRange operands) {
285  OpBuilder builder(launchOp);
286  // The launch op has an optional dynamic shared memory size. If it doesn't
287  // exist, we use zero.
288  Value asyncToken = launchOp.getAsyncToken();
289  std::optional<gpu::KernelDim3> clusterSize =
290  launchOp.getClusterSizeOperandValues();
291  auto launchFunc = builder.create<gpu::LaunchFuncOp>(
292  launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
293  launchOp.getBlockSizeOperandValues(),
294  launchOp.getDynamicSharedMemorySize(), operands,
295  asyncToken ? asyncToken.getType() : nullptr,
296  launchOp.getAsyncDependencies(), clusterSize);
297  launchOp.replaceAllUsesWith(launchFunc);
298  launchOp.erase();
299 }
300 
301 namespace {
302 /// Pass that moves ops which are likely an index computation into gpu.launch
303 /// body.
304 class GpuLaunchSinkIndexComputationsPass
305  : public impl::GpuLaunchSinkIndexComputationsBase<
306  GpuLaunchSinkIndexComputationsPass> {
307 public:
308  void runOnOperation() override {
309  Operation *op = getOperation();
310  if (op->walk([](gpu::LaunchOp launch) {
311  // Pull in instructions that can be sunk
312  if (failed(sinkOperationsIntoLaunchOp(launch,
313  isLikelyAnIndexComputation)))
314  return WalkResult::interrupt();
315 
316  return WalkResult::advance();
317  }).wasInterrupted())
318  signalPassFailure();
319  }
320 };
321 
322 /// Pass that moves the kernel of each LaunchOp into its separate nested module.
323 ///
324 /// This pass moves the kernel code of each LaunchOp into a function created
325 /// inside a nested module. It also creates an external function of the same
326 /// name in the parent module.
327 ///
328 /// The gpu.modules are intended to be compiled to a cubin blob independently in
329 /// a separate pass. The external functions can then be annotated with the
330 /// symbol of the cubin accessor function.
331 class GpuKernelOutliningPass
332  : public impl::GpuKernelOutliningBase<GpuKernelOutliningPass> {
333 public:
334  GpuKernelOutliningPass(StringRef dlStr) {
335  if (!dlStr.empty() && !dataLayoutStr.hasValue())
336  dataLayoutStr = dlStr.str();
337  }
338 
339  GpuKernelOutliningPass(const GpuKernelOutliningPass &other)
340  : GpuKernelOutliningBase(other), dataLayoutSpec(other.dataLayoutSpec) {
341  dataLayoutStr = other.dataLayoutStr.getValue();
342  }
343 
344  LogicalResult initialize(MLIRContext *context) override {
345  // Initialize the data layout specification from the data layout string.
346  if (!dataLayoutStr.empty()) {
347  Attribute resultAttr = mlir::parseAttribute(dataLayoutStr, context);
348  if (!resultAttr)
349  return failure();
350 
351  dataLayoutSpec = dyn_cast<DataLayoutSpecInterface>(resultAttr);
352  if (!dataLayoutSpec)
353  return failure();
354  }
355 
356  return success();
357  }
358 
359  void runOnOperation() override {
360  SymbolTable symbolTable(getOperation());
361  bool modified = false;
362  for (auto func : getOperation().getOps<SymbolOpInterface>()) {
363  // Insert just after the function.
364  Block::iterator insertPt(func->getNextNode());
365  auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
366  SetVector<Value> operands;
367  std::string kernelFnName =
368  Twine(op->getParentOfType<SymbolOpInterface>().getName(), "_kernel")
369  .str();
370 
371  gpu::GPUFuncOp outlinedFunc =
372  outlineKernelFuncImpl(op, kernelFnName, operands);
373 
374  // Create nested module and insert outlinedFunc. The module will
375  // originally get the same name as the function, but may be renamed on
376  // insertion into the parent module.
377  auto kernelModule = createKernelModule(outlinedFunc, symbolTable);
378  symbolTable.insert(kernelModule, insertPt);
379 
380  // Potentially changes signature, pulling in constants.
381  convertToLaunchFuncOp(op, outlinedFunc, operands.getArrayRef());
382  modified = true;
383  return WalkResult::advance();
384  });
385  if (funcWalkResult.wasInterrupted())
386  return signalPassFailure();
387  }
388 
389  // If any new module was inserted in this module, annotate this module as
390  // a container module.
391  if (modified)
392  getOperation()->setAttr(gpu::GPUDialect::getContainerModuleAttrName(),
394  }
395 
396 private:
397  /// Returns a gpu.module containing kernelFunc and all callees (recursive).
398  gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
399  const SymbolTable &parentSymbolTable) {
400  // TODO: This code cannot use an OpBuilder because it must be inserted into
401  // a SymbolTable by the caller. SymbolTable needs to be refactored to
402  // prevent manual building of Ops with symbols in code using SymbolTables
403  // and then this needs to use the OpBuilder.
404  auto *context = getOperation().getContext();
405  OpBuilder builder(context);
406  auto kernelModule = builder.create<gpu::GPUModuleOp>(kernelFunc.getLoc(),
407  kernelFunc.getName());
408 
409  // If a valid data layout spec was provided, attach it to the kernel module.
410  // Otherwise, the default data layout will be used.
411  if (dataLayoutSpec)
412  kernelModule->setAttr(DLTIDialect::kDataLayoutAttrName, dataLayoutSpec);
413 
414  SymbolTable symbolTable(kernelModule);
415  symbolTable.insert(kernelFunc);
416 
417  SmallVector<Operation *, 8> symbolDefWorklist = {kernelFunc};
418  while (!symbolDefWorklist.empty()) {
419  if (std::optional<SymbolTable::UseRange> symbolUses =
420  SymbolTable::getSymbolUses(symbolDefWorklist.pop_back_val())) {
421  for (SymbolTable::SymbolUse symbolUse : *symbolUses) {
422  StringRef symbolName =
423  cast<FlatSymbolRefAttr>(symbolUse.getSymbolRef()).getValue();
424  if (symbolTable.lookup(symbolName))
425  continue;
426 
427  Operation *symbolDefClone =
428  parentSymbolTable.lookup(symbolName)->clone();
429  symbolDefWorklist.push_back(symbolDefClone);
430  symbolTable.insert(symbolDefClone);
431  }
432  }
433  }
434 
435  return kernelModule;
436  }
437 
438  Option<std::string> dataLayoutStr{
439  *this, "data-layout-str",
440  llvm::cl::desc("String containing the data layout specification to be "
441  "attached to the GPU kernel module")};
442 
443  DataLayoutSpecInterface dataLayoutSpec;
444 };
445 
446 } // namespace
447 
449  return std::make_unique<GpuLaunchSinkIndexComputationsPass>();
450 }
451 
452 std::unique_ptr<OperationPass<ModuleOp>>
453 mlir::createGpuKernelOutliningPass(StringRef dataLayoutStr) {
454  return std::make_unique<GpuKernelOutliningPass>(dataLayoutStr);
455 }
static MLIRContext * getContext(OpFoldResult val)
static DenseI32ArrayAttr maybeConstantDimsAttr(gpu::KernelDim3 dims)
Return the provided KernelDim3 as an array of i32 constants if possible.
static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp, StringRef kernelFnName, SetVector< Value > &operands)
Outline the gpu.launch operation body into a kernel function.
static bool isLikelyAnIndexComputation(Operation *op)
Identifies operations that are beneficial to sink into kernels.
static void convertToLaunchFuncOp(gpu::LaunchOp launchOp, gpu::GPUFuncOp kernelFunc, ValueRange operands)
Replace gpu.launch operations with a gpu.launch_func operation launching kernelFunc.
static void createForAllDimensions(OpBuilder &builder, Location loc, SmallVectorImpl< Value > &values)
static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody, Region &launchOpBody, IRMapping &map, bool hasCluster=false)
Adds operations generating block/thread ids and grid/block dimensions at the beginning of the launchF...
static bool extractBeneficiaryOps(Operation *op, const SetVector< Value > &existingDependencies, SetVector< Operation * > &beneficiaryOps, llvm::SmallPtrSetImpl< Value > &availableValues, llvm::function_ref< bool(Operation *)> isSinkingBeneficiary)
For a given operation op, computes whether it is beneficial to sink the operation into the kernel.
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Attributes are known-constant values of operations.
Definition: Attributes.h:25
MLIRContext * getContext() const
Return the context this attribute belongs to.
Definition: Attributes.cpp:37
Block represents an ordered list of Operations.
Definition: Block.h:30
OpListType::iterator iterator
Definition: Block.h:137
BlockArgument getArgument(unsigned i)
Definition: Block.h:126
UnitAttr getUnitAttr()
Definition: Builders.cpp:114
IndexType getIndexType()
Definition: Builders.cpp:71
This is a utility class for mapping one set of IR entities to another.
Definition: IRMapping.h:26
auto lookup(T from) const
Lookup a mapped value within the map.
Definition: IRMapping.h:72
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
Definition: IRMapping.h:30
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:63
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:60
This class helps build Operations.
Definition: Builders.h:209
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition: Builders.cpp:555
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition: Builders.h:433
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Definition: Builders.h:438
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:464
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
Operation * clone(IRMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
Definition: Operation.cpp:717
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:793
Location getLoc()
The source location the operation was defined or derived from.
Definition: Operation.h:223
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
Definition: Operation.h:577
operand_range getOperands()
Returns an iterator on the underlying Value's.
Definition: Operation.h:373
void replaceAllUsesWith(ValuesT &&values)
Replace all uses of results of this operation with the provided 'values'.
Definition: Operation.h:272
result_range getResults()
Definition: Operation.h:410
void erase()
Remove this operation from its parent block and delete it.
Definition: Operation.cpp:539
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
void cloneInto(Region *dest, IRMapping &mapper)
Clone the internal blocks from this region into dest.
Definition: Region.cpp:70
Block & front()
Definition: Region.h:65
This class represents a specific symbol use.
Definition: SymbolTable.h:183
This class allows for representing and managing the symbol table used by operations with the 'SymbolT...
Definition: SymbolTable.h:24
Operation * lookup(StringRef name) const
Look up a symbol with the specified name, returning null if no such name exists.
static std::optional< UseRange > getSymbolUses(Operation *from)
Get an iterator range for all of the uses, for any symbol, that are nested within the given operation...
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:36
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
MLIRContext * getContext() const
Utility to get the associated MLIRContext that this value is defined in.
Definition: Value.h:132
Type getType() const
Return the type of this value.
Definition: Value.h:129
static WalkResult advance()
Definition: Visitors.h:52
static DenseArrayAttrImpl get(MLIRContext *context, ArrayRef< int32_t > content)
Builder from ArrayRef<T>.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:285
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
Definition: Matchers.h:401
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:62
detail::constant_int_value_binder m_ConstantInt(IntegerAttr::ValueType *bind_value)
Matches a constant holding a scalar/vector/tensor integer (splat) and writes the integer value to bin...
Definition: Matchers.h:438
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
Definition: RegionUtils.cpp:28
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
Definition: LogicalResult.h:56
std::unique_ptr< Pass > createGpuLauchSinkIndexComputationsPass()
Pass that moves ops which are likely an index computation into gpu.launch body.
Attribute parseAttribute(llvm::StringRef attrStr, MLIRContext *context, Type type={}, size_t *numRead=nullptr, bool isKnownNullTerminated=false)
This parses a single MLIR attribute to an MLIR context if it was valid.
void getUsedValuesDefinedAbove(Region &region, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Definition: RegionUtils.cpp:63
LogicalResult sinkOperationsIntoLaunchOp(gpu::LaunchOp launchOp, llvm::function_ref< bool(Operation *)> isSinkingBeneficiary)
Sink operations into the launchOp to reduce the number of values that are used within the region of t...
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
Definition: Matchers.h:310
std::unique_ptr< OperationPass< ModuleOp > > createGpuKernelOutliningPass(StringRef dataLayoutStr=StringRef())
Replaces gpu.launch with gpu.launch_func by moving the region into a separate kernel function.
gpu::GPUFuncOp outlineKernelFunc(gpu::LaunchOp launchOp, StringRef kernelFnName, SmallVectorImpl< Value > &operands)
Get a gpu.func created from outlining the region of a gpu.launch op with the given kernelFnName.
This class represents an efficient way to signal success or failure.
Definition: LogicalResult.h:26
Utility class for the GPU dialect to represent triples of Values accessible through ....
Definition: GPUDialect.h:38