//===- KernelOutlining.cpp - Implementation of GPU kernel outlining -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the GPU dialect kernel outlining pass.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/Transforms/Passes.h"

#include "mlir/AsmParser/AsmParser.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/RegionUtils.h"

#include <limits>

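// To make the overall rewrite concrete before diving into the helpers, here
// is a sketch of what the pass does to a hypothetical input. The IR below is
// illustrative only (operand lists abbreviated); the @foo_kernel names follow
// the "<parent symbol>_kernel" default used by the pass further down.
//
//   func.func @foo(%arg0: memref<?xf32>) {
//     gpu.launch blocks(%bx, %by, %bz) in (%g0 = %c1, %g1 = %c1, %g2 = %c1)
//                threads(%tx, %ty, %tz) in (%b0 = %c32, %b1 = %c1, %b2 = %c1) {
//       // ... body using %arg0 and %tx ...
//       gpu.terminator
//     }
//     return
//   }
//
// becomes
//
//   func.func @foo(%arg0: memref<?xf32>) {
//     gpu.launch_func @foo_kernel::@foo_kernel
//         blocks in (%c1, %c1, %c1) threads in (%c32, %c1, %c1)
//         args(%arg0 : memref<?xf32>)
//     return
//   }
//   gpu.module @foo_kernel {
//     gpu.func @foo_kernel(%arg0: memref<?xf32>) kernel {
//       %tx = gpu.thread_id x
//       // ... cloned body ...
//       gpu.return
//     }
//   }
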
namespace mlir {
#define GEN_PASS_DEF_GPULAUNCHSINKINDEXCOMPUTATIONSPASS
#define GEN_PASS_DEF_GPUKERNELOUTLININGPASS
#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
} // namespace mlir

using namespace mlir;

template <typename OpTy>
static void createForAllDimensions(OpBuilder &builder, Location loc,
                                   SmallVectorImpl<Value> &values) {
  for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z})
    values.push_back(OpTy::create(builder, loc, builder.getIndexType(), dim));
}

/// Adds operations generating block/thread ids and grid/block dimensions at
/// the beginning of the `launchFuncOpBody` region, and maps each argument of
/// the entry block of `launchOpBody` to the corresponding result of the added
/// operations.
static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody,
                                     Region &launchOpBody, IRMapping &map,
                                     bool hasCluster = false) {
  OpBuilder builder(loc->getContext());
  Block &firstBlock = launchOpBody.front();
  builder.setInsertionPointToStart(&launchFuncOpBody.front());
  SmallVector<Value> indexOps;
  // The order is important here: it must match the order of the entry block
  // arguments of the launch op body.
  createForAllDimensions<gpu::BlockIdOp>(builder, loc, indexOps);
  createForAllDimensions<gpu::ThreadIdOp>(builder, loc, indexOps);
  createForAllDimensions<gpu::GridDimOp>(builder, loc, indexOps);
  createForAllDimensions<gpu::BlockDimOp>(builder, loc, indexOps);
  if (hasCluster) {
    createForAllDimensions<gpu::ClusterIdOp>(builder, loc, indexOps);
    createForAllDimensions<gpu::ClusterDimOp>(builder, loc, indexOps);
  }
  // Map the leading block arguments (12, or 18 when a cluster is present) to
  // the results of the index operations created above.
  for (const auto &indexOp : enumerate(indexOps))
    map.map(firstBlock.getArgument(indexOp.index()), indexOp.value());
}
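
// For reference, the entry block of the gpu.launch body carries its index
// values as leading block arguments, and the loop above pairs them with the
// newly created ops in exactly this order (a summary of the code above, not
// extra behavior):
//
//   args 0-2   -> gpu.block_id x/y/z
//   args 3-5   -> gpu.thread_id x/y/z
//   args 6-8   -> gpu.grid_dim x/y/z
//   args 9-11  -> gpu.block_dim x/y/z
//   args 12-14 -> gpu.cluster_id x/y/z  (only when a cluster is present)
//   args 15-17 -> gpu.cluster_dim x/y/z (only when a cluster is present)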

/// Identifies operations that are beneficial to sink into kernels. These
/// operations must not have side effects, as otherwise sinking (and hence
/// duplicating) them is not legal.
static bool isLikelyAnIndexComputation(Operation *op) {
  return matchPattern(op, m_Constant()) ||
         isa<memref::DimOp, arith::SelectOp, arith::CmpIOp>(op);
}
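
// As a running example (illustrative IR), both %c0 and %dim below satisfy
// this predicate: a constant and a side-effect-free memref.dim. They can
// therefore be duplicated into the kernel rather than passed in as kernel
// arguments:
//
//   %c0 = arith.constant 0 : index
//   %dim = memref.dim %arg0, %c0 : memref<?xf32>
//   gpu.launch ... {
//     // ... uses %arg0 and %dim ...
//     gpu.terminator
//   }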

/// For a given operation `op`, computes whether it is beneficial to sink the
/// operation into the kernel. An operation can be sunk if doing so does not
/// introduce new kernel arguments. Whether a value is already available in the
/// kernel (and hence does not introduce new arguments) is checked by
/// querying `existingDependencies` and `availableValues`.
/// If an operand is not yet available, we recursively check whether it can be
/// made available by sinking its defining op.
/// Operations that are identified for sinking are added to `beneficiaryOps` in
/// the order they should appear in the kernel. Furthermore, `availableValues`
/// is updated with results that will be available after sinking the identified
/// ops.
static bool extractBeneficiaryOps(
    Operation *op, const SetVector<Value> &existingDependencies,
    SetVector<Operation *> &beneficiaryOps,
    llvm::SmallPtrSetImpl<Value> &availableValues,
    llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
  if (beneficiaryOps.count(op))
    return true;

  if (!isSinkingBeneficiary(op))
    return false;

  for (Value operand : op->getOperands()) {
    // It is already visible in the kernel, keep going.
    if (availableValues.count(operand))
      continue;
    // Else check whether it can be made available via sinking or already is a
    // dependency.
    Operation *definingOp = operand.getDefiningOp();
    if ((!definingOp || !extractBeneficiaryOps(definingOp, existingDependencies,
                                               beneficiaryOps, availableValues,
                                               isSinkingBeneficiary)) &&
        !existingDependencies.count(operand))
      return false;
  }
  // We will sink the operation, mark its results as now available.
  beneficiaryOps.insert(op);
  for (Value result : op->getResults())
    availableValues.insert(result);
  return true;
}
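
// Tracing the running example: asked about the memref.dim defining %dim, the
// function first recurses into the defining op of %c0, a sinkable constant
// with no operands, and records it; only then is the memref.dim itself
// recorded. The resulting order in `beneficiaryOps`, [constant, dim], is
// exactly the order in which the ops must later be cloned so that defs
// precede uses. The block argument %arg0 has no defining op and cannot be
// sunk, but since it is already used inside the launch it appears in
// `existingDependencies` and thus does not block sinking.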

LogicalResult mlir::sinkOperationsIntoLaunchOp(
    gpu::LaunchOp launchOp,
    llvm::function_ref<bool(Operation *)> isSinkingBeneficiary) {
  assert(isSinkingBeneficiary);
  Region &launchOpBody = launchOp.getBody();

  // Identify uses from values defined outside of the scope of the launch
  // operation.
  SetVector<Value> sinkCandidates;
  getUsedValuesDefinedAbove(launchOpBody, sinkCandidates);

  SetVector<Operation *> toBeSunk;
  llvm::SmallPtrSet<Value, 4> availableValues;
  for (Value operand : sinkCandidates) {
    Operation *operandOp = operand.getDefiningOp();
    if (!operandOp)
      continue;
    extractBeneficiaryOps(operandOp, sinkCandidates, toBeSunk, availableValues,
                          isSinkingBeneficiary);
  }

  // Insert operations so that the defs get cloned before uses.
  IRMapping map;
  OpBuilder builder(launchOpBody);
  for (Operation *op : toBeSunk) {
    Operation *clonedOp = builder.clone(*op, map);
    // Only replace uses within the launch op.
    for (auto pair : llvm::zip(op->getResults(), clonedOp->getResults()))
      replaceAllUsesInRegionWith(std::get<0>(pair), std::get<1>(pair),
                                 launchOp.getBody());
  }
  return success();
}
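
// On the running example, this rewrites the launch into (illustrative):
//
//   gpu.launch ... {
//     %c0_0 = arith.constant 0 : index
//     %dim_0 = memref.dim %arg0, %c0_0 : memref<?xf32>
//     // ... uses %arg0 and %dim_0 ...
//     gpu.terminator
//   }
//
// leaving %arg0 as the only value still captured from above, and hence the
// only kernel argument the outlining below has to create. The original ops
// outside the launch are left in place; they become dead once their remaining
// uses go away and can be removed by DCE.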

/// Return the provided KernelDim3 as an array of i32 constants if possible.
static DenseI32ArrayAttr maybeConstantDimsAttr(gpu::KernelDim3 dims) {
  SmallVector<int32_t, 3> constants;
  MLIRContext *ctx = dims.x.getContext();
  for (Value v : {dims.x, dims.y, dims.z}) {
    APInt constValue;
    if (!matchPattern(v, m_ConstantInt(&constValue)))
      return nullptr;
    // In the event someone called for a too-large block or grid dimension,
    // don't set bounds as it is likely to cause more confusing behavior.
    if (constValue.ugt(std::numeric_limits<uint32_t>::max()))
      return nullptr;
    constants.push_back(
        constValue.getLimitedValue(std::numeric_limits<uint32_t>::max()));
  }
  return DenseI32ArrayAttr::get(ctx, constants);
}
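
// For instance (illustrative), a KernelDim3 whose components are defined by
//   %c32 = arith.constant 32 : index
//   %c4  = arith.constant 4 : index
//   %c1  = arith.constant 1 : index
// yields the attribute array<i32: 32, 4, 1>, while any non-constant
// component, or one that does not fit into 32 bits, makes the helper return
// a null attribute instead.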

/// Outline the `gpu.launch` operation body into a kernel function. Replace
/// `gpu.terminator` operations with `gpu.return` in the generated function.
/// Set block and grid size bounds if known.
static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
                                            StringRef kernelFnName,
                                            SetVector<Value> &operands) {
  Location loc = launchOp.getLoc();
  // Create a builder with no insertion point; insertion will happen separately
  // due to symbol table manipulation.
  OpBuilder builder(launchOp.getContext());
  Region &launchOpBody = launchOp.getBody();

  // Identify uses from values defined outside of the scope of the launch
  // operation.
  getUsedValuesDefinedAbove(launchOpBody, operands);

  // Create the gpu.func operation.
  SmallVector<Type, 4> kernelOperandTypes;
  kernelOperandTypes.reserve(operands.size());
  for (Value operand : operands) {
    kernelOperandTypes.push_back(operand.getType());
  }
  FunctionType type =
      FunctionType::get(launchOp.getContext(), kernelOperandTypes, {});
  auto outlinedFunc = gpu::GPUFuncOp::create(
      builder, loc, kernelFnName, type,
      TypeRange(ValueRange(launchOp.getWorkgroupAttributionBBArgs())),
      TypeRange(ValueRange(launchOp.getPrivateAttributions())));
  outlinedFunc.setKernel(true);

  // If we can infer bounds on the grid and/or block sizes from the arguments
  // to the launch op, propagate them to the generated kernel. This is safe
  // because multiple launches with the same body are not deduplicated.
  if (auto blockBounds =
          maybeConstantDimsAttr(launchOp.getBlockSizeOperandValues()))
    outlinedFunc.setKnownBlockSizeAttr(blockBounds);
  if (auto gridBounds =
          maybeConstantDimsAttr(launchOp.getGridSizeOperandValues()))
    outlinedFunc.setKnownGridSizeAttr(gridBounds);
  if (auto clusterSize = launchOp.getClusterSizeOperandValues()) {
    if (auto clusterBounds = maybeConstantDimsAttr(*clusterSize))
      outlinedFunc.setKnownClusterSizeAttr(clusterBounds);
  }
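
  // For a launch whose sizes are compile-time constants, e.g. a 32x4x1 block
  // in a 1x1x1 grid, the outlined function would carry bounds roughly like
  // (illustrative syntax; the exact attribute spelling comes from the GPU
  // dialect definitions):
  //
  //   gpu.func @foo_kernel(...) kernel attributes {
  //       known_block_size = array<i32: 32, 4, 1>,
  //       known_grid_size = array<i32: 1, 1, 1>} { ... }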

  IRMapping map;

  // Map the arguments corresponding to the launch parameters like blockIdx,
  // threadIdx, etc. If a cluster is present, we also generate clusterIdx and
  // clusterDim.
  Region &outlinedFuncBody = outlinedFunc.getBody();
  injectGpuIndexOperations(loc, outlinedFuncBody, launchOpBody, map,
                           launchOp.hasClusterSize());

  // Map memory attributions from the LaunchOp to the GPUFuncOp attributions.
  for (const auto &[launchArg, funcArg] :
       llvm::zip(launchOp.getWorkgroupAttributionBBArgs(),
                 outlinedFunc.getWorkgroupAttributionBBArgs()))
    map.map(launchArg, funcArg);
  for (const auto &[launchArg, funcArg] :
       llvm::zip(launchOp.getPrivateAttributions(),
                 outlinedFunc.getPrivateAttributions()))
    map.map(launchArg, funcArg);

  // Map arguments from the gpu.launch region to the arguments of the gpu.func
  // operation.
  Block &entryBlock = outlinedFuncBody.front();
  for (const auto &operand : enumerate(operands))
    map.map(operand.value(), entryBlock.getArgument(operand.index()));

  // Clone the region of the gpu.launch operation into the gpu.func operation.
  launchOpBody.cloneInto(&outlinedFuncBody, map);

  // Replace the terminator op with returns.
  for (Block &block : launchOpBody) {
    Block *clonedBlock = map.lookup(&block);
    auto terminator = dyn_cast<gpu::TerminatorOp>(clonedBlock->getTerminator());
    if (!terminator)
      continue;
    OpBuilder replacer(terminator);
    gpu::ReturnOp::create(replacer, terminator->getLoc());
    terminator->erase();
  }

  // Now splice the entry block of the gpu.launch operation into the end of the
  // gpu.func entry block and erase the redundant block.
  Block *clonedLaunchOpEntry = map.lookup(&launchOpBody.front());
  entryBlock.getOperations().splice(entryBlock.getOperations().end(),
                                    clonedLaunchOpEntry->getOperations());
  clonedLaunchOpEntry->erase();

  return outlinedFunc;
}

gpu::GPUFuncOp mlir::outlineKernelFunc(gpu::LaunchOp launchOp,
                                       StringRef kernelFnName,
                                       SmallVectorImpl<Value> &operands) {
  DenseSet<Value> inputOperandSet;
  inputOperandSet.insert_range(operands);
  SetVector<Value> operandSet(llvm::from_range, operands);
  auto funcOp = outlineKernelFuncImpl(launchOp, kernelFnName, operandSet);
  for (auto operand : operandSet) {
    if (!inputOperandSet.count(operand))
      operands.push_back(operand);
  }
  return funcOp;
}

/// Replace `gpu.launch` operations with a `gpu.launch_func` operation
/// launching `kernelFunc`. The kernel func contains the body of the
/// `gpu.launch` with constant region arguments inlined.
static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
                                  gpu::GPUFuncOp kernelFunc,
                                  ValueRange operands) {
  OpBuilder builder(launchOp);
  // The launch op has an optional dynamic shared memory size. If it doesn't
  // exist, we use zero.
  Value asyncToken = launchOp.getAsyncToken();
  std::optional<gpu::KernelDim3> clusterSize =
      launchOp.getClusterSizeOperandValues();
  auto launchFunc = gpu::LaunchFuncOp::create(
      builder, launchOp.getLoc(), kernelFunc,
      launchOp.getGridSizeOperandValues(), launchOp.getBlockSizeOperandValues(),
      launchOp.getDynamicSharedMemorySize(), operands,
      asyncToken ? asyncToken.getType() : nullptr,
      launchOp.getAsyncDependencies(), clusterSize);
  if (launchOp.getCooperative())
    launchFunc.setCooperative(true);
  launchOp.replaceAllUsesWith(launchFunc);
  launchOp.erase();
}

namespace {
/// Pass that moves ops which are likely an index computation into the
/// gpu.launch body.
class GpuLaunchSinkIndexComputationsPass
    : public impl::GpuLaunchSinkIndexComputationsPassBase<
          GpuLaunchSinkIndexComputationsPass> {
public:
  void runOnOperation() override {
    Operation *op = getOperation();
    if (op->walk([](gpu::LaunchOp launch) {
            // Pull in operations that can be sunk.
            if (failed(sinkOperationsIntoLaunchOp(launch,
                                                  isLikelyAnIndexComputation)))
              return WalkResult::interrupt();

            return WalkResult::advance();
          }).wasInterrupted())
      signalPassFailure();
  }
};

/// Pass that moves the kernel of each LaunchOp into its separate nested module.
///
/// This pass moves the kernel code of each LaunchOp into a function created
/// inside a nested module. It also creates an external function of the same
/// name in the parent module.
///
/// The gpu.modules are intended to be compiled to a cubin blob independently
/// in a separate pass. The external functions can then be annotated with the
/// symbol of the cubin accessor function.
class GpuKernelOutliningPass
    : public impl::GpuKernelOutliningPassBase<GpuKernelOutliningPass> {
public:
  using Base::Base;

  LogicalResult initialize(MLIRContext *context) override {
    // Initialize the data layout specification from the data layout string.
    if (!dataLayoutStr.empty()) {
      Attribute resultAttr = mlir::parseAttribute(dataLayoutStr, context);
      if (!resultAttr)
        return failure();

      dataLayoutSpec = dyn_cast<DataLayoutSpecInterface>(resultAttr);
      if (!dataLayoutSpec)
        return failure();
    }

    return success();
  }
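
  // A minimal usage sketch, assuming the pass option is spelled
  // `data-layout-str` in the tablegen'd pass definition (the authoritative
  // spelling lives in Passes.td):
  //
  //   mlir-opt --gpu-kernel-outlining='data-layout-str=#dlti.dl_spec<...>' in.mlir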

  void runOnOperation() override {
    SymbolTable symbolTable(getOperation());
    bool modified = false;
    for (auto func : getOperation().getOps<SymbolOpInterface>()) {
      // Insert just after the function.
      Block::iterator insertPt(func->getNextNode());
      auto funcWalkResult = func.walk([&](gpu::LaunchOp op) {
        SetVector<Value> operands;
        std::string kernelFnName;
        if (op.getFunction()) {
          kernelFnName = op.getFunction()->str();
        } else {
          kernelFnName =
              Twine(op->getParentOfType<SymbolOpInterface>().getName(),
                    "_kernel")
                  .str();
        }

        gpu::GPUFuncOp outlinedFunc =
            outlineKernelFuncImpl(op, kernelFnName, operands);

        // Create a nested module and insert outlinedFunc. The module will
        // originally get the same name as the function, but may be renamed on
        // insertion into the parent module.
        FailureOr<gpu::GPUModuleOp> kernelModule =
            createKernelModule(op, outlinedFunc, symbolTable);
        if (failed(kernelModule))
          return WalkResult::interrupt();
        symbolTable.insert(*kernelModule, insertPt);

        // Potentially changes signature, pulling in constants.
        convertToLaunchFuncOp(op, outlinedFunc, operands.getArrayRef());
        modified = true;
        return WalkResult::advance();
      });
      if (funcWalkResult.wasInterrupted())
        return signalPassFailure();
    }

    // If any new module was inserted in this module, annotate this module as
    // a container module.
    if (modified)
      getOperation()->setAttr(gpu::GPUDialect::getContainerModuleAttrName(),
                              UnitAttr::get(&getContext()));
  }

private:
  /// Returns a gpu.module containing kernelFunc and all callees (recursive).
  FailureOr<gpu::GPUModuleOp>
  createKernelModule(gpu::LaunchOp gpuLaunchOp, gpu::GPUFuncOp kernelFunc,
                     const SymbolTable &parentSymbolTable) {
    // TODO: This code cannot use an OpBuilder because it must be inserted into
    // a SymbolTable by the caller. SymbolTable needs to be refactored to
    // prevent manual building of Ops with symbols in code using SymbolTables
    // and then this needs to use the OpBuilder.
    auto *context = getOperation().getContext();
    OpBuilder builder(context);
    std::string kernelModuleName;
    gpu::GPUModuleOp kernelModule;
    if (gpuLaunchOp.getModule()) {
      kernelModuleName = gpuLaunchOp.getModule()->str();
      kernelModule =
          parentSymbolTable.lookup<gpu::GPUModuleOp>(kernelModuleName);
    } else {
      kernelModuleName = kernelFunc.getName();
    }

    // Check whether the module already exists in the symbol table.
    if (!kernelModule) {
      // If not found, create a new GPU module.
      kernelModule = gpu::GPUModuleOp::create(builder, kernelFunc.getLoc(),
                                              kernelModuleName);
    }

    // If a valid data layout spec was provided, attach it to the kernel
    // module. Otherwise, the default data layout will be used.
    if (dataLayoutSpec)
      kernelModule->setAttr(DLTIDialect::kDataLayoutAttrName, dataLayoutSpec);

    SymbolTable symbolTable(kernelModule);
    symbolTable.insert(kernelFunc);

    SmallVector<Operation *, 8> symbolDefWorklist = {kernelFunc};
    while (!symbolDefWorklist.empty()) {
      if (std::optional<SymbolTable::UseRange> symbolUses =
              SymbolTable::getSymbolUses(symbolDefWorklist.pop_back_val())) {
        for (SymbolTable::SymbolUse symbolUse : *symbolUses) {
          // Nested symbol references (e.g. @M::@F) cannot be resolved inside
          // the kernel module when @M exists in the parent: @M will not be
          // available inside the outlined module after the transformation.
          // Ignore references whose root does not exist in the parent, as
          // those are phantom references (e.g. in unregistered-op attributes)
          // that were already unresolvable and are simply copied as-is.
          if (!symbolUse.getSymbolRef().getNestedReferences().empty() &&
              parentSymbolTable.lookup(
                  symbolUse.getSymbolRef().getRootReference())) {
            symbolUse.getUser()->emitError("nested symbol reference '")
                << symbolUse.getSymbolRef()
                << "' cannot be resolved inside the outlined kernel module; "
                   "gpu-kernel-outlining does not support cross-module symbol "
                   "references inside gpu.launch bodies";
            kernelModule->erase();
            return failure();
          }
          StringAttr symbolName = symbolUse.getSymbolRef().getLeafReference();
          if (symbolTable.lookup(symbolName))
            continue;

          Operation *symbolDef = parentSymbolTable.lookup(symbolName);
          if (!symbolDef)
            continue;
          Operation *symbolDefClone = symbolDef->clone();
          symbolDefWorklist.push_back(symbolDefClone);
          symbolTable.insert(symbolDefClone);
        }
      }
    }

    return kernelModule;
  }

  DataLayoutSpecInterface dataLayoutSpec;
};

} // namespace