MLIR  22.0.0git
Namespaces | Macros | Functions
Utils.cpp File Reference
#include "mlir/Dialect/GPU/TransformOps/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Value.h"
#include "mlir/IR/Visitors.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/InterleavedRange.h"

Go to the source code of this file.

Namespaces

 mlir
 Include the generated interface declarations.
 
 mlir::transform
 
 mlir::transform::gpu
 

Macros

#define DEBUG_TYPE   "gpu-transforms"
 

Functions

static FailureOr< SmallVector< Value > > buildPredicates (RewriterBase &rewriter, Location loc, ArrayRef< Value > activeIds, ArrayRef< int64_t > activeMappingSizes, ArrayRef< int64_t > availableMappingSizes, std::string &errorMsg)
 Build predicates to filter execution by only the activeIds. More...
 
template<typename ThreadOrBlockIdOp >
static Value buildLinearId (RewriterBase &rewriter, Location loc, ArrayRef< OpFoldResult > originalBasisOfr)
 Return a flattened thread id for the workgroup with given sizes. More...
 
template<typename ThreadOrBlockIdOp >
static GpuIdBuilderFnType commonLinearIdBuilderFn (int64_t multiplicity=1, DeviceMaskingAttrInterface mask=nullptr)
 Create a linear id builder that takes the originalBasisOfr and decomposes it in the basis of forallMappingSizes. More...
 
template<typename ThreadOrBlockIdOp >
static GpuIdBuilderFnType common3DIdBuilderFn (int64_t multiplicity=1)
 Create a simple 3-D id builder that takes the originalBasisOfr. The 3-D id builder returns a 3-D vector of ids for indexing and 3-D sizes. More...
 
static GpuIdBuilderFnType laneIdBuilderFn (int64_t warpSize)
 Create a lane id builder that takes the originalBasis and decomposes it in the basis of forallMappingSizes. More...
 
DiagnosedSilenceableFailure mlir::transform::gpu::checkGpuLimits (TransformOpInterface transformOp, std::optional< int64_t > gridDimX, std::optional< int64_t > gridDimY, std::optional< int64_t > gridDimZ, std::optional< int64_t > blockDimX, std::optional< int64_t > blockDimY, std::optional< int64_t > blockDimZ)
 Determine if the size of the kernel configuration is supported by the GPU architecture being used. More...
 
DiagnosedSilenceableFailure mlir::transform::gpu::createGpuLaunch (RewriterBase &rewriter, Location loc, TransformOpInterface transformOp, LaunchOp &launchOp, std::optional< int64_t > gridDimX, std::optional< int64_t > gridDimY, std::optional< int64_t > gridDimZ, std::optional< int64_t > blockDimX, std::optional< int64_t > blockDimY, std::optional< int64_t > blockDimZ)
 
DiagnosedSilenceableFailure mlir::transform::gpu::alterGpuLaunch (RewriterBase &rewriter, LaunchOp gpuLaunch, TransformOpInterface transformOp, std::optional< int64_t > gridDimX, std::optional< int64_t > gridDimY, std::optional< int64_t > gridDimZ, std::optional< int64_t > blockDimX, std::optional< int64_t > blockDimY, std::optional< int64_t > blockDimZ)
 Alter kernel configuration of the given kernel. More...
 

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "gpu-transforms"

Definition at line 38 of file Utils.cpp.

Function Documentation

◆ buildLinearId()

template<typename ThreadOrBlockIdOp >
static Value buildLinearId ( RewriterBase & rewriter,
Location  loc,
ArrayRef< OpFoldResult >  originalBasisOfr 
)
static

◆ buildPredicates()

static FailureOr<SmallVector<Value> > buildPredicates ( RewriterBase & rewriter,
Location  loc,
ArrayRef< Value >  activeIds,
ArrayRef< int64_t >  activeMappingSizes,
ArrayRef< int64_t >  availableMappingSizes,
std::string &  errorMsg 
)
static

Build predicates to filter execution by only the activeIds.

Along each dimension, 3 cases appear:

  1. activeMappingSize > availableMappingSize: this is an unsupported case as this requires additional looping. An error message is produced to advise the user to tile more or to use more threads.
  2. activeMappingSize == availableMappingSize: no predication is needed.
  3. activeMappingSize < availableMappingSize: only a subset of threads should be active and we produce the boolean id < activeMappingSize for further use in building predicated execution.

Definition at line 50 of file Utils.cpp.

References mlir::arith::ConstantIndexOp::create().

Referenced by common3DIdBuilderFn(), commonLinearIdBuilderFn(), and laneIdBuilderFn().

◆ common3DIdBuilderFn()

template<typename ThreadOrBlockIdOp >
static GpuIdBuilderFnType common3DIdBuilderFn ( int64_t  multiplicity = 1)
static

Create a simple 3-D id builder that takes the originalBasisOfr. The 3-D id builder returns a 3-D vector of ids for indexing and 3-D sizes + ids for predicate generation.

Definition at line 195 of file Utils.cpp.

References buildPredicates(), mlir::AffineExpr::floorDiv(), mlir::getAffineDimExpr(), mlir::Builder::getContext(), mlir::Builder::getIndexType(), and mlir::affine::makeComposedFoldedAffineApply().

◆ commonLinearIdBuilderFn()

template<typename ThreadOrBlockIdOp >
static GpuIdBuilderFnType commonLinearIdBuilderFn ( int64_t  multiplicity = 1,
DeviceMaskingAttrInterface  mask = nullptr 
)
static

Create a linear id builder that takes the originalBasisOfr and decomposes it in the basis of forallMappingSizes.

The linear id builder returns an n-D vector of ids for indexing and 1-D size + id for predicate generation.

Definition at line 107 of file Utils.cpp.

References buildPredicates(), mlir::computeProduct(), mlir::computeStrides(), mlir::delinearize(), mlir::AffineExpr::floorDiv(), mlir::getAffineDimExpr(), mlir::getAsIndexOpFoldResult(), mlir::getValueOrCreateConstantIndexOp(), mlir::affine::makeComposedAffineApply(), and mlir::affine::makeComposedFoldedAffineApply().

◆ laneIdBuilderFn()

static GpuIdBuilderFnType laneIdBuilderFn ( int64_t  warpSize)
static

Create a lane id builder that takes the originalBasis and decomposes it in the basis of forallMappingSizes.

The linear id builder returns an n-D vector of ids for indexing and 1-D size + id for predicate generation.

Definition at line 231 of file Utils.cpp.

References buildPredicates(), mlir::computeProduct(), mlir::computeStrides(), mlir::delinearize(), mlir::getAffineDimExpr(), mlir::getAsIndexOpFoldResult(), mlir::Builder::getContext(), mlir::affine::makeComposedAffineApply(), and mlir::affine::makeComposedFoldedAffineApply().