MLIR 22.0.0git
Utils.cpp File Reference
#include "mlir/Dialect/GPU/TransformOps/Utils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
#include "mlir/Dialect/Transform/Interfaces/TransformInterfaces.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Value.h"
#include "mlir/IR/Visitors.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DebugLog.h"
#include "llvm/Support/InterleavedRange.h"

Go to the source code of this file.

Namespaces

namespace  mlir
 Include the generated interface declarations.
namespace  mlir::transform
namespace  mlir::transform::gpu

Macros

#define DEBUG_TYPE   "gpu-transforms"

Functions

static FailureOr< SmallVector< Value > > buildPredicates (RewriterBase &rewriter, Location loc, ArrayRef< Value > activeIds, ArrayRef< int64_t > activeMappingSizes, ArrayRef< int64_t > availableMappingSizes, std::string &errorMsg)
 Build predicates to filter execution by only the activeIds.
template<typename ThreadOrBlockIdOp>
static Value buildLinearId (RewriterBase &rewriter, Location loc, ArrayRef< OpFoldResult > originalBasisOfr)
 Return a flattened thread id for the workgroup with given sizes.
template<typename ThreadOrBlockIdOp>
static GpuIdBuilderFnType commonLinearIdBuilderFn (int64_t multiplicity=1, DeviceMaskingAttrInterface mask=nullptr)
 Create a linear id builder that takes the originalBasisOfr and decomposes it in the basis of forallMappingSizes.
template<typename ThreadOrBlockIdOp>
static GpuIdBuilderFnType common3DIdBuilderFn (int64_t multiplicity=1)
 Create a simple 3-D id builder that takes the originalBasisOfr. The 3-D id builder returns a 3-D vector of ids for indexing and 3-D sizes + ids for predicate generation.
static GpuIdBuilderFnType laneIdBuilderFn (int64_t warpSize)
 Create a lane id builder that takes the originalBasis and decomposes it in the basis of forallMappingSizes.
DiagnosedSilenceableFailure mlir::transform::gpu::checkGpuLimits (TransformOpInterface transformOp, std::optional< int64_t > gridDimX, std::optional< int64_t > gridDimY, std::optional< int64_t > gridDimZ, std::optional< int64_t > blockDimX, std::optional< int64_t > blockDimY, std::optional< int64_t > blockDimZ)
 Determine if the size of the kernel configuration is supported by the GPU architecture being used.
DiagnosedSilenceableFailure mlir::transform::gpu::createGpuLaunch (RewriterBase &rewriter, Location loc, TransformOpInterface transformOp, LaunchOp &launchOp, std::optional< int64_t > gridDimX, std::optional< int64_t > gridDimY, std::optional< int64_t > gridDimZ, std::optional< int64_t > blockDimX, std::optional< int64_t > blockDimY, std::optional< int64_t > blockDimZ)
DiagnosedSilenceableFailure mlir::transform::gpu::alterGpuLaunch (RewriterBase &rewriter, LaunchOp gpuLaunch, TransformOpInterface transformOp, std::optional< int64_t > gridDimX, std::optional< int64_t > gridDimY, std::optional< int64_t > gridDimZ, std::optional< int64_t > blockDimX, std::optional< int64_t > blockDimY, std::optional< int64_t > blockDimZ)
 Alter kernel configuration of the given kernel.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "gpu-transforms"

Definition at line 38 of file Utils.cpp.

Function Documentation

◆ buildLinearId()

template<typename ThreadOrBlockIdOp>
Value buildLinearId ( RewriterBase & rewriter,
Location loc,
ArrayRef< OpFoldResult > originalBasisOfr )
static

◆ buildPredicates()

FailureOr< SmallVector< Value > > buildPredicates ( RewriterBase & rewriter,
Location loc,
ArrayRef< Value > activeIds,
ArrayRef< int64_t > activeMappingSizes,
ArrayRef< int64_t > availableMappingSizes,
std::string & errorMsg )
static

Build predicates to filter execution by only the activeIds.

Along each dimension, 3 cases appear:

  1. activeMappingSize > availableMappingSize: this is an unsupported case as this requires additional looping. An error message is produced to advise the user to tile more or to use more threads.
  2. activeMappingSize == availableMappingSize: no predication is needed.
  3. activeMappingSize < availableMappingSize: only a subset of threads should be active and we produce the boolean id < activeMappingSize for further use in building predicated execution.

Definition at line 50 of file Utils.cpp.

References mlir::arith::ConstantIndexOp::create().

Referenced by common3DIdBuilderFn(), commonLinearIdBuilderFn(), and laneIdBuilderFn().

◆ common3DIdBuilderFn()

template<typename ThreadOrBlockIdOp>
GpuIdBuilderFnType common3DIdBuilderFn ( int64_t multiplicity = 1)
static

Create a simple 3-D id builder that takes the originalBasisOfr. The 3-D id builder returns a 3-D vector of ids for indexing and 3-D sizes + ids for predicate generation.

Definition at line 195 of file Utils.cpp.

References buildPredicates(), mlir::AffineExpr::floorDiv(), mlir::getAffineDimExpr(), mlir::Builder::getContext(), mlir::Builder::getIndexType(), and mlir::affine::makeComposedFoldedAffineApply().

◆ commonLinearIdBuilderFn()

template<typename ThreadOrBlockIdOp>
GpuIdBuilderFnType commonLinearIdBuilderFn ( int64_t multiplicity = 1,
DeviceMaskingAttrInterface mask = nullptr )
static

Create a linear id builder that takes the originalBasisOfr and decomposes it in the basis of forallMappingSizes.

The linear id builder returns an n-D vector of ids for indexing and 1-D size + id for predicate generation.

Definition at line 107 of file Utils.cpp.

References buildLinearId(), buildPredicates(), mlir::computeProduct(), mlir::computeStrides(), mlir::delinearize(), mlir::AffineExpr::floorDiv(), mlir::getAffineDimExpr(), mlir::getAsIndexOpFoldResult(), mlir::getValueOrCreateConstantIndexOp(), mlir::affine::makeComposedAffineApply(), and mlir::affine::makeComposedFoldedAffineApply().

◆ laneIdBuilderFn()

GpuIdBuilderFnType laneIdBuilderFn ( int64_t warpSize)
static

Create a lane id builder that takes the originalBasis and decomposes it in the basis of forallMappingSizes.

The lane id builder returns an n-D vector of ids for indexing and 1-D size + id for predicate generation.

Definition at line 231 of file Utils.cpp.

References buildLinearId(), buildPredicates(), mlir::computeProduct(), mlir::computeStrides(), mlir::delinearize(), mlir::getAffineDimExpr(), mlir::getAsIndexOpFoldResult(), mlir::Builder::getContext(), mlir::affine::makeComposedAffineApply(), and mlir::affine::makeComposedFoldedAffineApply().