MLIR  21.0.0git
Utils.cpp
1 //===- Utils.cpp - Utils for GPU transform ops ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 
24 #include "mlir/IR/AffineExpr.h"
25 #include "mlir/IR/Builders.h"
27 #include "mlir/IR/IRMapping.h"
28 #include "mlir/IR/MLIRContext.h"
29 #include "mlir/IR/OpDefinition.h"
30 #include "mlir/IR/Value.h"
31 #include "mlir/IR/Visitors.h"
32 #include "mlir/Support/LLVM.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/TypeSwitch.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/InterleavedRange.h"
38 
39 using namespace mlir;
40 using namespace mlir::gpu;
41 using namespace mlir::transform;
42 using namespace mlir::transform::gpu;
43 
44 #define DEBUG_TYPE "gpu-transforms"
45 
46 #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
47 #define LDBG(X) LLVM_DEBUG(DBGS() << (X) << "\n")
48 #define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")
49 
50 /// Return a flattened thread id for the workgroup with given sizes.
51 template <typename ThreadOrBlockIdOp>
52 static Value buildLinearId(RewriterBase &rewriter, Location loc,
53                            ArrayRef<OpFoldResult> originalBasisOfr) {
54   LLVM_DEBUG(DBGS() << "----buildLinearId with originalBasisOfr: "
55                     << llvm::interleaved(originalBasisOfr) << "\n");
56   assert(originalBasisOfr.size() == 3 && "expected 3 sizes");
57   IndexType indexType = rewriter.getIndexType();
58   AffineExpr tx, ty, tz, bdx, bdy;
59   bindDims(rewriter.getContext(), tx, ty, tz);
60   bindSymbols(rewriter.getContext(), bdx, bdy);
61   SmallVector<OpFoldResult> vals{
62       rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::x)
63           .getResult(),
64       rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::y)
65           .getResult(),
66       rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::z)
67           .getResult(),
68       originalBasisOfr[0], originalBasisOfr[1]};
69   OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
70       rewriter, loc, tx + ty * bdx + tz * bdx * bdy, vals);
71   return getValueOrCreateConstantIndexOp(rewriter, loc, ofr);
72 }
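// A worked example with illustrative values: for a workgroup of size
// (bdx, bdy, bdz) = (32, 4, 2) and thread ids (tx, ty, tz) = (5, 2, 1), the
// expression tx + ty * bdx + tz * bdx * bdy evaluates to
// 5 + 2 * 32 + 1 * 32 * 4 = 197, the flattened thread id within the workgroup.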
73 
74 /// Create a linear id builder that takes the `originalBasisOfr` and decomposes
75 /// it in the basis of `forallMappingSizes`. The linear id builder returns an
76 /// n-D vector of ids for indexing and a 1-D size + id for predicate generation.
77 template <typename ThreadOrBlockIdOp>
78 static GpuIdBuilderFnType commonLinearIdBuilderFn(int64_t multiplicity = 1) {
79   auto res = [multiplicity](RewriterBase &rewriter, Location loc,
80                             ArrayRef<int64_t> forallMappingSizes,
81                             ArrayRef<int64_t> originalBasis) {
82     SmallVector<OpFoldResult> originalBasisOfr =
83         getAsIndexOpFoldResult(rewriter.getContext(), originalBasis);
84     OpFoldResult linearId =
85         buildLinearId<ThreadOrBlockIdOp>(rewriter, loc, originalBasisOfr);
86     // Sizes in [0 .. n] -> [n .. 0] order to properly compute strides in
87     // "row-major" order.
88     SmallVector<int64_t> reverseBasisSizes(llvm::reverse(forallMappingSizes));
89     SmallVector<int64_t> strides = computeStrides(reverseBasisSizes);
90     AffineExpr d0 = getAffineDimExpr(0, rewriter.getContext());
91     OpFoldResult scaledLinearId = affine::makeComposedFoldedAffineApply(
92         rewriter, loc, d0.floorDiv(multiplicity), {linearId});
93     SmallVector<AffineExpr> delinearizingExprs = delinearize(d0, strides);
94     SmallVector<Value> ids;
95     // Reverse back to be in [0 .. n] order.
96     for (AffineExpr e : llvm::reverse(delinearizingExprs)) {
97       ids.push_back(
98           affine::makeComposedAffineApply(rewriter, loc, e, {scaledLinearId}));
99     }
100
101     LLVM_DEBUG(DBGS() << "--delinearization basis: "
102                       << llvm::interleaved(reverseBasisSizes) << "\n";
103                DBGS() << "--delinearization strides: "
104                       << llvm::interleaved(strides) << "\n";
105                DBGS() << "--delinearization exprs: "
106                       << llvm::interleaved(delinearizingExprs) << "\n";
107                DBGS() << "--ids: " << llvm::interleaved(ids) << "\n");
108
109     // Return n-D ids for indexing and 1-D size + id for predicate generation.
110     return IdBuilderResult{
111         /*mappingIdOps=*/ids,
112         /*availableMappingSizes=*/
113         SmallVector<int64_t>{computeProduct(originalBasis)},
114         // `forallMappingSizes` iterates in the scaled basis; it needs to be
115         // scaled back into the original basis to provide tight
116         // activeMappingSizes quantities for predication.
117         /*activeMappingSizes=*/
118         SmallVector<int64_t>{computeProduct(forallMappingSizes) * multiplicity},
119         /*activeIdOps=*/SmallVector<Value>{cast<Value>(linearId)}};
120   };
121
122   return res;
123 }
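// A worked example with illustrative values: for originalBasis = {64, 2, 1}
// (128 threads), multiplicity = 32 (one id per 32-thread warp) and
// forallMappingSizes = {2, 2}:
//   - a thread with linearId = 70 gets scaledLinearId = 70 floordiv 32 = 2;
//   - reverseBasisSizes = {2, 2} and strides = {2, 1}, so delinearizing 2
//     yields (1, 0), which is reversed back to ids = (0, 1);
//   - availableMappingSizes = {128} and activeMappingSizes = {2 * 2 * 32} =
//     {128}; predication compares the 1-D linearId against the latter.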
124 
125 /// Create a simple 3-D id builder that takes the `originalBasisOfr`.
126 /// The 3-D id builder returns a 3-D vector of ids for indexing and 3-D sizes
127 /// + ids for predicate generation.
128 template <typename ThreadOrBlockIdOp>
129 static GpuIdBuilderFnType common3DIdBuilderFn(int64_t multiplicity = 1) {
130   auto res = [multiplicity](RewriterBase &rewriter, Location loc,
131                             ArrayRef<int64_t> forallMappingSizes,
132                             ArrayRef<int64_t> originalBasis) {
133     IndexType indexType = rewriter.getIndexType();
134     SmallVector<Value> ids{
135         rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::x),
136         rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::y),
137         rewriter.create<ThreadOrBlockIdOp>(loc, indexType, Dimension::z)};
138     // In the 3-D mapping case, scale the first dimension by the multiplicity.
139     SmallVector<Value> scaledIds = ids;
140     AffineExpr d0 = getAffineDimExpr(0, rewriter.getContext());
141     scaledIds[0] = cast<Value>(affine::makeComposedFoldedAffineApply(
142         rewriter, loc, d0.floorDiv(multiplicity), {scaledIds[0]}));
143     // In the 3-D mapping case, unscale the first dimension by the multiplicity.
144     SmallVector<int64_t> forallMappingSizeInOriginalBasis(forallMappingSizes);
145     forallMappingSizeInOriginalBasis[0] *= multiplicity;
146     return IdBuilderResult{
147         /*mappingIdOps=*/scaledIds,
148         /*availableMappingSizes=*/SmallVector<int64_t>{originalBasis},
149         // `forallMappingSizes` iterates in the scaled basis; it needs to be
150         // scaled back into the original basis to provide tight
151         // activeMappingSizes quantities for predication.
152         /*activeMappingSizes=*/
153         SmallVector<int64_t>{forallMappingSizeInOriginalBasis},
154         /*activeIdOps=*/ids};
155   };
156   return res;
157 }
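// A worked example with illustrative values: for a warp-level 3-D mapping with
// multiplicity = warpSize = 32, a thread with threadIdx.x = 70 gets
// scaledIds[0] = 70 floordiv 32 = 2, i.e. its warp index along x, while the y
// and z ids are used unchanged. For forallMappingSizes = {4, 2, 1} the active
// sizes become {4 * 32, 2, 1} = {128, 2, 1} in the original thread basis,
// which is what the unscaled ids are predicated against.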
158 
159 namespace mlir {
160 namespace transform {
161 namespace gpu {
162 
163 GpuIdBuilder::GpuIdBuilder(MLIRContext *ctx, bool useLinearMapping,
164                            const MappingIdBuilderFnType &fn)
165     : mappingAttributes(), idBuilder() {
166   if (useLinearMapping) {
167     for (uint64_t d = static_cast<uint64_t>(MappingId::LinearDim0),
168                   e = getMaxEnumValForMappingId();
169          d <= e; ++d)
170       mappingAttributes.push_back(fn(ctx, symbolizeMappingId(d).value()));
171   } else {
172     for (uint64_t d = static_cast<uint64_t>(MappingId::DimX),
173                   e = static_cast<uint64_t>(MappingId::DimZ);
174          d <= e; ++d)
175       mappingAttributes.push_back(fn(ctx, symbolizeMappingId(d).value()));
176   }
177 }
178
179 GpuBlockIdBuilder::GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping)
180     : GpuIdBuilder(ctx, useLinearMapping, [](MLIRContext *ctx, MappingId id) {
181         return GPUBlockMappingAttr::get(ctx, id);
182       }) {
183   idBuilder = useLinearMapping
184                   ? commonLinearIdBuilderFn<BlockIdOp>(/*multiplicity=*/1)
185                   : common3DIdBuilderFn<BlockIdOp>(/*multiplicity=*/1);
186 }
187
188 GpuWarpgroupIdBuilder::GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize,
189                                              bool useLinearMapping)
190     : GpuIdBuilder(ctx, useLinearMapping,
191                    [](MLIRContext *ctx, MappingId id) {
192                      return GPUWarpgroupMappingAttr::get(ctx, id);
193                    }),
194       warpSize(warpSize) {
195   idBuilder = useLinearMapping
196                   ? commonLinearIdBuilderFn<ThreadIdOp>(
197                         /*multiplicity=*/kNumWarpsPerGroup * warpSize)
198                   : common3DIdBuilderFn<ThreadIdOp>(
199                         /*multiplicity=*/kNumWarpsPerGroup * warpSize);
200 }
201
202 GpuWarpIdBuilder::GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize,
203                                    bool useLinearMapping)
204     : GpuIdBuilder(ctx, useLinearMapping,
205                    [](MLIRContext *ctx, MappingId id) {
206                      return GPUWarpMappingAttr::get(ctx, id);
207                    }),
208       warpSize(warpSize) {
209   idBuilder =
210       useLinearMapping
211           ? commonLinearIdBuilderFn<ThreadIdOp>(/*multiplicity=*/warpSize)
212           : common3DIdBuilderFn<ThreadIdOp>(/*multiplicity=*/warpSize);
213 }
214
215 GpuThreadIdBuilder::GpuThreadIdBuilder(MLIRContext *ctx, bool useLinearMapping)
216     : GpuIdBuilder(ctx, useLinearMapping, [](MLIRContext *ctx, MappingId id) {
217         return GPUThreadMappingAttr::get(ctx, id);
218       }) {
219   idBuilder = useLinearMapping
220                   ? commonLinearIdBuilderFn<ThreadIdOp>(/*multiplicity=*/1)
221                   : common3DIdBuilderFn<ThreadIdOp>(/*multiplicity=*/1);
222 }
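// Note on the multiplicities above: block and thread mappings use a
// multiplicity of 1, warp mappings use warpSize, and warpgroup mappings use
// kNumWarpsPerGroup * warpSize, so each generated id addresses one block,
// thread, warp or warpgroup respectively.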
223 
224 DiagnosedSilenceableFailure checkGpuLimits(TransformOpInterface transformOp,
225                                            std::optional<int64_t> gridDimX,
226                                            std::optional<int64_t> gridDimY,
227                                            std::optional<int64_t> gridDimZ,
228                                            std::optional<int64_t> blockDimX,
229                                            std::optional<int64_t> blockDimY,
230                                            std::optional<int64_t> blockDimZ) {
231
232   // TODO: pass a configuration object to set the limits properly.
233
234   if ((blockDimX.value_or(1) * blockDimY.value_or(1) * blockDimZ.value_or(1)) >
235           kMaxTotalBlockdim ||
236       (gridDimX.value_or(1) * gridDimY.value_or(1) * gridDimZ.value_or(1)) >
237           kMaxTotalGriddim ||
238       blockDimX.value_or(1) > kMaxBlockdimx ||
239       blockDimY.value_or(1) > kMaxBlockdimy ||
240       blockDimZ.value_or(1) > kMaxBlockdimz ||
241       gridDimY.value_or(1) > kMaxGriddimy ||
242       gridDimZ.value_or(1) > kMaxGriddimz ||
243       gridDimX.value_or(1) > kMaxGriddimx) {
244     return transformOp.emitSilenceableError()
245            << "Trying to launch a GPU kernel with grid_dims = ("
246            << gridDimX.value_or(1) << ", " << gridDimY.value_or(1) << ", "
247            << gridDimZ.value_or(1) << ") block_dims = ("
248            << blockDimX.value_or(1) << ", " << blockDimY.value_or(1) << ", "
249            << blockDimZ.value_or(1) << "). It is larger than the limits.";
250   }
251   return DiagnosedSilenceableFailure::success();
252 }
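// Illustrative check (assuming the usual CUDA-style limit of 1024 threads per
// block for kMaxTotalBlockdim): block_dims = (32, 32, 2) would be rejected
// because 32 * 32 * 2 = 2048 exceeds the total-threads-per-block limit, even
// though each individual dimension is within range.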
253 
254 DiagnosedSilenceableFailure createGpuLaunch(
255     RewriterBase &rewriter, Location loc, TransformOpInterface transformOp,
256     LaunchOp &launchOp, std::optional<int64_t> gridDimX,
257     std::optional<int64_t> gridDimY, std::optional<int64_t> gridDimZ,
258     std::optional<int64_t> blockDimX, std::optional<int64_t> blockDimY,
259     std::optional<int64_t> blockDimZ) {
260   DiagnosedSilenceableFailure diag =
261       checkGpuLimits(transformOp, gridDimX, gridDimY, gridDimZ, blockDimX,
262                      blockDimY, blockDimZ);
263   if (!diag.succeeded())
264     return diag;
265
266   auto createConst = [&](int dim) {
267     return rewriter.create<arith::ConstantIndexOp>(loc, dim);
268   };
269   OpBuilder::InsertionGuard guard(rewriter);
270   Value one = createConst(1);
271   Value gridSizeX = gridDimX.has_value() ? createConst(gridDimX.value()) : one;
272   Value gridSizeY = gridDimY.has_value() ? createConst(gridDimY.value()) : one;
273   Value gridSizeZ = gridDimZ.has_value() ? createConst(gridDimZ.value()) : one;
274   Value blkSizeX = blockDimX.has_value() ? createConst(blockDimX.value()) : one;
275   Value blkSizeY = blockDimY.has_value() ? createConst(blockDimY.value()) : one;
276   Value blkSizeZ = blockDimZ.has_value() ? createConst(blockDimZ.value()) : one;
277   launchOp = rewriter.create<LaunchOp>(loc, gridSizeX, gridSizeY, gridSizeZ,
278                                        blkSizeX, blkSizeY, blkSizeZ);
279   rewriter.setInsertionPointToEnd(&launchOp.getBody().front());
280   rewriter.create<TerminatorOp>(loc);
281   return DiagnosedSilenceableFailure::success();
282 }
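// A minimal usage sketch (hypothetical call site, remaining dimensions left at
// their std::nullopt defaults):
//   LaunchOp launchOp;
//   DiagnosedSilenceableFailure diag =
//       createGpuLaunch(rewriter, loc, transformOp, launchOp,
//                       /*gridDimX=*/8, /*gridDimY=*/1, /*gridDimZ=*/1,
//                       /*blockDimX=*/128);
// The caller then moves the mapped body into launchOp before its terminator.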
283 
284 /// Alter kernel configuration of the given kernel.
285 DiagnosedSilenceableFailure alterGpuLaunch(
286     RewriterBase &rewriter, LaunchOp gpuLaunch,
287     TransformOpInterface transformOp, std::optional<int64_t> gridDimX,
288     std::optional<int64_t> gridDimY, std::optional<int64_t> gridDimZ,
289     std::optional<int64_t> blockDimX, std::optional<int64_t> blockDimY,
290     std::optional<int64_t> blockDimZ) {
291   DiagnosedSilenceableFailure diag =
292       checkGpuLimits(transformOp, gridDimX, gridDimY, gridDimZ, blockDimX,
293                      blockDimY, blockDimZ);
294   if (!diag.succeeded())
295     return diag;
296
297   KernelDim3 currentBlockdim = gpuLaunch.getBlockSizeOperandValues();
298   OpBuilder::InsertionGuard guard(rewriter);
299   rewriter.setInsertionPointAfterValue(currentBlockdim.x);
300   auto createConstValue = [&](int dim) {
301     return rewriter.create<arith::ConstantIndexOp>(currentBlockdim.x.getLoc(),
302                                                    dim);
303   };
304
305   if (gridDimX.has_value())
306     gpuLaunch.getGridSizeXMutable().assign(createConstValue(gridDimX.value()));
307   if (gridDimY.has_value())
308     gpuLaunch.getGridSizeYMutable().assign(createConstValue(gridDimY.value()));
309   if (gridDimZ.has_value())
310     gpuLaunch.getGridSizeZMutable().assign(createConstValue(gridDimZ.value()));
311   if (blockDimX.has_value())
312     gpuLaunch.getBlockSizeXMutable().assign(
313         createConstValue(blockDimX.value()));
314   if (blockDimY.has_value())
315     gpuLaunch.getBlockSizeYMutable().assign(
316         createConstValue(blockDimY.value()));
317   if (blockDimZ.has_value())
318     gpuLaunch.getBlockSizeZMutable().assign(
319         createConstValue(blockDimZ.value()));
320   return DiagnosedSilenceableFailure::success();
321 }
322 
323 } // namespace gpu
324 } // namespace transform
325 } // namespace mlir