doxygen/SubgroupIdRewriter_8cpp_source.html

 //===- SubgroupIdRewriter.cpp - Implementation of SubgroupId rewriting ----===//

 //

 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // This file implements in-dialect rewriting of the gpu.subgroup_id op for

 // architectures where it is derived from the linearized thread id:

 //   subgroup_id = (tid.x + dim.x * (tid.y + dim.y * tid.z)) / subgroup_size

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Dialect/GPU/IR/GPUDialect.h"

 #include "mlir/Dialect/GPU/Transforms/Passes.h"

 #include "mlir/Dialect/Index/IR/IndexOps.h"

 #include "mlir/IR/Builders.h"

 #include "mlir/IR/PatternMatch.h"


 using namespace mlir;


 namespace {

 /// Rewrites `gpu.subgroup_id` into explicit arithmetic on the thread id and
 /// the block dimensions:
 ///
 ///   subgroup_id = (tid.x + dim.x * (tid.y + dim.y * tid.z)) / subgroup_size
 ///
 /// The pattern matches every `gpu::SubgroupIdOp` unconditionally and always
 /// reports success.
 struct GpuSubgroupIdRewriter final : OpRewritePattern<gpu::SubgroupIdOp> {
   using OpRewritePattern<gpu::SubgroupIdOp>::OpRewritePattern;

   /// Replaces `op` with the quotient of the linearized thread id and the
   /// subgroup size.
   ///
   /// \param op       The `gpu.subgroup_id` operation to expand.
   /// \param rewriter Rewriter used to create the new ops and replace `op`.
   /// \returns `success()` in all cases.
   LogicalResult matchAndRewrite(gpu::SubgroupIdOp op,
                                 PatternRewriter &rewriter) const override {
     // Step 1: linearize the thread's 3D id within its block (tid.x, tid.y,
     // tid.z), with x as the fastest-varying (innermost) dimension:
     //
     //   L = tid.x + dim.x * (tid.y + dim.y * tid.z)
     //
     // Step 2: split the linear range into consecutive, non-overlapping
     // segments of `subgroup_size` threads (N = subgroup_size):
     //
     //   | Subgroup 0      | Subgroup 1      | Subgroup 2       | ... |
     //   | Indices 0..N-1  | Indices N..2N-1 | Indices 2N..3N-1 | ... |
     //
     // so that
     //
     //   subgroup_id = floor(L / subgroup_size)
     //
     // All values are of `index` type, so the arithmetic is emitted with Index
     // dialect ops, the dialect whose header this file includes.
     Location loc = op->getLoc();
     Type indexType = rewriter.getIndexType();

     Value dimX = gpu::BlockDimOp::create(rewriter, loc, gpu::Dimension::x);
     Value dimY = gpu::BlockDimOp::create(rewriter, loc, gpu::Dimension::y);
     Value tidX = gpu::ThreadIdOp::create(rewriter, loc, gpu::Dimension::x);
     Value tidY = gpu::ThreadIdOp::create(rewriter, loc, gpu::Dimension::y);
     Value tidZ = gpu::ThreadIdOp::create(rewriter, loc, gpu::Dimension::z);

     // dim.y * tid.z
     Value zOffset = index::MulOp::create(rewriter, loc, indexType, dimY, tidZ);
     // tid.y + dim.y * tid.z
     Value yzOffset =
         index::AddOp::create(rewriter, loc, indexType, zOffset, tidY);
     // dim.x * (tid.y + dim.y * tid.z)
     Value rowOffset =
         index::MulOp::create(rewriter, loc, indexType, dimX, yzOffset);
     // L = tid.x + dim.x * (tid.y + dim.y * tid.z)
     Value linearTid =
         index::AddOp::create(rewriter, loc, indexType, tidX, rowOffset);

     // The subgroup size is queried without an upper bound attribute.
     Value subgroupSize = gpu::SubgroupSizeOp::create(
         rewriter, loc, indexType, /*upper_bound=*/nullptr);

     // Unsigned division: both operands are non-negative thread counts.
     Value subgroupId = index::DivUOp::create(rewriter, loc, indexType,
                                              linearTid, subgroupSize);
     rewriter.replaceOp(op, {subgroupId});
     return success();
   }
 };


 } // namespace


 /// Adds `GpuSubgroupIdRewriter` to `patterns`, constructing it with the
 /// context that owns the pattern set.
 void mlir::populateGpuSubgroupIdPatterns(RewritePatternSet &patterns) {
   MLIRContext *context = patterns.getContext();
   patterns.add<GpuSubgroupIdRewriter>(context);
 }

Builders.h

Passes.h

GPUDialect.h

IndexOps.h

PatternMatch.h

mlir::Builder::getIndexType
IndexType getIndexType()
Definition: Builders.cpp:50

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::PatternRewriter
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition: PatternMatch.h:769

mlir::RewritePatternSet
Definition: PatternMatch.h:792

mlir::RewriterBase::replaceOp
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
Definition: PatternMatch.cpp:127

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

mlir::xegpu::targetinfo::subgroupSize
constexpr unsigned subgroupSize
Definition: XeGPUTargetInfo.h:17

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition: GreedyPatternRewriteDriver.h:283

mlir::populateGpuSubgroupIdPatterns
void populateGpuSubgroupIdPatterns(RewritePatternSet &patterns)
Collect a set of patterns to rewrite SubgroupIdOp op within the GPU dialect.
Definition: SubgroupIdRewriter.cpp:83

mlir::OpRewritePattern
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition: PatternMatch.h:314