doxygen/DistributionUtils%5F8cpp%5Fsource.html

 //===- DistributionUtils.cpp - Distribution tools for GPUOps --------------===//

 //

 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.

 // See https://llvm.org/LICENSE.txt for license information.

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

 //

 //===----------------------------------------------------------------------===//

 //

 // This file implements distribution utility methods.

 //

 //===----------------------------------------------------------------------===//


 #include "mlir/Dialect/GPU/Utils/DistributionUtils.h"

 #include "mlir/Dialect/Affine/IR/AffineOps.h"

 #include "mlir/Dialect/Arith/IR/Arith.h"

 #include "mlir/IR/Value.h"


 #include <numeric>


 using namespace mlir;

 using namespace mlir::gpu;


 /// Builds a WarpExecuteOnLane0Op with result types `newReturnTypes` right
 /// before `warpOp`, moves the body of `warpOp` into it and rewrites the moved
 /// terminator so that it yields `newYieldedValues`. `warpOp` itself is neither
 /// erased nor replaced here. Returns the newly created op.
 WarpExecuteOnLane0Op
 WarpDistributionPattern::moveRegionToNewWarpOpAndReplaceReturns(
     RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp,
     ValueRange newYieldedValues, TypeRange newReturnTypes) const {
   // The replacement is built immediately in front of `warpOp`; the guard puts
   // the rewriter's insertion point back when this function returns.
   OpBuilder::InsertionGuard insertionGuard(rewriter);
   rewriter.setInsertionPoint(warpOp);
   WarpExecuteOnLane0Op replacement = WarpExecuteOnLane0Op::create(
       rewriter, warpOp.getLoc(), newReturnTypes, warpOp.getLaneid(),
       warpOp.getWarpSize(), warpOp.getArgs(),
       warpOp.getBody()->getArgumentTypes());

   Region &replacementRegion = replacement.getBodyRegion();
   // Remember the block the fresh op already holds: once the old body has been
   // inlined in front of it, this block is no longer needed and is erased.
   Block *placeholderBlock = &replacementRegion.front();
   rewriter.inlineRegionBefore(warpOp.getBodyRegion(), replacementRegion,
                               replacementRegion.begin());
   rewriter.eraseBlock(placeholderBlock);
   assert(replacement.getWarpRegion().hasOneBlock() &&
          "expected WarpOp with single block");

   // Swap the operands of the moved terminator for the requested values,
   // notifying the rewriter about the in-place change.
   auto terminator =
       cast<gpu::YieldOp>(replacementRegion.front().getTerminator());
   rewriter.modifyOpInPlace(terminator, [&]() {
     terminator.getValuesMutable().assign(newYieldedValues);
   });
   return replacement;
 }


 /// Creates a new WarpExecuteOnLane0Op that yields everything `warpOp` already
 /// yields plus those entries of `newYieldedValues` that are not yielded yet,
 /// and replaces `warpOp` with the leading results of the new op.
 ///
 /// For every entry of `newYieldedValues` (paired with the matching entry of
 /// `newReturnTypes`) one position is appended to `indices`: the position of
 /// that value among the operands of the new yield, which is also the index of
 /// the corresponding result of the returned op. A value that is already
 /// yielded reuses the position of its first occurrence instead of creating a
 /// new result.
 WarpExecuteOnLane0Op
 WarpDistributionPattern::moveRegionToNewWarpOpAndAppendReturns(
     RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp,
     ValueRange newYieldedValues, TypeRange newReturnTypes,
     SmallVector<size_t> &indices) const {
   SmallVector<Type> types(warpOp.getResultTypes().begin(),
                           warpOp.getResultTypes().end());
   auto yield = cast<gpu::YieldOp>(
       warpOp.getBodyRegion().getBlocks().begin()->getTerminator());
   // Keep the existing yield operands in a plain vector, duplicates included.
   // A set would collapse a value that is yielded more than once, leaving the
   // yield shorter than `types` and shifting every later result position.
   SmallVector<Value> yieldValues(yield.getOperands().begin(),
                                  yield.getOperands().end());
   for (auto [value, type] : llvm::zip_equal(newYieldedValues, newReturnTypes)) {
     // If the value already exits the region, don't create a new output; point
     // at the first operand that carries it.
     auto *existing = llvm::find(yieldValues, value);
     if (existing != yieldValues.end()) {
       indices.push_back(static_cast<size_t>(existing - yieldValues.begin()));
       continue;
     }
     // Otherwise append the value as a new yield operand / op result.
     yieldValues.push_back(value);
     types.push_back(type);
     indices.push_back(yieldValues.size() - 1);
   }
   WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndReplaceReturns(
       rewriter, warpOp, yieldValues, types);
   // The original results keep their positions at the front of the new op, so
   // they can be forwarded one-to-one.
   rewriter.replaceOp(warpOp,
                      newWarpOp.getResults().take_front(warpOp.getNumResults()));
   return newWarpOp;
 }


 /// Scans the operands of the terminator of `warpOp` in order and returns the
 /// first one whose value is defined by an operation accepted by `fn` and whose
 /// corresponding `warpOp` result has at least one use. Returns nullptr when no
 /// operand qualifies.
 OpOperand *WarpDistributionPattern::getWarpResult(
     WarpExecuteOnLane0Op warpOp,
     llvm::function_ref<bool(Operation *)> fn) const {
   Block &body = warpOp.getBodyRegion().front();
   auto terminator = cast<gpu::YieldOp>(body.getTerminator());
   for (OpOperand &candidate : terminator->getOpOperands()) {
     // Values without a defining op, or whose producer is rejected by the
     // filter, are skipped.
     Operation *producer = candidate.get().getDefiningOp();
     if (!producer || !fn(producer))
       continue;
     // Operand number N of the yield feeds result N of the warp op; skip it
     // when that result has no uses.
     if (warpOp.getResult(candidate.getOperandNumber()).use_empty())
       continue;
     return &candidate;
   }
   return nullptr;
 }


 /// Splits `laneId` into one index per dimension, where the extent of each
 /// dimension is `originalShape[d] / distributedShape[d]`.
 ///
 /// Returns true on success and stores the per-dimension ids in
 /// `delinearizedIds`. When both shapes are equal the vector is left empty.
 /// Returns false when a distributed extent is zero, when an original extent is
 /// not a multiple of the distributed one, or when the product of the
 /// per-dimension ratios differs from `warpSize`.
 bool WarpDistributionPattern::delinearizeLaneId(
     OpBuilder &builder, Location loc, ArrayRef<int64_t> originalShape,
     ArrayRef<int64_t> distributedShape, int64_t warpSize, Value laneId,
     SmallVectorImpl<Value> &delinearizedIds) const {
   // If the original shape and the distributed shape is the same, we don't
   // distribute at all--every thread is handling the whole. For such case, we
   // should not rely on lane IDs later. So just return an empty lane ID vector.
   if (originalShape == distributedShape) {
     delinearizedIds.clear();
     return true;
   }

   SmallVector<int64_t> sizes;
   for (auto [large, small] : llvm::zip_equal(originalShape, distributedShape)) {
     // A zero distributed extent cannot divide anything; bail out before the
     // modulo, which would be undefined behavior.
     if (small == 0 || large % small != 0)
       return false;
     sizes.push_back(large / small);
   }
   // Seed the product with an int64_t: std::accumulate accumulates in the type
   // of its initial value, so a plain `1` would truncate the product to int.
   if (std::accumulate(sizes.begin(), sizes.end(), int64_t{1},
                       std::multiplies<int64_t>()) != warpSize)
     return false;

   AffineExpr s0;
   bindSymbols(builder.getContext(), s0);

   int64_t usedThreads = 1;

   // Every dimension starts out at index zero; dimensions that are never
   // reached by the loop below keep that value.
   Value zero = arith::ConstantIndexOp::create(builder, loc, 0);
   delinearizedIds.assign(sizes.size(), zero);

   // Walk from the last dimension towards the first.
   for (int64_t i = static_cast<int64_t>(sizes.size()) - 1; i >= 0; --i) {
     usedThreads *= sizes[i];
     if (usedThreads == warpSize) {
       // We've used up all available threads. Don't need to perform modulo
       // anymore. And we can stop the calculation for further dimensions.
       delinearizedIds[i] = laneId;
       break;
     }
     // Id along this dimension is the remainder; the lane id is then divided
     // by the running product before moving on to the next dimension.
     delinearizedIds[i] =
         affine::makeComposedAffineApply(builder, loc, s0 % sizes[i], {laneId});
     laneId = affine::makeComposedAffineApply(
         builder, loc, s0.floorDiv(usedThreads), {laneId});
   }
   return true;
 }

AffineOps.h

DistributionUtils.h

Value.h

llvm::ArrayRef
Definition: LLVM.h:48

llvm::SmallVectorImpl
Definition: LLVM.h:74

llvm::SmallVector
Definition: LLVM.h:72

llvm::function_ref
Definition: LLVM.h:90

mlir::AffineExpr
Base type for affine expression.
Definition: AffineExpr.h:68

mlir::AffineExpr::floorDiv
AffineExpr floorDiv(uint64_t v) const
Definition: AffineExpr.cpp:959

mlir::Block
Block represents an ordered list of Operations.
Definition: Block.h:33

mlir::Builder::getContext
MLIRContext * getContext() const
Definition: Builders.h:55

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76

mlir::OpBuilder::InsertionGuard
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:346

mlir::OpBuilder
This class helps build Operations.
Definition: Builders.h:205

mlir::OpBuilder::setInsertionPoint
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396

mlir::OpOperand
This class represents an operand of an operation.
Definition: Value.h:257

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88

mlir::Region
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26

mlir::Region::begin
iterator begin()
Definition: Region.h:55

mlir::Region::getBlocks
BlockListType & getBlocks()
Definition: Region.h:45

mlir::Region::front
Block & front()
Definition: Region.h:65

mlir::RewriterBase
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:358

mlir::RewriterBase::eraseBlock
virtual void eraseBlock(Block *block)
This method erases all operations in a block.
Definition: PatternMatch.cpp:232

mlir::RewriterBase::replaceOp
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
Definition: PatternMatch.cpp:127

mlir::RewriterBase::modifyOpInPlace
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:628

mlir::RewriterBase::inlineRegionBefore
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
Definition: PatternMatch.cpp:376

mlir::TypeRange
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:37

mlir::ValueRange
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96

mlir::Value::getDefiningOp
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:18

mlir::arith::ConstantIndexOp::create
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition: ArithOps.cpp:359

Arith.h

mlir::affine::makeComposedAffineApply
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
Definition: AffineOps.cpp:1274

mlir::detail::enumerate
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344

mlir::gpu
Definition: GPUCommonPass.h:35

mlir
Include the generated interface declarations.
Definition: LocalAliasAnalysis.h:20

mlir::bindSymbols
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. sizeof...(exprs)]
Definition: AffineExpr.h:325

mlir::gpu::WarpDistributionPattern::moveRegionToNewWarpOpAndAppendReturns
WarpExecuteOnLane0Op moveRegionToNewWarpOpAndAppendReturns(RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp, ValueRange newYieldedValues, TypeRange newReturnTypes, SmallVector< size_t > &indices) const
Helper to create a new WarpExecuteOnLane0Op region with extra outputs.
Definition: DistributionUtils.cpp:52

mlir::gpu::WarpDistributionPattern::delinearizeLaneId
bool delinearizeLaneId(OpBuilder &builder, Location loc, ArrayRef< int64_t > originalShape, ArrayRef< int64_t > distributedShape, int64_t warpSize, Value laneId, SmallVectorImpl< Value > &delinearizedIds) const
Delinearize the given laneId into multiple dimensions, where each dimension's size is determined by o...
Definition: DistributionUtils.cpp:101

mlir::gpu::WarpDistributionPattern::moveRegionToNewWarpOpAndReplaceReturns
WarpExecuteOnLane0Op moveRegionToNewWarpOpAndReplaceReturns(RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp, ValueRange newYieldedValues, TypeRange newReturnTypes) const
Helper to create a new WarpExecuteOnLane0Op with different signature.
Definition: DistributionUtils.cpp:24

mlir::gpu::WarpDistributionPattern::getWarpResult
OpOperand * getWarpResult(WarpExecuteOnLane0Op warpOp, llvm::function_ref< bool(Operation *)> fn) const
Return a value yielded by warpOp which satisfies the filter lambda condition and is not dead.
Definition: DistributionUtils.cpp:85