MLIR  22.0.0git
DistributionUtils.cpp
Go to the documentation of this file.
1 //===- DistributionUtils.cpp - Distribution tools for GPUOps --------------===//
2 //
3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements distribution utility methods.
10 //
11 //===----------------------------------------------------------------------===//
12 
16 #include "mlir/IR/Value.h"
17 
18 #include <numeric>
19 
20 using namespace mlir;
21 using namespace mlir::gpu;
22 
23 WarpExecuteOnLane0Op
25  RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp,
26  ValueRange newYieldedValues, TypeRange newReturnTypes) const {
27  // Create a new op before the existing one, with the extra operands.
28  OpBuilder::InsertionGuard g(rewriter);
29  rewriter.setInsertionPoint(warpOp);
30  auto newWarpOp = WarpExecuteOnLane0Op::create(
31  rewriter, warpOp.getLoc(), newReturnTypes, warpOp.getLaneid(),
32  warpOp.getWarpSize(), warpOp.getArgs(),
33  warpOp.getBody()->getArgumentTypes());
34 
35  Region &opBody = warpOp.getBodyRegion();
36  Region &newOpBody = newWarpOp.getBodyRegion();
37  Block &newOpFirstBlock = newOpBody.front();
38  rewriter.inlineRegionBefore(opBody, newOpBody, newOpBody.begin());
39  rewriter.eraseBlock(&newOpFirstBlock);
40  assert(newWarpOp.getWarpRegion().hasOneBlock() &&
41  "expected WarpOp with single block");
42 
43  auto yield =
44  cast<gpu::YieldOp>(newOpBody.getBlocks().begin()->getTerminator());
45 
46  rewriter.modifyOpInPlace(
47  yield, [&]() { yield.getValuesMutable().assign(newYieldedValues); });
48  return newWarpOp;
49 }
50 
51 WarpExecuteOnLane0Op
53  RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp,
54  ValueRange newYieldedValues, TypeRange newReturnTypes,
55  SmallVector<size_t> &indices) const {
56  SmallVector<Type> types(warpOp.getResultTypes().begin(),
57  warpOp.getResultTypes().end());
58  auto yield = cast<gpu::YieldOp>(
59  warpOp.getBodyRegion().getBlocks().begin()->getTerminator());
60  llvm::SmallSetVector<Value, 32> yieldValues(yield.getOperands().begin(),
61  yield.getOperands().end());
62  for (auto [value, type] : llvm::zip_equal(newYieldedValues, newReturnTypes)) {
63  if (yieldValues.insert(value)) {
64  types.push_back(type);
65  indices.push_back(yieldValues.size() - 1);
66  } else {
67  // If the value already exit the region don't create a new output.
68  for (auto [idx, yieldOperand] :
69  llvm::enumerate(yieldValues.getArrayRef())) {
70  if (yieldOperand == value) {
71  indices.push_back(idx);
72  break;
73  }
74  }
75  }
76  }
77  yieldValues.insert_range(newYieldedValues);
78  WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndReplaceReturns(
79  rewriter, warpOp, yieldValues.getArrayRef(), types);
80  rewriter.replaceOp(warpOp,
81  newWarpOp.getResults().take_front(warpOp.getNumResults()));
82  return newWarpOp;
83 }
84 
86  WarpExecuteOnLane0Op warpOp,
87  llvm::function_ref<bool(Operation *)> fn) const {
88  auto yield = cast<gpu::YieldOp>(
89  warpOp.getBodyRegion().getBlocks().begin()->getTerminator());
90  for (OpOperand &yieldOperand : yield->getOpOperands()) {
91  Value yieldValues = yieldOperand.get();
92  Operation *definedOp = yieldValues.getDefiningOp();
93  if (definedOp && fn(definedOp)) {
94  if (!warpOp.getResult(yieldOperand.getOperandNumber()).use_empty())
95  return &yieldOperand;
96  }
97  }
98  return nullptr;
99 }
100 
102  OpBuilder &builder, Location loc, ArrayRef<int64_t> originalShape,
103  ArrayRef<int64_t> distributedShape, int64_t warpSize, Value laneId,
104  SmallVectorImpl<Value> &delinearizedIds) const {
105  // If the original shape and the distributed shape is the same, we don't
106  // distribute at all--every thread is handling the whole. For such case, we
107  // should not rely on lane IDs later. So just return an empty lane ID vector.
108  if (originalShape == distributedShape) {
109  delinearizedIds.clear();
110  return true;
111  }
112 
113  SmallVector<int64_t> sizes;
114  for (auto [large, small] : llvm::zip_equal(originalShape, distributedShape)) {
115  if (large % small != 0)
116  return false;
117  sizes.push_back(large / small);
118  }
119  if (std::accumulate(sizes.begin(), sizes.end(), 1,
120  std::multiplies<int64_t>()) != warpSize)
121  return false;
122 
123  AffineExpr s0, s1;
124  bindSymbols(builder.getContext(), s0, s1);
125 
126  int64_t usedThreads = 1;
127 
128  Value zero = arith::ConstantIndexOp::create(builder, loc, 0);
129  delinearizedIds.assign(sizes.size(), zero);
130 
131  for (int i = sizes.size() - 1; i >= 0; --i) {
132  usedThreads *= sizes[i];
133  if (usedThreads == warpSize) {
134  // We've used up all available threads. Don't need to perform modulo
135  // anymore. And we can stop the calculation for further dimensions.
136  delinearizedIds[i] = laneId;
137  break;
138  }
139  delinearizedIds[i] =
140  affine::makeComposedAffineApply(builder, loc, s0 % sizes[i], {laneId});
142  builder, loc, s0.floorDiv(usedThreads), {laneId});
143  }
144  return true;
145 }
Base type for affine expression.
Definition: AffineExpr.h:68
AffineExpr floorDiv(uint64_t v) const
Definition: AffineExpr.cpp:959
Block represents an ordered list of Operations.
Definition: Block.h:33
MLIRContext * getContext() const
Definition: Builders.h:55
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:346
This class helps build Operations.
Definition: Builders.h:205
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:396
This class represents an operand of an operation.
Definition: Value.h:257
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:358
virtual void eraseBlock(Block *block)
This method erases all operations in a block.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:628
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:37
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:18
static ConstantIndexOp create(OpBuilder &builder, Location location, int64_t value)
Definition: ArithOps.cpp:359
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands, bool composeAffineMin=false)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
Definition: AffineOps.cpp:1274
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Include the generated interface declarations.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. sizeof...(exprs)].
Definition: AffineExpr.h:325
WarpExecuteOnLane0Op moveRegionToNewWarpOpAndAppendReturns(RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp, ValueRange newYieldedValues, TypeRange newReturnTypes, SmallVector< size_t > &indices) const
Helper to create a new WarpExecuteOnLane0Op region with extra outputs.
bool delinearizeLaneId(OpBuilder &builder, Location loc, ArrayRef< int64_t > originalShape, ArrayRef< int64_t > distributedShape, int64_t warpSize, Value laneId, SmallVectorImpl< Value > &delinearizedIds) const
Delinearize the given laneId into multiple dimensions, where each dimension's size is determined by o...
WarpExecuteOnLane0Op moveRegionToNewWarpOpAndReplaceReturns(RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp, ValueRange newYieldedValues, TypeRange newReturnTypes) const
Helper to create a new WarpExecuteOnLane0Op with different signature.
OpOperand * getWarpResult(WarpExecuteOnLane0Op warpOp, llvm::function_ref< bool(Operation *)> fn) const
Return a value yielded by warpOp which satisfies the filter lambda condition and is not dead.