MLIR  20.0.0git
DistributionUtils.cpp
Go to the documentation of this file.
1 //===- DistributionUtils.cpp - Distribution tools for GPUOps --------------===//
2 //
3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements distribution utility methods.
10 //
11 //===----------------------------------------------------------------------===//
12 
16 #include "mlir/IR/Value.h"
17 
18 #include <numeric>
19 
20 using namespace mlir;
21 using namespace mlir::gpu;
22 
// Creates a new WarpExecuteOnLane0Op right before `warpOp`, with
// `newReturnTypes` as its result types and the same lane id, warp size, args
// and body argument types as `warpOp`. The body of `warpOp` is then moved into
// the new op and its terminator is rewritten to yield `newYieldedValues`.
//
// Returns the newly created op. `warpOp` itself is neither erased nor replaced
// here; that is left to the caller.
//
// NOTE(review): the listing line that carries the qualified function name
// (line 24) is absent from this rendering. The name
// moveRegionToNewWarpOpAndReplaceReturns is taken from the member index at the
// end of the page, whose signature matches these parameters.
23 WarpExecuteOnLane0Op
25  RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp,
26  ValueRange newYieldedValues, TypeRange newReturnTypes) const {
27  // Create a new op before the existing one, with the extra operands.
 // The guard restores the rewriter's insertion point when this function exits.
28  OpBuilder::InsertionGuard g(rewriter);
29  rewriter.setInsertionPoint(warpOp);
30  auto newWarpOp = rewriter.create<WarpExecuteOnLane0Op>(
31  warpOp.getLoc(), newReturnTypes, warpOp.getLaneid(), warpOp.getWarpSize(),
32  warpOp.getArgs(), warpOp.getBody()->getArgumentTypes());
33 
34  Region &opBody = warpOp.getBodyRegion();
35  Region &newOpBody = newWarpOp.getBodyRegion();
 // Remember the block the new op was created with; it is discarded once the
 // original blocks have been moved in front of it.
36  Block &newOpFirstBlock = newOpBody.front();
37  rewriter.inlineRegionBefore(opBody, newOpBody, newOpBody.begin());
38  rewriter.eraseBlock(&newOpFirstBlock);
39  assert(newWarpOp.getWarpRegion().hasOneBlock() &&
40  "expected WarpOp with single block");
41 
 // The terminator of the (single) moved block is the yield to be rewritten.
42  auto yield =
43  cast<gpu::YieldOp>(newOpBody.getBlocks().begin()->getTerminator());
44 
 // Swap the yielded operands through the rewriter's in-place modification
 // wrapper rather than mutating the op directly.
45  rewriter.modifyOpInPlace(
46  yield, [&]() { yield.getValuesMutable().assign(newYieldedValues); });
47  return newWarpOp;
48 }
49 
// Creates a new WarpExecuteOnLane0Op that yields everything `warpOp` yields
// plus those entries of `newYieldedValues` that are not yielded already, then
// replaces `warpOp` with the leading results of the new op.
//
// For every entry of `newYieldedValues` (paired with the matching entry of
// `newReturnTypes`), one element is appended to `indices`: the position of that
// value in the new yield operand list. `indices` is only appended to, never
// cleared. Returns the new op.
//
// NOTE(review): the listing line that carries the qualified function name
// (line 51) is absent from this rendering. The name
// moveRegionToNewWarpOpAndAppendReturns is taken from the member index at the
// end of the page, whose signature matches these parameters.
50 WarpExecuteOnLane0Op
52  RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp,
53  ValueRange newYieldedValues, TypeRange newReturnTypes,
54  SmallVector<size_t> &indices) const {
 // Start from the existing result types; new ones are appended below.
55  SmallVector<Type> types(warpOp.getResultTypes().begin(),
56  warpOp.getResultTypes().end());
57  auto yield = cast<gpu::YieldOp>(
58  warpOp.getBodyRegion().getBlocks().begin()->getTerminator());
 // Insertion-ordered set of the values currently yielded.
 // NOTE(review): a set vector keeps one entry per distinct value, while
 // `types` above has one entry per result. If the original yield ever repeats
 // an operand the two would differ in length -- presumably callers never hit
 // this; confirm.
59  llvm::SmallSetVector<Value, 32> yieldValues(yield.getOperands().begin(),
60  yield.getOperands().end());
61  for (auto [value, type] : llvm::zip_equal(newYieldedValues, newReturnTypes)) {
62  if (yieldValues.insert(value)) {
 // Newly inserted: it becomes the last yielded value and needs a result type.
63  types.push_back(type);
64  indices.push_back(yieldValues.size() - 1);
65  } else {
66  // If the value already exits the region, don't create a new output.
 // Report the position of the existing entry instead (linear search).
67  for (auto [idx, yieldOperand] :
68  llvm::enumerate(yieldValues.getArrayRef())) {
69  if (yieldOperand == value) {
70  indices.push_back(idx);
71  break;
72  }
73  }
74  }
75  }
 // NOTE(review): the loop above already attempted to insert every element of
 // `newYieldedValues`, so this call appears to add nothing.
76  yieldValues.insert(newYieldedValues.begin(), newYieldedValues.end());
77  WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndReplaceReturns(
78  rewriter, warpOp, yieldValues.getArrayRef(), types);
 // Only the first getNumResults() results stand in for the old op; any
 // appended results are reachable through the returned op.
79  rewriter.replaceOp(warpOp,
80  newWarpOp.getResults().take_front(warpOp.getNumResults()));
81  return newWarpOp;
82 }
83 
// Walks the operands of the yield terminating `warpOp`'s body, in order, and
// returns a pointer to the first operand that meets all of:
//   * the yielded value has a defining operation,
//   * `fn` returns true for that operation,
//   * the `warpOp` result at the same position has at least one use.
// Returns nullptr when no operand qualifies.
//
// NOTE(review): the listing line that carries the return type and qualified
// function name (line 84) is absent from this rendering. The member index at
// the end of the page gives it as `OpOperand *getWarpResult(...) const`.
85  WarpExecuteOnLane0Op warpOp,
86  llvm::function_ref<bool(Operation *)> fn) const {
87  auto yield = cast<gpu::YieldOp>(
88  warpOp.getBodyRegion().getBlocks().begin()->getTerminator());
89  for (OpOperand &yieldOperand : yield->getOpOperands()) {
90  Value yieldValues = yieldOperand.get();
 // Values without a defining op yield a null pointer here and are skipped by
 // the check below.
91  Operation *definedOp = yieldValues.getDefiningOp();
92  if (definedOp && fn(definedOp)) {
 // Yield operand i is matched with warp op result i; a result with no uses is
 // not worth returning.
93  if (!warpOp.getResult(yieldOperand.getOperandNumber()).use_empty())
94  return &yieldOperand;
95  }
96  }
97  return nullptr;
98 }
99 
// Splits `laneId` into one id per dimension. The extent of each dimension is
// originalShape[d] / distributedShape[d].
//
// Returns false when some original extent is not a multiple of the distributed
// extent, or when the product of the per-dimension extents differs from
// `warpSize`. Returns true otherwise, with `delinearizedIds` holding one Value
// per dimension. When both shapes are equal, `delinearizedIds` is emptied and
// true is returned.
//
// NOTE(review): the listing line that carries the return type and qualified
// function name (line 100) is absent from this rendering. The member index at
// the end of the page gives it as `bool delinearizeLaneId(...) const`.
101  OpBuilder &builder, Location loc, ArrayRef<int64_t> originalShape,
102  ArrayRef<int64_t> distributedShape, int64_t warpSize, Value laneId,
103  SmallVectorImpl<Value> &delinearizedIds) const {
104  // If the original shape and the distributed shape are the same, we don't
105  // distribute at all--every thread is handling the whole. For such case, we
106  // should not rely on lane IDs later. So just return an empty lane ID vector.
107  if (originalShape == distributedShape) {
108  delinearizedIds.clear();
109  return true;
110  }
111 
 // sizes[d] = number of lanes dimension d is spread over.
 // NOTE(review): `large % small` assumes no distributed extent is zero --
 // confirm against callers.
112  SmallVector<int64_t> sizes;
113  for (auto [large, small] : llvm::zip_equal(originalShape, distributedShape)) {
114  if (large % small != 0)
115  return false;
116  sizes.push_back(large / small);
117  }
 // The extents must multiply out to exactly the warp size.
 // NOTE(review): the init literal `1` is an `int`, so std::accumulate carries
 // the running product as `int` even though the functor multiplies int64_t
 // values. An init of int64_t{1} would keep the product 64-bit.
118  if (std::accumulate(sizes.begin(), sizes.end(), 1,
119  std::multiplies<int64_t>()) != warpSize)
120  return false;
121 
 // NOTE(review): `s1` is bound but not referenced in the visible code.
122  AffineExpr s0, s1;
123  bindSymbols(builder.getContext(), s0, s1);
124 
125  int64_t usedThreads = 1;
126 
 // Every id starts as the index constant 0. Dimensions the loop below does not
 // reach (after its early break) keep that value.
127  Value zero = builder.create<arith::ConstantIndexOp>(loc, 0);
128  delinearizedIds.assign(sizes.size(), zero);
129 
 // Walk from the last dimension to the first.
130  for (int i = sizes.size() - 1; i >= 0; --i) {
131  usedThreads *= sizes[i];
132  if (usedThreads == warpSize) {
133  // We've used up all available threads. Don't need to perform modulo
134  // anymore. And we can stop the calculation for further dimensions.
135  delinearizedIds[i] = laneId;
136  break;
137  }
 // Id along dimension i: laneId modulo the extent of that dimension.
138  delinearizedIds[i] =
139  affine::makeComposedAffineApply(builder, loc, s0 % sizes[i], {laneId});
 // NOTE(review): listing line 140 is absent from this rendering. The next line
 // is the argument list of a call whose beginning (and any assignment target)
 // is not shown. It builds laneId floor-divided by `usedThreads`.
141  builder, loc, s0.floorDiv(usedThreads), {laneId});
142  }
143  return true;
144 }
Base type for affine expression.
Definition: AffineExpr.h:68
AffineExpr floorDiv(uint64_t v) const
Definition: AffineExpr.cpp:917
Block represents an ordered list of Operations.
Definition: Block.h:33
MLIRContext * getContext() const
Definition: Builders.h:56
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:66
RAII guard to reset the insertion point of the builder when destroyed.
Definition: Builders.h:357
This class helps build Operations.
Definition: Builders.h:216
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition: Builders.h:407
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
Definition: Builders.cpp:497
This class represents an operand of an operation.
Definition: Value.h:267
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition: Region.h:26
iterator begin()
Definition: Region.h:55
BlockListType & getBlocks()
Definition: Region.h:45
Block & front()
Definition: Region.h:65
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition: PatternMatch.h:400
virtual void eraseBlock(Block *block)
This method erases all operations in a block.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
Definition: PatternMatch.h:636
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:36
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:381
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Definition: Value.cpp:20
Specialization of arith.constant op that returns an integer of index type.
Definition: Arith.h:93
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
Definition: AffineOps.cpp:1144
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Definition: Matchers.h:344
Include the generated interface declarations.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. sizeof...(exprs)].
Definition: AffineExpr.h:362
WarpExecuteOnLane0Op moveRegionToNewWarpOpAndAppendReturns(RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp, ValueRange newYieldedValues, TypeRange newReturnTypes, SmallVector< size_t > &indices) const
Helper to create a new WarpExecuteOnLane0Op region with extra outputs.
bool delinearizeLaneId(OpBuilder &builder, Location loc, ArrayRef< int64_t > originalShape, ArrayRef< int64_t > distributedShape, int64_t warpSize, Value laneId, SmallVectorImpl< Value > &delinearizedIds) const
Delinearize the given laneId into multiple dimensions, where each dimension's size is determined by o...
WarpExecuteOnLane0Op moveRegionToNewWarpOpAndReplaceReturns(RewriterBase &rewriter, WarpExecuteOnLane0Op warpOp, ValueRange newYieldedValues, TypeRange newReturnTypes) const
Helper to create a new WarpExecuteOnLane0Op with different signature.
OpOperand * getWarpResult(WarpExecuteOnLane0Op warpOp, llvm::function_ref< bool(Operation *)> fn) const
Return a value yielded by warpOp which satisfies the filter lambda condition and is not dead.