MLIR  20.0.0git
ParallelLoopCollapsing.cpp
Go to the documentation of this file.
1 //===- ParallelLoopCollapsing.cpp - Pass collapsing parallel loop indices -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 
15 #include "llvm/ADT/SmallSet.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/Debug.h"
18 
19 namespace mlir {
20 #define GEN_PASS_DEF_TESTSCFPARALLELLOOPCOLLAPSING
21 #include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
22 } // namespace mlir
23 
24 #define DEBUG_TYPE "parallel-loop-collapsing"
25 
26 using namespace mlir;
27 
28 namespace {
29 struct TestSCFParallelLoopCollapsing
30  : public impl::TestSCFParallelLoopCollapsingBase<
31  TestSCFParallelLoopCollapsing> {
32 
33  void runOnOperation() override {
34  Operation *module = getOperation();
35 
36  // The common case for GPU dialect will be simplifying the ParallelOp to 3
37  // arguments, so we do that here to simplify things.
39 
40  // Gather the input args into the format required by
41  // `collapseParallelLoops`.
42  if (!clCollapsedIndices0.empty())
43  combinedLoops.push_back(clCollapsedIndices0);
44  if (!clCollapsedIndices1.empty()) {
45  if (clCollapsedIndices0.empty()) {
46  llvm::errs()
47  << "collapsed-indices-1 specified but not collapsed-indices-0";
48  signalPassFailure();
49  return;
50  }
51  combinedLoops.push_back(clCollapsedIndices1);
52  }
53  if (!clCollapsedIndices2.empty()) {
54  if (clCollapsedIndices1.empty()) {
55  llvm::errs()
56  << "collapsed-indices-2 specified but not collapsed-indices-1";
57  signalPassFailure();
58  return;
59  }
60  combinedLoops.push_back(clCollapsedIndices2);
61  }
62 
63  if (combinedLoops.empty()) {
64  llvm::errs() << "No collapsed-indices were specified. This pass is only "
65  "for testing and does not automatically collapse all "
66  "parallel loops or similar.";
67  signalPassFailure();
68  return;
69  }
70 
71  // Confirm that the specified loops are [0,N) by testing that N values exist
72  // with the maximum value being N-1.
73  llvm::SmallSet<unsigned, 8> flattenedCombinedLoops;
74  unsigned maxCollapsedIndex = 0;
75  for (auto &loops : combinedLoops) {
76  for (auto &loop : loops) {
77  flattenedCombinedLoops.insert(loop);
78  maxCollapsedIndex = std::max(maxCollapsedIndex, loop);
79  }
80  }
81 
82  if (maxCollapsedIndex != flattenedCombinedLoops.size() - 1 ||
83  !flattenedCombinedLoops.contains(maxCollapsedIndex)) {
84  llvm::errs()
85  << "collapsed-indices arguments must include all values [0,N).";
86  signalPassFailure();
87  return;
88  }
89 
90  // Only apply the transformation on parallel loops where the specified
91  // transformation is valid, but do NOT early abort in the case of invalid
92  // loops.
93  IRRewriter rewriter(&getContext());
94  module->walk([&](scf::ParallelOp op) {
95  if (flattenedCombinedLoops.size() != op.getNumLoops()) {
96  op.emitOpError("has ")
97  << op.getNumLoops()
98  << " iter args while this limited functionality testing pass was "
99  "configured only for loops with exactly "
100  << flattenedCombinedLoops.size() << " iter args.";
101  return;
102  }
103  collapseParallelLoops(rewriter, op, combinedLoops);
104  });
105  }
106 };
107 } // namespace
108 
110  return std::make_unique<TestSCFParallelLoopCollapsing>();
111 }
static MLIRContext * getContext(OpFoldResult val)
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
Definition: PatternMatch.h:772
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:793
Include the generated interface declarations.
void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops, ArrayRef< std::vector< unsigned >> combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
Definition: Utils.cpp:1061
std::unique_ptr< Pass > createTestSCFParallelLoopCollapsingPass()
Creates a pass that transforms a single ParallelLoop over N induction variables into another Parallel...