1 //===- VectorToSCF.cpp - Convert vector to SCF dialect ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements lowering of vector transfer operations to SCF.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <type_traits>
14 
15 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
16 
22 #include "mlir/IR/Builders.h"
23 #include "mlir/IR/ImplicitLocOpBuilder.h"
24 #include "mlir/Pass/Pass.h"
25 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
26 #include "mlir/Transforms/Passes.h"
27 
28 namespace mlir {
29 #define GEN_PASS_DEF_CONVERTVECTORTOSCF
30 #include "mlir/Conversion/Passes.h.inc"
31 } // namespace mlir
32 
33 using namespace mlir;
34 using vector::TransferReadOp;
35 using vector::TransferWriteOp;
36 
37 namespace {
38 
39 /// Attribute name used for labeling transfer ops during progressive lowering.
40 static const char kPassLabel[] = "__vector_to_scf_lowering__";
41 
42 /// Patterns that inherit from this struct have access to
43 /// VectorTransferToSCFOptions.
44 template <typename OpTy>
45 struct VectorToSCFPattern : public OpRewritePattern<OpTy> {
46  explicit VectorToSCFPattern(MLIRContext *context,
47  VectorTransferToSCFOptions opt)
48  : OpRewritePattern<OpTy>(context), options(opt) {}
49 
50  VectorTransferToSCFOptions options;
51 };
52 
53 /// Given a vector transfer op, calculate which dimension of the `source`
54 /// memref should be unpacked in the next application of TransferOpConversion.
55 /// A return value of None indicates a broadcast.
56 template <typename OpTy>
57 static Optional<int64_t> unpackedDim(OpTy xferOp) {
58  // TODO: support 0-d corner case.
59  assert(xferOp.getTransferRank() > 0 && "unexpected 0-d transfer");
60  auto map = xferOp.getPermutationMap();
61  if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
62  return expr.getPosition();
63  }
64  assert(xferOp.isBroadcastDim(0) &&
65  "Expected AffineDimExpr or AffineConstantExpr");
66  return None;
67 }
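// Illustrative note (hypothetical maps, not taken from this file): for a
// permutation map (d0, d1, d2) -> (d2, d1), the first result is the dim
// expression d2, so unpackedDim returns 2; for (d0, d1) -> (0, d1), the
// first result is the constant 0, i.e. a broadcast, so None is returned.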
68 
69 /// Compute the permutation map for the new (N-1)-D vector transfer op. This
70 /// map is identical to the current permutation map, but the first result is
71 /// omitted.
72 template <typename OpTy>
73 static AffineMap unpackedPermutationMap(OpBuilder &b, OpTy xferOp) {
74  // TODO: support 0-d corner case.
75  assert(xferOp.getTransferRank() > 0 && "unexpected 0-d transfer");
76  auto map = xferOp.getPermutationMap();
77  return AffineMap::get(map.getNumDims(), 0, map.getResults().drop_front(),
78  b.getContext());
79 }
80 
81 /// Calculate the indices for the new vector transfer op.
82 ///
83 /// E.g.: transfer_read %A[%a, %b, %c, %d] ... : vector<5x4x3xf32> ...
84 /// --> transfer_read %A[%a, %b + iv, %c, %d] ... vector<4x3xf32>
85 /// ^^^^^^
86 /// `iv` is the iteration variable of the (new) surrounding loop.
87 template <typename OpTy>
88 static void getXferIndices(OpBuilder &b, OpTy xferOp, Value iv,
89  SmallVector<Value, 8> &indices) {
90  typename OpTy::Adaptor adaptor(xferOp);
91  // Corresponding memref dim of the vector dim that is unpacked.
92  auto dim = unpackedDim(xferOp);
93  auto prevIndices = adaptor.getIndices();
94  indices.append(prevIndices.begin(), prevIndices.end());
95 
96  Location loc = xferOp.getLoc();
97  bool isBroadcast = !dim.has_value();
98  if (!isBroadcast) {
99  AffineExpr d0, d1;
100  bindDims(xferOp.getContext(), d0, d1);
101  Value offset = adaptor.getIndices()[dim.value()];
102  indices[dim.value()] =
103  makeComposedAffineApply(b, loc, d0 + d1, {offset, iv});
104  }
105 }
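// Illustrative sketch (hypothetical values): when the unpacked dim is 1 and
// the original indices are [%a, %b, %c, %d], the new index list becomes
// [%a, affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%b, %iv), %c, %d];
// only the unpacked memref dimension is offset by the induction variable,
// and a broadcast dim leaves the indices unchanged.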
106 
107 static void maybeYieldValue(OpBuilder &b, Location loc, bool hasRetVal,
108  Value value) {
109  if (hasRetVal) {
110  assert(value && "Expected non-empty value");
111  b.create<scf::YieldOp>(loc, value);
112  } else {
113  b.create<scf::YieldOp>(loc);
114  }
115 }
116 
117 /// Generates a boolean Value that is true if the iv-th bit in xferOp's mask
118 /// is set to true. No such check is generated under the following circumstances:
119 /// * xferOp does not have a mask.
120 /// * xferOp's mask is not 1D. (In case of (N>1)-D, a subvector of the mask is
121 /// computed and attached to the new transfer op in the pattern.)
122 /// * The to-be-unpacked dim of xferOp is a broadcast.
123 template <typename OpTy>
124 static Value generateMaskCheck(OpBuilder &b, OpTy xferOp, Value iv) {
125  if (!xferOp.getMask())
126  return Value();
127  if (xferOp.getMaskType().getRank() != 1)
128  return Value();
129  if (xferOp.isBroadcastDim(0))
130  return Value();
131 
132  Location loc = xferOp.getLoc();
133  return b.create<vector::ExtractElementOp>(loc, xferOp.getMask(), iv);
134 }
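// Illustrative sketch (hypothetical types): for a 1-D mask of type
// vector<5xi1>, this emits roughly
//   %m = vector.extractelement %mask[%iv : index] : vector<5xi1>
// and the result is later AND-ed into the in-bounds condition by
// generateInBoundsCheck.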
135 
136 /// Helper function for TransferOpConversion and TransferOp1dConversion.
137 /// Generate an in-bounds check if the transfer op may go out-of-bounds on the
138 /// specified dimension `dim` with the loop iteration variable `iv`.
139 /// E.g., when unpacking dimension 0 from:
140 /// ```
141 /// %vec = vector.transfer_read %A[%a, %b] %cst
142 /// : vector<5x4xf32>, memref<?x?xf32>
143 /// ```
144 /// An if check similar to this will be generated inside the loop:
145 /// ```
146 /// %d = memref.dim %A, %c0 : memref<?x?xf32>
147 /// if (%a + iv < %d) {
148 /// (in-bounds case)
149 /// } else {
150 /// (out-of-bounds case)
151 /// }
152 /// ```
153 ///
154 /// If the transfer is 1D and has a mask, this function generates a more complex
155 /// check that also accounts for potentially masked-out elements.
156 ///
157 /// This function variant returns the value returned by `inBoundsCase` or
158 /// `outOfBoundsCase`. The MLIR type of the return value must be specified in
159 /// `resultTypes`.
160 template <typename OpTy>
161 static Value generateInBoundsCheck(
162  OpBuilder &b, OpTy xferOp, Value iv, Optional<int64_t> dim,
163  TypeRange resultTypes,
164  function_ref<Value(OpBuilder &, Location)> inBoundsCase,
165  function_ref<Value(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
166  bool hasRetVal = !resultTypes.empty();
167  Value cond; // Condition to be built...
168 
169  // Condition check 1: Access in-bounds?
170  bool isBroadcast = !dim; // No in-bounds check for broadcasts.
171  Location loc = xferOp.getLoc();
172  ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
173  if (!xferOp.isDimInBounds(0) && !isBroadcast) {
174  Value memrefDim =
175  vector::createOrFoldDimOp(b, loc, xferOp.getSource(), *dim);
176  AffineExpr d0, d1;
177  bindDims(xferOp.getContext(), d0, d1);
178  Value base = xferOp.getIndices()[*dim];
179  Value memrefIdx = makeComposedAffineApply(b, loc, d0 + d1, {base, iv});
180  cond = lb.create<arith::CmpIOp>(arith::CmpIPredicate::sgt, memrefDim,
181  memrefIdx);
182  }
183 
184  // Condition check 2: Masked in?
185  if (auto maskCond = generateMaskCheck(b, xferOp, iv)) {
186  if (cond)
187  cond = lb.create<arith::AndIOp>(cond, maskCond);
188  else
189  cond = maskCond;
190  }
191 
192  // If the condition is non-empty, generate an SCF::IfOp.
193  if (cond) {
194  auto check = lb.create<scf::IfOp>(
195  resultTypes, cond,
196  /*thenBuilder=*/
197  [&](OpBuilder &b, Location loc) {
198  maybeYieldValue(b, loc, hasRetVal, inBoundsCase(b, loc));
199  },
200  /*elseBuilder=*/
201  [&](OpBuilder &b, Location loc) {
202  if (outOfBoundsCase) {
203  maybeYieldValue(b, loc, hasRetVal, outOfBoundsCase(b, loc));
204  } else {
205  b.create<scf::YieldOp>(loc);
206  }
207  });
208 
209  return hasRetVal ? check.getResult(0) : Value();
210  }
211 
212  // Condition is empty, no need for an SCF::IfOp.
213  return inBoundsCase(b, loc);
214 }
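// Illustrative sketch of the IR produced by this helper (hypothetical names),
// for an unpacked dim that may be out-of-bounds and a 1-D mask:
//   %d = memref.dim %A, %c0 : memref<?x?xf32>
//   %idx = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%a, %iv)
//   %inb = arith.cmpi sgt, %d, %idx : index
//   %msk = vector.extractelement %mask[%iv : index] : vector<5xi1>
//   %cond = arith.andi %inb, %msk : i1
//   %res = scf.if %cond -> (vector<4xf32>) { /* inBoundsCase */ }
//          else { /* outOfBoundsCase */ }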
215 
216 /// In this function variant, `inBoundsCase` and `outOfBoundsCase` do not have
217 /// a return value. Consequently, this function does not have a return value.
218 template <typename OpTy>
219 static void generateInBoundsCheck(
220  OpBuilder &b, OpTy xferOp, Value iv, Optional<int64_t> dim,
221  function_ref<void(OpBuilder &, Location)> inBoundsCase,
222  function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
223  generateInBoundsCheck(
224  b, xferOp, iv, dim, /*resultTypes=*/TypeRange(),
225  /*inBoundsCase=*/
226  [&](OpBuilder &b, Location loc) {
227  inBoundsCase(b, loc);
228  return Value();
229  },
230  /*outOfBoundsCase=*/
231  [&](OpBuilder &b, Location loc) {
232  if (outOfBoundsCase)
233  outOfBoundsCase(b, loc);
234  return Value();
235  });
236 }
237 
238 /// Given an ArrayAttr, return a copy where the first element is dropped.
239 static ArrayAttr dropFirstElem(OpBuilder &b, ArrayAttr attr) {
240  if (!attr)
241  return attr;
242  return ArrayAttr::get(b.getContext(), attr.getValue().drop_front());
243 }
244 
245 /// Add the pass label to a vector transfer op if its rank is not the target
246 /// rank.
247 template <typename OpTy>
248 static void maybeApplyPassLabel(OpBuilder &b, OpTy newXferOp,
249  unsigned targetRank) {
250  if (newXferOp.getVectorType().getRank() > targetRank)
251  newXferOp->setAttr(kPassLabel, b.getUnitAttr());
252 }
253 
254 /// Return true if this transfer op operates on a source tensor.
255 template <typename OpTy>
256 static bool isTensorOp(OpTy xferOp) {
257  if (xferOp.getShapedType().template isa<RankedTensorType>()) {
258  if (xferOp.getOperationName().equals(TransferWriteOp::getOperationName())) {
259  // TransferWriteOps on tensors have a result.
260  assert(xferOp->getNumResults() > 0);
261  }
262  return true;
263  }
264  return false;
265 }
266 
267 namespace lowering_n_d {
268 
269 /// Helper data structure for data and mask buffers.
270 struct BufferAllocs {
271  Value dataBuffer;
272  Value maskBuffer;
273 };
274 
275 // TODO: Parallelism and threadlocal considerations with a ParallelScope trait.
276 static Operation *getAutomaticAllocationScope(Operation *op) {
277  Operation *scope =
278  op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
279  assert(scope && "Expected op to be inside automatic allocation scope");
280  return scope;
281 }
282 
283 /// Allocate temporary buffers for data (vector) and mask (if present).
284 template <typename OpTy>
285 static BufferAllocs allocBuffers(OpBuilder &b, OpTy xferOp) {
286  Location loc = xferOp.getLoc();
287  OpBuilder::InsertionGuard guard(b);
288  Operation *scope = getAutomaticAllocationScope(xferOp);
289  assert(scope->getNumRegions() == 1 &&
290  "AutomaticAllocationScope with >1 regions");
291  b.setInsertionPointToStart(&scope->getRegion(0).front());
292 
293  BufferAllocs result;
294  auto bufferType = MemRefType::get({}, xferOp.getVectorType());
295  result.dataBuffer = b.create<memref::AllocaOp>(loc, bufferType);
296 
297  if (xferOp.getMask()) {
298  auto maskType = MemRefType::get({}, xferOp.getMask().getType());
299  auto maskBuffer = b.create<memref::AllocaOp>(loc, maskType);
300  b.setInsertionPoint(xferOp);
301  b.create<memref::StoreOp>(loc, xferOp.getMask(), maskBuffer);
302  result.maskBuffer = b.create<memref::LoadOp>(loc, maskBuffer);
303  }
304 
305  return result;
306 }
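// Illustrative result (hypothetical types): for a transfer op with vector
// type vector<5x4xf32> and mask type vector<5xi1>, roughly:
//   %data = memref.alloca() : memref<vector<5x4xf32>>  // at scope entry
//   %mbuf = memref.alloca() : memref<vector<5xi1>>     // at scope entry
//   memref.store %mask, %mbuf[] : memref<vector<5xi1>> // right before xferOp
//   %m = memref.load %mbuf[] : memref<vector<5xi1>>
// The returned dataBuffer is %data and the returned maskBuffer is %m.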
307 
308 /// Given a MemRefType with VectorType element type, unpack one dimension from
309 /// the VectorType into the MemRefType.
310 ///
311 /// E.g.: memref<9xvector<5x6xf32>> --> memref<9x5xvector<6xf32>>
312 static MemRefType unpackOneDim(MemRefType type) {
313  auto vectorType = type.getElementType().dyn_cast<VectorType>();
314  auto memrefShape = type.getShape();
315  SmallVector<int64_t, 8> newMemrefShape;
316  newMemrefShape.append(memrefShape.begin(), memrefShape.end());
317  newMemrefShape.push_back(vectorType.getDimSize(0));
318  return MemRefType::get(newMemrefShape,
319  VectorType::get(vectorType.getShape().drop_front(),
320  vectorType.getElementType()));
321 }
322 
323 /// Given a transfer op, find the memref from which the mask is loaded. This
324 /// is similar to Strategy<TransferWriteOp>::getBuffer.
325 template <typename OpTy>
326 static Value getMaskBuffer(OpTy xferOp) {
327  assert(xferOp.getMask() && "Expected that transfer op has mask");
328  auto loadOp = xferOp.getMask().template getDefiningOp<memref::LoadOp>();
329  assert(loadOp && "Expected transfer op mask produced by LoadOp");
330  return loadOp.getMemRef();
331 }
332 
333 /// Codegen strategy, depending on the operation.
334 template <typename OpTy>
335 struct Strategy;
336 
337 /// Codegen strategy for vector TransferReadOp.
338 template <>
339 struct Strategy<TransferReadOp> {
340  /// Find the StoreOp that is used for writing the current TransferReadOp's
341  /// result to the temporary buffer allocation.
342  static memref::StoreOp getStoreOp(TransferReadOp xferOp) {
343  assert(xferOp->hasOneUse() && "Expected exactly one use of TransferReadOp");
344  auto storeOp = dyn_cast<memref::StoreOp>((*xferOp->use_begin()).getOwner());
345  assert(storeOp && "Expected TransferReadOp result used by StoreOp");
346  return storeOp;
347  }
348 
349  /// Find the temporary buffer allocation. All labeled TransferReadOps are
350  /// used like this, where %buf is either the buffer allocation or a type cast
351  /// of the buffer allocation:
352  /// ```
353  /// %vec = vector.transfer_read ... { __vector_to_scf_lowering__ } ...
354  /// memref.store %vec, %buf[...] ...
355  /// ```
356  static Value getBuffer(TransferReadOp xferOp) {
357  return getStoreOp(xferOp).getMemRef();
358  }
359 
360  /// Retrieve the indices of the current StoreOp that stores into the buffer.
361  static void getBufferIndices(TransferReadOp xferOp,
362  SmallVector<Value, 8> &indices) {
363  auto storeOp = getStoreOp(xferOp);
364  auto prevIndices = memref::StoreOpAdaptor(storeOp).getIndices();
365  indices.append(prevIndices.begin(), prevIndices.end());
366  }
367 
368  /// Rewrite the TransferReadOp, assuming that there are no out-of-bounds
369  /// accesses on the to-be-unpacked dimension.
370  ///
371  /// 1. Generate a new (N-1)-d TransferReadOp using the loop iteration
372  /// variable `iv`.
373  /// 2. Store the result into the (already `vector.type_cast`ed) buffer.
374  ///
375  /// E.g.:
376  /// ```
377  /// %vec = vector.transfer_read %A[%a+%i, %b, %c], %cst
378  /// : memref<?x?x?xf32>, vector<4x3xf32>
379  /// memref.store %vec, %buf[%i] : memref<5xvector<4x3xf32>>
380  /// ```
381  /// Is rewritten to:
382  /// ```
383  /// %casted = vector.type_cast %buf
384  /// : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
385  /// for %j = 0 to 4 {
386  /// %vec = vector.transfer_read %A[%a+%i, %b+%j, %c], %cst
387  /// : memref<?x?x?xf32>, vector<3xf32>
388  /// memref.store %vec, %casted[%i, %j] : memref<5x4xvector<3xf32>>
389  /// }
390  /// ```
391  ///
392  /// Note: The loop and type cast are generated in TransferOpConversion.
393  /// The original TransferReadOp and store op are deleted in `cleanup`.
394  /// Note: The `mask` operand is set in TransferOpConversion.
395  static TransferReadOp rewriteOp(OpBuilder &b,
396  VectorTransferToSCFOptions options,
397  TransferReadOp xferOp, Value buffer, Value iv,
398  ValueRange /*loopState*/) {
399  SmallVector<Value, 8> storeIndices;
400  getBufferIndices(xferOp, storeIndices);
401  storeIndices.push_back(iv);
402 
403  SmallVector<Value, 8> xferIndices;
404  getXferIndices(b, xferOp, iv, xferIndices);
405 
406  Location loc = xferOp.getLoc();
407  auto bufferType = buffer.getType().dyn_cast<ShapedType>();
408  auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
409  auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
410  auto newXferOp = b.create<vector::TransferReadOp>(
411  loc, vecType, xferOp.getSource(), xferIndices,
412  AffineMapAttr::get(unpackedPermutationMap(b, xferOp)),
413  xferOp.getPadding(), Value(), inBoundsAttr);
414 
415  maybeApplyPassLabel(b, newXferOp, options.targetRank);
416 
417  b.create<memref::StoreOp>(loc, newXferOp.getVector(), buffer, storeIndices);
418  return newXferOp;
419  }
420 
421  /// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
422  /// padding value to the temporary buffer.
423  static Value handleOutOfBoundsDim(OpBuilder &b, TransferReadOp xferOp,
424  Value buffer, Value iv,
425  ValueRange /*loopState*/) {
426  SmallVector<Value, 8> storeIndices;
427  getBufferIndices(xferOp, storeIndices);
428  storeIndices.push_back(iv);
429 
430  Location loc = xferOp.getLoc();
431  auto bufferType = buffer.getType().dyn_cast<ShapedType>();
432  auto vecType = bufferType.getElementType().dyn_cast<VectorType>();
433  auto vec = b.create<vector::SplatOp>(loc, vecType, xferOp.getPadding());
434  b.create<memref::StoreOp>(loc, vec, buffer, storeIndices);
435 
436  return Value();
437  }
438 
439  /// Cleanup after rewriting the op.
440  static void cleanup(PatternRewriter &rewriter, TransferReadOp xferOp,
441  scf::ForOp /*forOp*/) {
442  rewriter.eraseOp(getStoreOp(xferOp));
443  rewriter.eraseOp(xferOp);
444  }
445 
446  /// Return the initial loop state for the generated scf.for loop.
447  static Value initialLoopState(TransferReadOp xferOp) { return Value(); }
448 };
449 
450 /// Codegen strategy for vector TransferWriteOp.
451 template <>
452 struct Strategy<TransferWriteOp> {
453  /// Find the temporary buffer allocation. All labeled TransferWriteOps are
454  /// used like this, where %buf is either the buffer allocation or a type cast
455  /// of the buffer allocation:
456  /// ```
457  /// %vec = memref.load %buf[...] ...
458  /// vector.transfer_write %vec ... { __vector_to_scf_lowering__ } ...
459  /// ```
460  static Value getBuffer(TransferWriteOp xferOp) {
461  auto loadOp = xferOp.getVector().getDefiningOp<memref::LoadOp>();
462  assert(loadOp && "Expected transfer op vector produced by LoadOp");
463  return loadOp.getMemRef();
464  }
465 
466  /// Retrieve the indices of the current LoadOp that loads from the buffer.
467  static void getBufferIndices(TransferWriteOp xferOp,
468  SmallVector<Value, 8> &indices) {
469  auto loadOp = xferOp.getVector().getDefiningOp<memref::LoadOp>();
470  auto prevIndices = memref::LoadOpAdaptor(loadOp).getIndices();
471  indices.append(prevIndices.begin(), prevIndices.end());
472  }
473 
474  /// Rewrite the TransferWriteOp, assuming that there are no out-of-bounds
475  /// accesses on the to-be-unpacked dimension.
476  ///
477  /// 1. Load an (N-1)-d vector from the (already `vector.type_cast`ed) buffer,
478  /// using the loop iteration variable `iv`.
479  /// 2. Generate a new (N-1)-d TransferWriteOp, writing the loaded vector back
480  /// to memory.
481  ///
482  /// Note: For more details, see comments on Strategy<TransferReadOp>.
483  static TransferWriteOp rewriteOp(OpBuilder &b,
484  VectorTransferToSCFOptions options,
485  TransferWriteOp xferOp, Value buffer,
486  Value iv, ValueRange loopState) {
487  SmallVector<Value, 8> loadIndices;
488  getBufferIndices(xferOp, loadIndices);
489  loadIndices.push_back(iv);
490 
491  SmallVector<Value, 8> xferIndices;
492  getXferIndices(b, xferOp, iv, xferIndices);
493 
494  Location loc = xferOp.getLoc();
495  auto vec = b.create<memref::LoadOp>(loc, buffer, loadIndices);
496  auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
497  auto source = loopState.empty() ? xferOp.getSource() : loopState[0];
498  Type type = isTensorOp(xferOp) ? xferOp.getShapedType() : Type();
499  auto newXferOp = b.create<vector::TransferWriteOp>(
500  loc, type, vec, source, xferIndices,
501  AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), Value(),
502  inBoundsAttr);
503 
504  maybeApplyPassLabel(b, newXferOp, options.targetRank);
505 
506  return newXferOp;
507  }
508 
509  /// Handle out-of-bounds accesses on the to-be-unpacked dimension.
510  static Value handleOutOfBoundsDim(OpBuilder &b, TransferWriteOp xferOp,
511  Value buffer, Value iv,
512  ValueRange loopState) {
513  return isTensorOp(xferOp) ? loopState[0] : Value();
514  }
515 
516  /// Cleanup after rewriting the op.
517  static void cleanup(PatternRewriter &rewriter, TransferWriteOp xferOp,
518  scf::ForOp forOp) {
519  if (isTensorOp(xferOp)) {
520  assert(forOp->getNumResults() == 1 && "Expected one for loop result");
521  rewriter.replaceOp(xferOp, forOp->getResult(0));
522  } else {
523  rewriter.eraseOp(xferOp);
524  }
525  }
526 
527  /// Return the initial loop state for the generated scf.for loop.
528  static Value initialLoopState(TransferWriteOp xferOp) {
529  return isTensorOp(xferOp) ? xferOp.getSource() : Value();
530  }
531 };
532 
533 template <typename OpTy>
534 LogicalResult checkPrepareXferOp(OpTy xferOp,
535  VectorTransferToSCFOptions options) {
536  if (xferOp->hasAttr(kPassLabel))
537  return failure();
538  if (xferOp.getVectorType().getRank() <= options.targetRank)
539  return failure();
540  if (isTensorOp(xferOp) && !options.lowerTensors)
541  return failure();
542  // Transfer ops that modify the element type are not supported atm.
543  if (xferOp.getVectorType().getElementType() !=
544  xferOp.getShapedType().getElementType())
545  return failure();
546  return success();
547 }
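// Illustrative example (hypothetical values): with options.targetRank = 1,
// an unlabeled transfer op producing vector<5x4xf32> from a memref of f32 is
// accepted for preparation, whereas a transfer op that already carries
// __vector_to_scf_lowering__, produces vector<4xf32> (rank <= targetRank),
// or changes the element type is rejected.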
548 
549 /// Prepare a TransferReadOp for progressive lowering.
550 ///
551 /// 1. Allocate a temporary buffer.
552 /// 2. Label the TransferReadOp, marking it eligible for progressive lowering.
553 /// 3. Store the result of the TransferReadOp into the temporary buffer.
554 /// 4. Load the result from the temporary buffer and replace all uses of the
555 /// original TransferReadOp with this load.
556 ///
557 /// E.g.:
558 /// ```
559 /// %vec = vector.transfer_read %A[%a, %b, %c], %cst
560 /// : vector<5x4xf32>, memref<?x?x?xf32>
561 /// ```
562 /// is rewritten to:
563 /// ```
564 /// %0 = memref.alloca() : memref<vector<5x4xf32>>
565 /// %1 = vector.transfer_read %A[%a, %b, %c], %cst
566 /// { __vector_to_scf_lowering__ } : vector<5x4xf32>, memref<?x?x?xf32>
567 /// memref.store %1, %0[] : memref<vector<5x4xf32>>
568 /// %vec = memref.load %0[] : memref<vector<5x4xf32>>
569 /// ```
570 ///
571 /// Note: A second temporary buffer may be allocated for the `mask` operand.
572 struct PrepareTransferReadConversion
573  : public VectorToSCFPattern<TransferReadOp> {
574  using VectorToSCFPattern<TransferReadOp>::VectorToSCFPattern;
575 
576  LogicalResult matchAndRewrite(TransferReadOp xferOp,
577  PatternRewriter &rewriter) const override {
578  if (checkPrepareXferOp(xferOp, options).failed())
579  return failure();
580 
581  auto buffers = allocBuffers(rewriter, xferOp);
582  auto *newXfer = rewriter.clone(*xferOp.getOperation());
583  newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
584  if (xferOp.getMask()) {
585  dyn_cast<TransferReadOp>(newXfer).getMaskMutable().assign(
586  buffers.maskBuffer);
587  }
588 
589  Location loc = xferOp.getLoc();
590  rewriter.create<memref::StoreOp>(loc, newXfer->getResult(0),
591  buffers.dataBuffer);
592  rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);
593 
594  return success();
595  }
596 };
597 
598 /// Prepare a TransferWriteOp for progressive lowering.
599 ///
600 /// 1. Allocate a temporary buffer.
601 /// 2. Store the vector into the buffer.
602 /// 3. Load the vector from the buffer again.
603 /// 4. Use the loaded vector as a TransferWriteOp operand and label the op,
604 /// marking it eligible for progressive lowering via TransferOpConversion.
605 ///
606 /// E.g.:
607 /// ```
608 /// vector.transfer_write %vec, %A[%a, %b, %c]
609 /// : vector<5x4xf32>, memref<?x?x?xf32>
610 /// ```
611 /// is rewritten to:
612 /// ```
613 /// %0 = memref.alloca() : memref<vector<5x4xf32>>
614 /// memref.store %vec, %0[] : memref<vector<5x4xf32>>
615 /// %1 = memref.load %0[] : memref<vector<5x4xf32>>
616 /// vector.transfer_write %1, %A[%a, %b, %c] { __vector_to_scf_lowering__ }
617 /// : vector<5x4xf32>, memref<?x?x?xf32>
618 /// ```
619 ///
620 /// Note: A second temporary buffer may be allocated for the `mask` operand.
621 struct PrepareTransferWriteConversion
622  : public VectorToSCFPattern<TransferWriteOp> {
623  using VectorToSCFPattern<TransferWriteOp>::VectorToSCFPattern;
624 
625  LogicalResult matchAndRewrite(TransferWriteOp xferOp,
626  PatternRewriter &rewriter) const override {
627  if (checkPrepareXferOp(xferOp, options).failed())
628  return failure();
629 
630  Location loc = xferOp.getLoc();
631  auto buffers = allocBuffers(rewriter, xferOp);
632  rewriter.create<memref::StoreOp>(loc, xferOp.getVector(),
633  buffers.dataBuffer);
634  auto loadedVec = rewriter.create<memref::LoadOp>(loc, buffers.dataBuffer);
635  rewriter.updateRootInPlace(xferOp, [&]() {
636  xferOp.getVectorMutable().assign(loadedVec);
637  xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
638  });
639 
640  if (xferOp.getMask()) {
641  rewriter.updateRootInPlace(xferOp, [&]() {
642  xferOp.getMaskMutable().assign(buffers.maskBuffer);
643  });
644  }
645 
646  return success();
647  }
648 };
649 
650 /// Progressive lowering of vector transfer ops: Unpack one dimension.
651 ///
652 /// 1. Unpack one dimension from the current buffer type and cast the buffer
653 /// to that new type. E.g.:
654 /// ```
655 /// %vec = memref.load %0[%1] : memref<5xvector<4x3xf32>>
656 /// vector.transfer_write %vec ...
657 /// ```
658 /// The following cast is generated:
659 /// ```
660 /// %casted = vector.type_cast %0
661 /// : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
662 /// ```
663 /// 2. Generate a for loop and rewrite the transfer op according to the
664 /// corresponding Strategy<OpTy>. If the to-be-unpacked dimension can be
665 /// out-of-bounds, generate an if-check and handle both cases separately.
666 /// 3. Clean up according to the corresponding Strategy<OpTy>.
667 ///
668 /// Note: If the transfer op is a TransferWriteOp and operates on a tensor
669 /// source (as opposed to a memref source), then each iteration of the generated
670 /// scf.for loop yields the new tensor value. E.g.:
671 /// ```
672 /// %result = scf.for i = 0 to 5 {
673 /// %0 = memref.load %buffer[i] : memref<5xvector<4x3xf32>>
674 /// %1 = vector.transfer_write %0, %source[...]
675 /// : vector<4x3xf32>, tensor<5x4x3xf32>
676 /// scf.yield %1 : tensor<5x4x3xf32>
677 /// }
678 /// ```
679 template <typename OpTy>
680 struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
681  using VectorToSCFPattern<OpTy>::VectorToSCFPattern;
682 
683  void initialize() {
684  // This pattern recursively unpacks one dimension at a time. The recursion
685  // is bounded because the rank is strictly decreasing.
686  this->setHasBoundedRewriteRecursion();
687  }
688 
689  LogicalResult matchAndRewrite(OpTy xferOp,
690  PatternRewriter &rewriter) const override {
691  if (!xferOp->hasAttr(kPassLabel))
692  return failure();
693 
694  // Find and cast data buffer. How the buffer can be found depends on OpTy.
695  ImplicitLocOpBuilder locB(xferOp.getLoc(), rewriter);
696  auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
697  auto dataBufferType = dataBuffer.getType().template dyn_cast<MemRefType>();
698  auto castedDataType = unpackOneDim(dataBufferType);
699  auto castedDataBuffer =
700  locB.create<vector::TypeCastOp>(castedDataType, dataBuffer);
701 
702  // If the xferOp has a mask: Find and cast mask buffer.
703  Value castedMaskBuffer;
704  if (xferOp.getMask()) {
705  auto maskBuffer = getMaskBuffer(xferOp);
706  auto maskBufferType =
707  maskBuffer.getType().template dyn_cast<MemRefType>();
708  if (xferOp.isBroadcastDim(0) || xferOp.getMaskType().getRank() == 1) {
709  // Do not unpack a dimension of the mask, if:
710  // * To-be-unpacked transfer op dimension is a broadcast.
711  // * Mask is 1D, i.e., the mask cannot be further unpacked.
712  // (That means that all remaining dimensions of the transfer op must
713  // be broadcasted.)
714  castedMaskBuffer = maskBuffer;
715  } else {
716  auto castedMaskType = unpackOneDim(maskBufferType);
717  castedMaskBuffer =
718  locB.create<vector::TypeCastOp>(castedMaskType, maskBuffer);
719  }
720  }
721 
722  // Loop bounds and step.
723  auto lb = locB.create<arith::ConstantIndexOp>(0);
724  auto ub = locB.create<arith::ConstantIndexOp>(
725  castedDataType.getDimSize(castedDataType.getRank() - 1));
726  auto step = locB.create<arith::ConstantIndexOp>(1);
727  // TransferWriteOps that operate on tensors return the modified tensor and
728  // require a loop state.
729  auto loopState = Strategy<OpTy>::initialLoopState(xferOp);
730 
731  // Generate for loop.
732  auto result = locB.create<scf::ForOp>(
733  lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
734  [&](OpBuilder &b, Location loc, Value iv, ValueRange loopState) {
735  Type stateType = loopState.empty() ? Type() : loopState[0].getType();
736 
737  auto result = generateInBoundsCheck(
738  b, xferOp, iv, unpackedDim(xferOp),
739  stateType ? TypeRange(stateType) : TypeRange(),
740  /*inBoundsCase=*/
741  [&](OpBuilder &b, Location loc) {
742  // Create new transfer op.
743  OpTy newXfer = Strategy<OpTy>::rewriteOp(
744  b, this->options, xferOp, castedDataBuffer, iv, loopState);
745 
746  // If old transfer op has a mask: Set mask on new transfer op.
747  // Special case: If the mask of the old transfer op is 1D and the
748  // unpacked dim is not a broadcast, no mask is needed on the new
749  // transfer op.
751  if (xferOp.getMask() && (xferOp.isBroadcastDim(0) ||
752  xferOp.getMaskType().getRank() > 1)) {
753  OpBuilder::InsertionGuard guard(b);
754  b.setInsertionPoint(newXfer); // Insert load before newXfer.
755 
756  SmallVector<Value, 8> loadIndices;
757  Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
758  // In case of broadcast: Use same indices to load from memref
759  // as before.
760  if (!xferOp.isBroadcastDim(0))
761  loadIndices.push_back(iv);
762 
763  auto mask = b.create<memref::LoadOp>(loc, castedMaskBuffer,
764  loadIndices);
765  rewriter.updateRootInPlace(newXfer, [&]() {
766  newXfer.getMaskMutable().assign(mask);
767  });
768  }
769 
770  return loopState.empty() ? Value() : newXfer->getResult(0);
771  },
772  /*outOfBoundsCase=*/
773  [&](OpBuilder &b, Location /*loc*/) {
774  return Strategy<OpTy>::handleOutOfBoundsDim(
775  b, xferOp, castedDataBuffer, iv, loopState);
776  });
777 
778  maybeYieldValue(b, loc, !loopState.empty(), result);
779  });
780 
781  Strategy<OpTy>::cleanup(rewriter, xferOp, result);
782  return success();
783  }
784 };
785 
786 } // namespace lowering_n_d
787 
788 namespace lowering_n_d_unrolled {
789 
790 /// If the original transfer op has a mask, compute the mask of the new transfer
791 /// op (for the current iteration `i`) and assign it.
792 template <typename OpTy>
793 static void maybeAssignMask(OpBuilder &b, OpTy xferOp, OpTy newXferOp,
794  int64_t i) {
795  if (!xferOp.getMask())
796  return;
797 
798  if (xferOp.isBroadcastDim(0)) {
799  // To-be-unpacked dimension is a broadcast, which does not have a
800  // corresponding mask dimension. Mask attribute remains unchanged.
801  newXferOp.getMaskMutable().assign(xferOp.getMask());
802  return;
803  }
804 
805  if (xferOp.getMaskType().getRank() > 1) {
806  // Unpack one dimension of the mask.
807  OpBuilder::InsertionGuard guard(b);
808  b.setInsertionPoint(newXferOp); // Insert load before newXfer.
809 
810  llvm::SmallVector<int64_t, 1> indices({i});
811  Location loc = xferOp.getLoc();
812  auto newMask = b.create<vector::ExtractOp>(loc, xferOp.getMask(), indices);
813  newXferOp.getMaskMutable().assign(newMask);
814  }
815 
816  // If we end up here: The mask of the old transfer op is 1D and the unpacked
817  // dim is not a broadcast, so no mask is needed on the new transfer op.
818  // `generateInBoundsCheck` will have evaluated the mask already.
819 }
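// Illustrative sketch (hypothetical types): for i = 2 and an old mask of type
// vector<5x4xi1>, the new transfer op receives roughly
//   %m2 = vector.extract %mask[2] : vector<5x4xi1>
// which is of type vector<4xi1>. A 1-D, non-broadcast mask is intentionally
// dropped here because generateInBoundsCheck already folded it into the
// scalar condition.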
820 
821 /// Progressive lowering of vector TransferReadOp with unrolling: Unpack one
822 /// dimension. This is similar to TransferOpConversion<TransferReadOp>, but no
823 /// memref buffer is allocated and the SCF loop is fully unrolled.
824 ///
826 /// E.g.:
827 /// ```
828 /// %vec = vector.transfer_read %A[%a, %b, %c], %padding
829 /// : memref<?x?x?xf32>, vector<5x4xf32>
830 /// ```
831 /// is rewritten to IR such as (simplified):
832 /// ```
833 /// %v_init = splat %padding : vector<5x4xf32>
834 /// %tmp0 = vector.transfer_read %A[%a, %b, %c], %padding
835 /// : memref<?x?x?xf32>, vector<4xf32>
836 /// %v0 = vector.insert %tmp0, %v_init[0] : vector<4xf32> into vector<5x4xf32>
837 /// %tmp1 = vector.transfer_read %A[%a, %b + 1, %c], %padding
838 /// : memref<?x?x?xf32>, vector<4xf32>
839 /// %v1 = vector.insert %tmp1, %v0[1] : vector<4xf32> into vector<5x4xf32>
840 /// ...
841 /// %tmp4 = vector.transfer_read %A[%a, %b + 4, %c], %padding
842 /// : memref<?x?x?xf32>, vector<4xf32>
843 /// %vec = vector.insert %tmp4, %v3[4] : vector<4xf32> into vector<5x4xf32>
844 /// ```
845 ///
846 /// Note: As an optimization, if the result of the original TransferReadOp
847 /// was directly inserted into another vector, no new %v_init vector is created.
848 /// Instead, the new TransferReadOp results are inserted into that vector.
849 struct UnrollTransferReadConversion
850  : public VectorToSCFPattern<TransferReadOp> {
851  using VectorToSCFPattern<TransferReadOp>::VectorToSCFPattern;
852 
853  void initialize() {
854  // This pattern recursively unpacks one dimension at a time. The recursion
855  // is bounded because the rank is strictly decreasing.
856  setHasBoundedRewriteRecursion();
857  }
858 
859  /// Return the vector into which the newly created TransferReadOp results
860  /// are inserted.
861  Value getResultVector(TransferReadOp xferOp,
862  PatternRewriter &rewriter) const {
863  if (auto insertOp = getInsertOp(xferOp))
864  return insertOp.getDest();
865  Location loc = xferOp.getLoc();
866  return rewriter.create<vector::SplatOp>(loc, xferOp.getVectorType(),
867  xferOp.getPadding());
868  }
869 
870  /// If the result of the TransferReadOp has exactly one user, which is a
871  /// vector::InsertOp, return that operation.
872  vector::InsertOp getInsertOp(TransferReadOp xferOp) const {
873  if (xferOp->hasOneUse()) {
874  Operation *xferOpUser = *xferOp->getUsers().begin();
875  if (auto insertOp = dyn_cast<vector::InsertOp>(xferOpUser))
876  return insertOp;
877  }
878 
879  return vector::InsertOp();
880  }
881 
882  /// If the result of the TransferReadOp has exactly one user, which is a
883  /// vector::InsertOp, return that operation's indices.
884  void getInsertionIndices(TransferReadOp xferOp,
885  SmallVector<int64_t, 8> &indices) const {
886  if (auto insertOp = getInsertOp(xferOp)) {
887  for (Attribute attr : insertOp.getPosition())
888  indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
889  }
890  }
891 
892  /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
893  /// accesses, and broadcasts and transposes in permutation maps.
894  LogicalResult matchAndRewrite(TransferReadOp xferOp,
895  PatternRewriter &rewriter) const override {
896  if (xferOp.getVectorType().getRank() <= options.targetRank)
897  return failure();
898  if (isTensorOp(xferOp) && !options.lowerTensors)
899  return failure();
900  // Transfer ops that modify the element type are not supported atm.
901  if (xferOp.getVectorType().getElementType() !=
902  xferOp.getShapedType().getElementType())
903  return failure();
904 
905  auto insertOp = getInsertOp(xferOp);
906  auto vec = getResultVector(xferOp, rewriter);
907  auto vecType = vec.getType().dyn_cast<VectorType>();
908  auto xferVecType = xferOp.getVectorType();
909  auto newXferVecType = VectorType::get(xferVecType.getShape().drop_front(),
910  xferVecType.getElementType());
911  int64_t dimSize = xferVecType.getShape()[0];
912 
913  // Generate fully unrolled loop of transfer ops.
914  Location loc = xferOp.getLoc();
915  for (int64_t i = 0; i < dimSize; ++i) {
916  Value iv = rewriter.create<arith::ConstantIndexOp>(loc, i);
917 
918  vec = generateInBoundsCheck(
919  rewriter, xferOp, iv, unpackedDim(xferOp), TypeRange(vecType),
920  /*inBoundsCase=*/
921  [&](OpBuilder &b, Location loc) {
922  // Indices for the new transfer op.
923  SmallVector<Value, 8> xferIndices;
924  getXferIndices(b, xferOp, iv, xferIndices);
925 
926  // Indices for the new vector.insert op.
927  SmallVector<int64_t, 8> insertionIndices;
928  getInsertionIndices(xferOp, insertionIndices);
929  insertionIndices.push_back(i);
930 
931  auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
932  auto newXferOp = b.create<vector::TransferReadOp>(
933  loc, newXferVecType, xferOp.getSource(), xferIndices,
934  AffineMapAttr::get(unpackedPermutationMap(b, xferOp)),
935  xferOp.getPadding(), Value(), inBoundsAttr);
936  maybeAssignMask(b, xferOp, newXferOp, i);
937  return b.create<vector::InsertOp>(loc, newXferOp, vec,
938  insertionIndices);
939  },
940  /*outOfBoundsCase=*/
941  [&](OpBuilder &b, Location loc) {
942  // Out-of-bounds case: keep the original (unmodified) vector.
943  return vec;
944  });
945  }
946 
947  if (insertOp) {
948  // Rewrite single user of the old TransferReadOp, which was an InsertOp.
949  rewriter.replaceOp(insertOp, vec);
950  rewriter.eraseOp(xferOp);
951  } else {
952  rewriter.replaceOp(xferOp, vec);
953  }
954 
955  return success();
956  }
957 };
958 
959 /// Progressive lowering of vector TransferWriteOp with unrolling: Unpack one
960 /// dimension. This is similar to TransferOpConversion<TransferWriteOp>, but no
961 /// memref buffer is allocated and the SCF loop is fully unrolled.
962 ///
964 /// E.g.:
965 /// ```
966 /// vector.transfer_write %vec, %A[%a, %b, %c]
967 /// : vector<5x4xf32>, memref<?x?x?xf32>
968 /// ```
969 /// is rewritten to IR such as (simplified):
970 /// ```
971 /// %v0 = vector.extract %vec[0] : vector<5x4xf32>
972 /// vector.transfer_write %v0, %A[%a, %b, %c] : vector<4xf32>, memref<...>
973 /// %v1 = vector.extract %vec[1] : vector<5x4xf32>
974 /// vector.transfer_write %v1, %A[%a, %b + 1, %c] : vector<4xf32>, memref<...>
975 /// ...
976 /// %v4 = vector.extract %vec[4] : vector<5x4xf32>
977 /// vector.transfer_write %v4, %A[%a, %b + 4, %c] : vector<4xf32>, memref<...>
978 /// ```
979 ///
980 /// Note: As an optimization, if the vector of the original TransferWriteOp
981 /// was directly extracted from another vector via an ExtractOp `a`, extract
982 /// the vectors for the newly generated TransferWriteOps from `a`'s input. By
983 /// doing so, `a` may become dead, and the number of ExtractOps generated during
984 /// recursive application of this pattern will be minimal.
985 struct UnrollTransferWriteConversion
986  : public VectorToSCFPattern<TransferWriteOp> {
987  using VectorToSCFPattern<TransferWriteOp>::VectorToSCFPattern;
988 
989  void initialize() {
990  // This pattern recursively unpacks one dimension at a time. The recursion
991  // is bounded because the rank is strictly decreasing.
992  setHasBoundedRewriteRecursion();
993  }
994 
995  /// Return the vector from which newly generated ExtractOps will extract.
996  Value getDataVector(TransferWriteOp xferOp) const {
997  if (auto extractOp = getExtractOp(xferOp))
998  return extractOp.getVector();
999  return xferOp.getVector();
1000  }
1001 
1002  /// If the input of the given TransferWriteOp is an ExtractOp, return it.
1003  vector::ExtractOp getExtractOp(TransferWriteOp xferOp) const {
1004  if (auto *op = xferOp.getVector().getDefiningOp())
1005  return dyn_cast<vector::ExtractOp>(op);
1006  return vector::ExtractOp();
1007  }
1008 
1009  /// If the input of the given TransferWriteOp is an ExtractOp, return its
1010  /// indices.
1011  void getExtractionIndices(TransferWriteOp xferOp,
1012  SmallVector<int64_t, 8> &indices) const {
1013  if (auto extractOp = getExtractOp(xferOp)) {
1014  for (Attribute attr : extractOp.getPosition())
1015  indices.push_back(attr.dyn_cast<IntegerAttr>().getInt());
1016  }
1017  }
1018 
1019  /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
1020  /// accesses, and broadcasts and transposes in permutation maps.
1021  LogicalResult matchAndRewrite(TransferWriteOp xferOp,
1022  PatternRewriter &rewriter) const override {
1023  if (xferOp.getVectorType().getRank() <= options.targetRank)
1024  return failure();
1025  if (isTensorOp(xferOp) && !options.lowerTensors)
1026  return failure();
1027  // Transfer ops that modify the element type are not supported atm.
1028  if (xferOp.getVectorType().getElementType() !=
1029  xferOp.getShapedType().getElementType())
1030  return failure();
1031 
1032  auto vec = getDataVector(xferOp);
1033  auto xferVecType = xferOp.getVectorType();
1034  int64_t dimSize = xferVecType.getShape()[0];
1035  auto source = xferOp.getSource(); // memref or tensor to be written to.
1036  auto sourceType = isTensorOp(xferOp) ? xferOp.getShapedType() : Type();
1037 
1038  // Generate fully unrolled loop of transfer ops.
1039  Location loc = xferOp.getLoc();
1040  for (int64_t i = 0; i < dimSize; ++i) {
1041  Value iv = rewriter.create<arith::ConstantIndexOp>(loc, i);
1042 
1043  auto updatedSource = generateInBoundsCheck(
1044  rewriter, xferOp, iv, unpackedDim(xferOp),
1045  isTensorOp(xferOp) ? TypeRange(sourceType) : TypeRange(),
1046  /*inBoundsCase=*/
1047  [&](OpBuilder &b, Location loc) {
1048  // Indices for the new transfer op.
1049  SmallVector<Value, 8> xferIndices;
1050  getXferIndices(b, xferOp, iv, xferIndices);
1051 
1052  // Indices for the new vector.extract op.
1053  SmallVector<int64_t, 8> extractionIndices;
1054  getExtractionIndices(xferOp, extractionIndices);
1055  extractionIndices.push_back(i);
1056 
1057  auto extracted =
1058  b.create<vector::ExtractOp>(loc, vec, extractionIndices);
1059  auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
1060  auto newXferOp = b.create<vector::TransferWriteOp>(
1061  loc, sourceType, extracted, source, xferIndices,
1062  AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), Value(),
1063  inBoundsAttr);
1064 
1065  maybeAssignMask(b, xferOp, newXferOp, i);
1066 
1067  return isTensorOp(xferOp) ? newXferOp->getResult(0) : Value();
1068  },
1069  /*outOfBoundsCase=*/
1070  [&](OpBuilder &b, Location loc) {
1071  return isTensorOp(xferOp) ? source : Value();
1072  });
1073 
1074  if (isTensorOp(xferOp))
1075  source = updatedSource;
1076  }
1077 
1078  if (isTensorOp(xferOp))
1079  rewriter.replaceOp(xferOp, source);
1080  else
1081  rewriter.eraseOp(xferOp);
1082 
1083  return success();
1084  }
1085 };
1086 
1087 } // namespace lowering_n_d_unrolled
1088 
1089 namespace lowering_1_d {
1090 
1091 /// Compute the indices into the memref for the LoadOp/StoreOp generated as
1092 /// part of TransferOp1dConversion. Return the memref dimension on which
1093 /// the transfer is operating. A return value of None indicates a broadcast.
1094 template <typename OpTy>
1095 static Optional<int64_t>
1096 get1dMemrefIndices(OpBuilder &b, OpTy xferOp, Value iv,
1097  SmallVector<Value, 8> &memrefIndices) {
1098  auto indices = xferOp.getIndices();
1099  auto map = xferOp.getPermutationMap();
1100  assert(xferOp.getTransferRank() > 0 && "unexpected 0-d transfer");
1101 
1102  memrefIndices.append(indices.begin(), indices.end());
1103  assert(map.getNumResults() == 1 &&
1104  "Expected 1 permutation map result for 1D transfer");
1105  if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
1106  Location loc = xferOp.getLoc();
1107  auto dim = expr.getPosition();
1108  AffineExpr d0, d1;
1109  bindDims(xferOp.getContext(), d0, d1);
1110  Value offset = memrefIndices[dim];
1111  memrefIndices[dim] = makeComposedAffineApply(b, loc, d0 + d1, {offset, iv});
1112  return dim;
1113  }
1114 
1115  assert(xferOp.isBroadcastDim(0) &&
1116  "Expected AffineDimExpr or AffineConstantExpr");
1117  return None;
1118 }
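// Illustrative sketch (hypothetical values): for indices [%a, %b], a
// permutation map (d0, d1) -> (d0) and induction variable %iv, the memref
// indices become [affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%a, %iv), %b]
// and the returned dim is 0; for a broadcast map such as (d0, d1) -> (0),
// the indices stay unchanged and None is returned.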
1119 
1120 /// Codegen strategy for TransferOp1dConversion, depending on the
1121 /// operation.
1122 template <typename OpTy>
1123 struct Strategy1d;
1124 
1125 /// Codegen strategy for TransferReadOp.
1126 template <>
1127 struct Strategy1d<TransferReadOp> {
1128  static void generateForLoopBody(OpBuilder &b, Location loc,
1129  TransferReadOp xferOp, Value iv,
1130  ValueRange loopState) {
1131  SmallVector<Value, 8> indices;
1132  auto dim = get1dMemrefIndices(b, xferOp, iv, indices);
1133  auto vec = loopState[0];
1134 
1135  // In case of out-of-bounds access, leave `vec` as is (was initialized with
1136  // padding value).
1137  auto nextVec = generateInBoundsCheck(
1138  b, xferOp, iv, dim, TypeRange(xferOp.getVectorType()),
1139  /*inBoundsCase=*/
1140  [&](OpBuilder &b, Location loc) {
1141  Value val =
1142  b.create<memref::LoadOp>(loc, xferOp.getSource(), indices);
1143  return b.create<vector::InsertElementOp>(loc, val, vec, iv);
1144  },
1145  /*outOfBoundsCase=*/
1146  [&](OpBuilder & /*b*/, Location loc) { return vec; });
1147  b.create<scf::YieldOp>(loc, nextVec);
1148  }
1149 
1150  static Value initialLoopState(OpBuilder &b, TransferReadOp xferOp) {
1151  // Initialize vector with padding value.
1152  Location loc = xferOp.getLoc();
1153  return b.create<vector::SplatOp>(loc, xferOp.getVectorType(),
1154  xferOp.getPadding());
1155  }
1156 };
1157 
1158 /// Codegen strategy for TransferWriteOp.
1159 template <>
1160 struct Strategy1d<TransferWriteOp> {
1161  static void generateForLoopBody(OpBuilder &b, Location loc,
1162  TransferWriteOp xferOp, Value iv,
1163  ValueRange /*loopState*/) {
1164  SmallVector<Value, 8> indices;
1165  auto dim = get1dMemrefIndices(b, xferOp, iv, indices);
1166 
1167  // Nothing to do in case of out-of-bounds access.
1168  generateInBoundsCheck(
1169  b, xferOp, iv, dim,
1170  /*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
1171  auto val =
1172  b.create<vector::ExtractElementOp>(loc, xferOp.getVector(), iv);
1173  b.create<memref::StoreOp>(loc, val, xferOp.getSource(), indices);
1174  });
1175  b.create<scf::YieldOp>(loc);
1176  }
1177 
1178  static Value initialLoopState(OpBuilder &b, TransferWriteOp xferOp) {
1179  return Value();
1180  }
1181 };
1182 
1183 /// Return true if the last dimension of the MemRefType has unit stride.
1184 static bool isLastMemrefDimUnitStride(MemRefType type) {
1185  int64_t offset;
1186  SmallVector<int64_t, 4> strides;
1187  auto successStrides = getStridesAndOffset(type, strides, offset);
1188  return succeeded(successStrides) && (strides.empty() || strides.back() == 1);
1189 }
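// Illustrative example (hypothetical types): memref<4x8xf32> has the identity
// layout with strides [8, 1], so the last stride is 1 and this returns true;
// a memref whose layout gives the last dimension a stride of 2 (e.g. a
// strided subview) returns false.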
1190 
1191 /// Lower a 1D vector transfer op to SCF using scalar loads/stores. This is
1192 /// necessary in cases where a 1D vector transfer op cannot be lowered into
1193 /// vector load/stores due to non-unit strides or broadcasts:
1194 ///
1195 /// * Transfer dimension is not the last memref dimension
1196 /// * Transfer dimension is a broadcast (i.e., scalar load + broadcast)
1197 /// * Memref has a layout map with non-unit stride on the last dimension
1198 ///
1199 /// This pattern generates IR as follows:
1200 ///
1201 /// 1. Generate a for loop iterating over each vector element.
1202 /// 2. Inside the loop, generate a InsertElementOp or ExtractElementOp,
1203 /// depending on OpTy.
1204 ///
1205 /// TODO: In some cases (no masking, etc.), LLVM::MatrixColumnMajorLoadOp
1206 /// can be generated instead of TransferOp1dConversion. Add such a pattern
1207 /// to ConvertVectorToLLVM.
1208 ///
1209 /// E.g.:
1210 /// ```
1211 /// vector.transfer_write %vec, %A[%a, %b]
1212 /// {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
1213 /// : vector<9xf32>, memref<?x?xf32>
1214 /// ```
1215 /// Is rewritten to approximately the following pseudo-IR:
1216 /// ```
1217 /// for i = 0 to 9 {
1218 /// %t = vector.extractelement %vec[i] : vector<9xf32>
1219 /// memref.store %t, %arg0[%a + i, %b] : memref<?x?xf32>
1220 /// }
1221 /// ```
1222 template <typename OpTy>
1223 struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {
1224  using VectorToSCFPattern<OpTy>::VectorToSCFPattern;
1225 
1226  LogicalResult matchAndRewrite(OpTy xferOp,
1227  PatternRewriter &rewriter) const override {
1228  // TODO: support 0-d corner case.
1229  if (xferOp.getTransferRank() == 0)
1230  return failure();
1231  auto map = xferOp.getPermutationMap();
1232  auto memRefType = xferOp.getShapedType().template dyn_cast<MemRefType>();
1233 
1234  if (!memRefType)
1235  return failure();
1236  if (xferOp.getVectorType().getRank() != 1)
1237  return failure();
1238  if (map.isMinorIdentity() && isLastMemrefDimUnitStride(memRefType))
1239  return failure(); // Handled by ConvertVectorToLLVM
1240 
1241  // Loop bounds, step, state...
1242  Location loc = xferOp.getLoc();
1243  auto vecType = xferOp.getVectorType();
1244  auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
1245  auto ub =
1246  rewriter.create<arith::ConstantIndexOp>(loc, vecType.getDimSize(0));
1247  auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
1248  auto loopState = Strategy1d<OpTy>::initialLoopState(rewriter, xferOp);
1249 
1250  // Generate for loop.
1251  rewriter.replaceOpWithNewOp<scf::ForOp>(
1252  xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
1253  [&](OpBuilder &b, Location loc, Value iv, ValueRange loopState) {
1254  Strategy1d<OpTy>::generateForLoopBody(b, loc, xferOp, iv, loopState);
1255  });
1256 
1257  return success();
1258  }
1259 };
1260 
1261 } // namespace lowering_1_d
1262 } // namespace
1263 
1264 void mlir::populateVectorToSCFConversionPatterns(
1265  RewritePatternSet &patterns, const VectorTransferToSCFOptions &options) {
1266  if (options.unroll) {
1267  patterns.add<lowering_n_d_unrolled::UnrollTransferReadConversion,
1268  lowering_n_d_unrolled::UnrollTransferWriteConversion>(
1269  patterns.getContext(), options);
1270  } else {
1271  patterns.add<lowering_n_d::PrepareTransferReadConversion,
1272  lowering_n_d::PrepareTransferWriteConversion,
1273  lowering_n_d::TransferOpConversion<TransferReadOp>,
1274  lowering_n_d::TransferOpConversion<TransferWriteOp>>(
1275  patterns.getContext(), options);
1276  }
1277 
1278  if (options.targetRank == 1) {
1279  patterns.add<lowering_1_d::TransferOp1dConversion<TransferReadOp>,
1280  lowering_1_d::TransferOp1dConversion<TransferWriteOp>>(
1281  patterns.getContext(), options);
1282  }
1283 }
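// Sketch of one way to apply these patterns outside the pass (assumes a
// greedy rewrite driver; the surrounding variables are hypothetical):
//   RewritePatternSet patterns(ctx);
//   VectorTransferToSCFOptions opts;
//   opts.targetRank = 1;
//   populateVectorToSCFConversionPatterns(patterns, opts);
//   (void)applyPatternsAndFoldGreedily(op, std::move(patterns));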
1284 
1285 namespace {
1286 
1287 struct ConvertVectorToSCFPass
1288  : public impl::ConvertVectorToSCFBase<ConvertVectorToSCFPass> {
1289  ConvertVectorToSCFPass() = default;
1290  ConvertVectorToSCFPass(const VectorTransferToSCFOptions &options) {
1291  this->fullUnroll = options.unroll;
1292  this->targetRank = options.targetRank;
1293  this->lowerPermutationMaps = options.lowerPermutationMaps;
1294  this->lowerTensors = options.lowerTensors;
1295  }
1296 
1297  void runOnOperation() override {
1298  VectorTransferToSCFOptions options;
1299  options.unroll = fullUnroll;
1300  options.targetRank = targetRank;
1301  options.lowerPermutationMaps = lowerPermutationMaps;
1302  options.lowerTensors = lowerTensors;
1303 
1304  // Lower permutation maps first.
1305  if (lowerPermutationMaps) {
1306  RewritePatternSet lowerTransferPatterns(&getContext());
1307  vector::populateVectorTransferPermutationMapLoweringPatterns(
1308  lowerTransferPatterns);
1309  (void)applyPatternsAndFoldGreedily(getOperation(),
1310  std::move(lowerTransferPatterns));
1311  }
1312 
1313  RewritePatternSet patterns(&getContext());
1314  populateVectorToSCFConversionPatterns(patterns, options);
1315  (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
1316  }
1317 };
1318 
1319 } // namespace
1320 
1321 std::unique_ptr<Pass>
1322 mlir::createConvertVectorToSCFPass(const VectorTransferToSCFOptions &options) {
1323  return std::make_unique<ConvertVectorToSCFPass>(options);
1324 }
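// Sketch of typical pipeline usage (the pass-manager setup is an assumption,
// not part of this file):
//   VectorTransferToSCFOptions options;
//   options.unroll = true;
//   pm.addPass(createConvertVectorToSCFPass(options));
// The same lowering is also reachable from mlir-opt via
// --convert-vector-to-scf.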