1 //===- VectorToSCF.cpp - Convert vector to SCF dialect ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements lowering of vector transfer operations to SCF.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <numeric>
14 #include <optional>
15 #include <type_traits>
16 
17 #include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
18 
19 #include "mlir/Dialect/Affine/IR/AffineOps.h"
20 #include "mlir/Dialect/Arith/IR/Arith.h"
21 #include "mlir/Dialect/MemRef/IR/MemRef.h"
22 #include "mlir/Dialect/SCF/IR/SCF.h"
23 #include "mlir/Dialect/Vector/IR/VectorOps.h"
24 #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
25 #include "mlir/Dialect/Vector/Utils/VectorUtils.h"
26 #include "mlir/IR/Builders.h"
27 #include "mlir/IR/ImplicitLocOpBuilder.h"
28 #include "mlir/Pass/Pass.h"
29 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
30 #include "mlir/Transforms/Passes.h"
31 
32 namespace mlir {
33 #define GEN_PASS_DEF_CONVERTVECTORTOSCF
34 #include "mlir/Conversion/Passes.h.inc"
35 } // namespace mlir
36 
37 using namespace mlir;
38 using vector::TransferReadOp;
39 using vector::TransferWriteOp;
40 
41 namespace {
42 
43 /// Attribute name used for labeling transfer ops during progressive lowering.
44 static const char kPassLabel[] = "__vector_to_scf_lowering__";
45 
46 /// Patterns that inherit from this struct have access to
47 /// VectorTransferToSCFOptions.
48 template <typename OpTy>
49 struct VectorToSCFPattern : public OpRewritePattern<OpTy> {
50  explicit VectorToSCFPattern(MLIRContext *context,
51                              VectorTransferToSCFOptions opt)
52      : OpRewritePattern<OpTy>(context), options(opt) {}
53 
54  VectorTransferToSCFOptions options;
55 };
56 
57 /// Given a vector transfer op, calculate which dimension of the `source`
58 /// memref should be unpacked in the next application of TransferOpConversion.
59 /// A return value of std::nullopt indicates a broadcast.
60 template <typename OpTy>
61 static std::optional<int64_t> unpackedDim(OpTy xferOp) {
62  // TODO: support 0-d corner case.
63  assert(xferOp.getTransferRank() > 0 && "unexpected 0-d transfer");
64  auto map = xferOp.getPermutationMap();
65  if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
66  return expr.getPosition();
67  }
68  assert(xferOp.isBroadcastDim(0) &&
69  "Expected AffineDimExpr or AffineConstantExpr");
70  return std::nullopt;
71 }
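// Illustrative note (not part of the original source): for a transfer op whose
// permutation map is, e.g.,
// ```
// affine_map<(d0, d1, d2) -> (d2, d1)>
// ```
// the first result is `d2`, so `unpackedDim` returns 2 and the next unpacking
// step iterates over memref dimension 2. If the first result is the constant 0
// (a broadcast dimension), std::nullopt is returned instead.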
72 
73 /// Compute the permutation map for the new (N-1)-D vector transfer op. This
74 /// map is identical to the current permutation map, but the first result is
75 /// omitted.
76 template <typename OpTy>
77 static AffineMap unpackedPermutationMap(OpBuilder &b, OpTy xferOp) {
78  // TODO: support 0-d corner case.
79  assert(xferOp.getTransferRank() > 0 && "unexpected 0-d transfer");
80  auto map = xferOp.getPermutationMap();
81  return AffineMap::get(map.getNumDims(), 0, map.getResults().drop_front(),
82  b.getContext());
83 }
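// Illustrative note (not part of the original source): continuing the example
// above, dropping the first result yields
// ```
// affine_map<(d0, d1, d2) -> (d2, d1)>   // current permutation map
// affine_map<(d0, d1, d2) -> (d1)>       // unpackedPermutationMap result
// ```
// The number of dims is preserved; only the leading result is removed.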
84 
85 /// Calculate the indices for the new vector transfer op.
86 ///
87 /// E.g.: transfer_read %A[%a, %b, %c, %d] ... : vector<5x4x3xf32> ...
88 /// --> transfer_read %A[%a, %b + iv, %c, %d] ... vector<4x3xf32>
89 /// ^^^^^^
90 /// `iv` is the iteration variable of the (new) surrounding loop.
91 template <typename OpTy>
92 static void getXferIndices(OpBuilder &b, OpTy xferOp, Value iv,
93  SmallVector<Value, 8> &indices) {
94  typename OpTy::Adaptor adaptor(xferOp);
95  // Corresponding memref dim of the vector dim that is unpacked.
96  auto dim = unpackedDim(xferOp);
97  auto prevIndices = adaptor.getIndices();
98  indices.append(prevIndices.begin(), prevIndices.end());
99 
100  Location loc = xferOp.getLoc();
101  bool isBroadcast = !dim.has_value();
102  if (!isBroadcast) {
103  AffineExpr d0, d1;
104  bindDims(xferOp.getContext(), d0, d1);
105  Value offset = adaptor.getIndices()[*dim];
106  indices[*dim] =
107  affine::makeComposedAffineApply(b, loc, d0 + d1, {offset, iv});
108  }
109 }
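// Illustrative note (not part of the original source): in the non-broadcast
// case, the updated index is materialized as a composed affine apply, roughly:
// ```
// %new_idx = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%b, %iv)
// ```
// (makeComposedAffineApply may fold this further, e.g. when operands are
// constants.)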
110 
111 static void maybeYieldValue(OpBuilder &b, Location loc, bool hasRetVal,
112  Value value) {
113  if (hasRetVal) {
114  assert(value && "Expected non-empty value");
115  b.create<scf::YieldOp>(loc, value);
116  } else {
117  b.create<scf::YieldOp>(loc);
118  }
119 }
120 
121 /// Generates a boolean Value that is true if the iv-th bit in xferOp's mask
122 /// is set to true. No such check is generated under the following circumstances:
123 /// * xferOp does not have a mask.
124 /// * xferOp's mask is not 1D. (In case of (N>1)-D, a subvector of the mask is
125 /// computed and attached to the new transfer op in the pattern.)
126 /// * The to-be-unpacked dim of xferOp is a broadcast.
127 template <typename OpTy>
128 static Value generateMaskCheck(OpBuilder &b, OpTy xferOp, Value iv) {
129  if (!xferOp.getMask())
130  return Value();
131  if (xferOp.getMaskType().getRank() != 1)
132  return Value();
133  if (xferOp.isBroadcastDim(0))
134  return Value();
135 
136  Location loc = xferOp.getLoc();
137  return b.create<vector::ExtractElementOp>(loc, xferOp.getMask(), iv);
138 }
139 
140 /// Helper function for TransferOpConversion and TransferOp1dConversion.
141 /// Generate an in-bounds check if the transfer op may go out-of-bounds on the
142 /// specified dimension `dim` with the loop iteration variable `iv`.
143 /// E.g., when unpacking dimension 0 from:
144 /// ```
145 /// %vec = vector.transfer_read %A[%a, %b] %cst
146 /// : vector<5x4xf32>, memref<?x?xf32>
147 /// ```
148 /// An if check similar to this will be generated inside the loop:
149 /// ```
150 /// %d = memref.dim %A, %c0 : memref<?x?xf32>
151 /// if (%a + iv < %d) {
152 /// (in-bounds case)
153 /// } else {
154 /// (out-of-bounds case)
155 /// }
156 /// ```
157 ///
158 /// If the transfer is 1D and has a mask, this function generates a more complex
159 /// check that also accounts for potentially masked-out elements.
160 ///
161 /// This function variant returns the value returned by `inBoundsCase` or
162 /// `outOfBoundsCase`. The MLIR type of the return value must be specified in
163 /// `resultTypes`.
164 template <typename OpTy>
165 static Value generateInBoundsCheck(
166  OpBuilder &b, OpTy xferOp, Value iv, std::optional<int64_t> dim,
167  TypeRange resultTypes,
168  function_ref<Value(OpBuilder &, Location)> inBoundsCase,
169  function_ref<Value(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
170  bool hasRetVal = !resultTypes.empty();
171  Value cond; // Condition to be built...
172 
173  // Condition check 1: Access in-bounds?
174  bool isBroadcast = !dim; // No in-bounds check for broadcasts.
175  Location loc = xferOp.getLoc();
176  ImplicitLocOpBuilder lb(xferOp.getLoc(), b);
177  if (!xferOp.isDimInBounds(0) && !isBroadcast) {
178  Value memrefDim =
179  vector::createOrFoldDimOp(b, loc, xferOp.getSource(), *dim);
180  AffineExpr d0, d1;
181  bindDims(xferOp.getContext(), d0, d1);
182  Value base = xferOp.getIndices()[*dim];
183  Value memrefIdx =
184  affine::makeComposedAffineApply(b, loc, d0 + d1, {base, iv});
185  cond = lb.create<arith::CmpIOp>(arith::CmpIPredicate::sgt, memrefDim,
186  memrefIdx);
187  }
188 
189  // Condition check 2: Masked in?
190  if (auto maskCond = generateMaskCheck(b, xferOp, iv)) {
191  if (cond)
192  cond = lb.create<arith::AndIOp>(cond, maskCond);
193  else
194  cond = maskCond;
195  }
196 
197  // If the condition is non-empty, generate an SCF::IfOp.
198  if (cond) {
199  auto check = lb.create<scf::IfOp>(
200  cond,
201  /*thenBuilder=*/
202  [&](OpBuilder &b, Location loc) {
203  maybeYieldValue(b, loc, hasRetVal, inBoundsCase(b, loc));
204  },
205  /*elseBuilder=*/
206  [&](OpBuilder &b, Location loc) {
207  if (outOfBoundsCase) {
208  maybeYieldValue(b, loc, hasRetVal, outOfBoundsCase(b, loc));
209  } else {
210  b.create<scf::YieldOp>(loc);
211  }
212  });
213 
214  return hasRetVal ? check.getResult(0) : Value();
215  }
216 
217  // Condition is empty, no need for an SCF::IfOp.
218  return inBoundsCase(b, loc);
219 }
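// Illustrative sketch (not part of the original source) of the IR generated by
// generateInBoundsCheck when both the bounds check and a 1-D mask check apply;
// %A, %base, %mask and the vector sizes are made up for the example:
// ```
// %d = memref.dim %A, %c0 : memref<?x?xf32>
// %idx = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%base, %iv)
// %in_bounds = arith.cmpi sgt, %d, %idx : index
// %m = vector.extractelement %mask[%iv : index] : vector<9xi1>
// %cond = arith.andi %in_bounds, %m : i1
// %res = scf.if %cond -> (vector<9xf32>) {
//   // inBoundsCase, yielding its value
// } else {
//   // outOfBoundsCase (or a plain scf.yield if none was provided)
// }
// ```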
220 
221 /// In this function variant, `inBoundsCase` and `outOfBoundsCase` do not have
222 /// a return value. Consequently, this function does not have a return value.
223 template <typename OpTy>
224 static void generateInBoundsCheck(
225  OpBuilder &b, OpTy xferOp, Value iv, std::optional<int64_t> dim,
226  function_ref<void(OpBuilder &, Location)> inBoundsCase,
227  function_ref<void(OpBuilder &, Location)> outOfBoundsCase = nullptr) {
228  generateInBoundsCheck(
229  b, xferOp, iv, dim, /*resultTypes=*/TypeRange(),
230  /*inBoundsCase=*/
231  [&](OpBuilder &b, Location loc) {
232  inBoundsCase(b, loc);
233  return Value();
234  },
235  /*outOfBoundsCase=*/
236  [&](OpBuilder &b, Location loc) {
237  if (outOfBoundsCase)
238  outOfBoundsCase(b, loc);
239  return Value();
240  });
241 }
242 
243 /// Given an ArrayAttr, return a copy where the first element is dropped.
244 static ArrayAttr dropFirstElem(OpBuilder &b, ArrayAttr attr) {
245  if (!attr)
246  return attr;
247  return ArrayAttr::get(b.getContext(), attr.getValue().drop_front());
248 }
249 
250 /// Add the pass label to a vector transfer op if its rank is not the target
251 /// rank.
252 template <typename OpTy>
253 static void maybeApplyPassLabel(OpBuilder &b, OpTy newXferOp,
254  unsigned targetRank) {
255  if (newXferOp.getVectorType().getRank() > targetRank)
256  newXferOp->setAttr(kPassLabel, b.getUnitAttr());
257 }
258 
259 /// Return true if this transfer op operates on a source tensor.
260 template <typename OpTy>
261 static bool isTensorOp(OpTy xferOp) {
262  if (isa<RankedTensorType>(xferOp.getShapedType())) {
263  if (xferOp.getOperationName().equals(TransferWriteOp::getOperationName())) {
264  // TransferWriteOps on tensors have a result.
265  assert(xferOp->getNumResults() > 0);
266  }
267  return true;
268  }
269  return false;
270 }
271 
272 namespace lowering_n_d {
273 
274 /// Helper data structure for data and mask buffers.
275 struct BufferAllocs {
276  Value dataBuffer;
277  Value maskBuffer;
278 };
279 
280 // TODO: Parallelism and threadlocal considerations with a ParallelScope trait.
281 static Operation *getAutomaticAllocationScope(Operation *op) {
282  Operation *scope =
283      op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
284  assert(scope && "Expected op to be inside automatic allocation scope");
285  return scope;
286 }
287 
288 /// Allocate temporary buffers for data (vector) and mask (if present).
289 template <typename OpTy>
290 static BufferAllocs allocBuffers(OpBuilder &b, OpTy xferOp) {
291  Location loc = xferOp.getLoc();
292  OpBuilder::InsertionGuard guard(b);
293  Operation *scope = getAutomaticAllocationScope(xferOp);
294  assert(scope->getNumRegions() == 1 &&
295  "AutomaticAllocationScope with >1 regions");
296  b.setInsertionPointToStart(&scope->getRegion(0).front());
297 
298  BufferAllocs result;
299  auto bufferType = MemRefType::get({}, xferOp.getVectorType());
300  result.dataBuffer = b.create<memref::AllocaOp>(loc, bufferType);
301 
302  if (xferOp.getMask()) {
303  auto maskType = MemRefType::get({}, xferOp.getMask().getType());
304  auto maskBuffer = b.create<memref::AllocaOp>(loc, maskType);
305  b.setInsertionPoint(xferOp);
306  b.create<memref::StoreOp>(loc, xferOp.getMask(), maskBuffer);
307  result.maskBuffer = b.create<memref::LoadOp>(loc, maskBuffer, ValueRange());
308  }
309 
310  return result;
311 }
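// Illustrative example (not part of the original source): for a masked
// transfer op on vector<5x4xf32> with mask vector<5x4xi1>, allocBuffers
// produces roughly:
// ```
// %data_buf = memref.alloca() : memref<vector<5x4xf32>>
// %mask_buf = memref.alloca() : memref<vector<5x4xi1>>
// memref.store %mask, %mask_buf[] : memref<vector<5x4xi1>>
// %mask_reload = memref.load %mask_buf[] : memref<vector<5x4xi1>>
// ```
// The allocas are hoisted to the closest AutomaticAllocationScope, while the
// mask store/load stay next to the transfer op.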
312 
313 /// Given a MemRefType with VectorType element type, unpack one dimension from
314 /// the VectorType into the MemRefType.
315 ///
316 /// E.g.: memref<9xvector<5x6xf32>> --> memref<9x5xvector<6xf32>>
317 static FailureOr<MemRefType> unpackOneDim(MemRefType type) {
318  auto vectorType = dyn_cast<VectorType>(type.getElementType());
319  // Vectors with leading scalable dims are not supported.
320  // It may be possible to support these in future by using dynamic memref dims.
321  if (vectorType.getScalableDims().front())
322  return failure();
323  auto memrefShape = type.getShape();
324  SmallVector<int64_t, 8> newMemrefShape;
325  newMemrefShape.append(memrefShape.begin(), memrefShape.end());
326  newMemrefShape.push_back(vectorType.getDimSize(0));
327  return MemRefType::get(newMemrefShape,
328  VectorType::Builder(vectorType).dropDim(0));
329 }
330 
331 /// Given a transfer op, find the memref from which the mask is loaded. This
332 /// is similar to Strategy<TransferWriteOp>::getBuffer.
333 template <typename OpTy>
334 static Value getMaskBuffer(OpTy xferOp) {
335  assert(xferOp.getMask() && "Expected that transfer op has mask");
336  auto loadOp = xferOp.getMask().template getDefiningOp<memref::LoadOp>();
337  assert(loadOp && "Expected transfer op mask produced by LoadOp");
338  return loadOp.getMemRef();
339 }
340 
341 /// Codegen strategy, depending on the operation.
342 template <typename OpTy>
343 struct Strategy;
344 
345 /// Codegen strategy for vector TransferReadOp.
346 template <>
347 struct Strategy<TransferReadOp> {
348  /// Find the StoreOp that is used for writing the current TransferReadOp's
349  /// result to the temporary buffer allocation.
350  static memref::StoreOp getStoreOp(TransferReadOp xferOp) {
351  assert(xferOp->hasOneUse() && "Expected exactly one use of TransferReadOp");
352  auto storeOp = dyn_cast<memref::StoreOp>((*xferOp->use_begin()).getOwner());
353  assert(storeOp && "Expected TransferReadOp result used by StoreOp");
354  return storeOp;
355  }
356 
357  /// Find the temporary buffer allocation. All labeled TransferReadOps are
358  /// used like this, where %buf is either the buffer allocation or a type cast
359  /// of the buffer allocation:
360  /// ```
361  /// %vec = vector.transfer_read ... { __vector_to_scf_lowering__ } ...
362  /// memref.store %vec, %buf[...] ...
363  /// ```
364  static Value getBuffer(TransferReadOp xferOp) {
365  return getStoreOp(xferOp).getMemRef();
366  }
367 
368  /// Retrieve the indices of the current StoreOp that stores into the buffer.
369  static void getBufferIndices(TransferReadOp xferOp,
370  SmallVector<Value, 8> &indices) {
371  auto storeOp = getStoreOp(xferOp);
372  auto prevIndices = memref::StoreOpAdaptor(storeOp).getIndices();
373  indices.append(prevIndices.begin(), prevIndices.end());
374  }
375 
376  /// Rewrite the TransferReadOp, assuming that there are no out-of-bounds
377  /// accesses on the to-be-unpacked dimension.
378  ///
379  /// 1. Generate a new (N-1)-d TransferReadOp using the loop iteration
380  /// variable `iv`.
381  /// 2. Store the result into the (already `vector.type_cast`ed) buffer.
382  ///
383  /// E.g.:
384  /// ```
385  /// %vec = vector.transfer_read %A[%a+%i, %b, %c], %cst
386  /// : memref<?x?x?xf32>, vector<4x3xf32>
387  /// memref.store %vec, %buf[%i] : memref<5xvector<4x3xf32>>
388  /// ```
389  /// Is rewritten to:
390  /// ```
391  /// %casted = vector.type_cast %buf
392  /// : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
393  /// for %j = 0 to 4 {
394  /// %vec = vector.transfer_read %A[%a+%i, %b+%j, %c], %cst
395  /// : memref<?x?x?xf32>, vector<3xf32>
396  /// memref.store %vec, %casted[%i, %j] : memref<5x4xvector<3xf32>>
397  /// }
398  /// ```
399  ///
400  /// Note: The loop and type cast are generated in TransferOpConversion.
401  /// The original TransferReadOp and store op are deleted in `cleanup`.
402  /// Note: The `mask` operand is set in TransferOpConversion.
403  static TransferReadOp rewriteOp(OpBuilder &b,
404                                  VectorTransferToSCFOptions options,
405                                  TransferReadOp xferOp, Value buffer, Value iv,
406                                  ValueRange /*loopState*/) {
407  SmallVector<Value, 8> storeIndices;
408  getBufferIndices(xferOp, storeIndices);
409  storeIndices.push_back(iv);
410 
411  SmallVector<Value, 8> xferIndices;
412  getXferIndices(b, xferOp, iv, xferIndices);
413 
414  Location loc = xferOp.getLoc();
415  auto bufferType = dyn_cast<ShapedType>(buffer.getType());
416  auto vecType = dyn_cast<VectorType>(bufferType.getElementType());
417  auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
418  auto newXferOp = b.create<vector::TransferReadOp>(
419  loc, vecType, xferOp.getSource(), xferIndices,
420  AffineMapAttr::get(unpackedPermutationMap(b, xferOp)),
421  xferOp.getPadding(), Value(), inBoundsAttr);
422 
423  maybeApplyPassLabel(b, newXferOp, options.targetRank);
424 
425  b.create<memref::StoreOp>(loc, newXferOp.getVector(), buffer, storeIndices);
426  return newXferOp;
427  }
428 
429  /// Handle out-of-bounds accesses on the to-be-unpacked dimension: Write
430  /// padding value to the temporary buffer.
431  static Value handleOutOfBoundsDim(OpBuilder &b, TransferReadOp xferOp,
432  Value buffer, Value iv,
433  ValueRange /*loopState*/) {
434  SmallVector<Value, 8> storeIndices;
435  getBufferIndices(xferOp, storeIndices);
436  storeIndices.push_back(iv);
437 
438  Location loc = xferOp.getLoc();
439  auto bufferType = dyn_cast<ShapedType>(buffer.getType());
440  auto vecType = dyn_cast<VectorType>(bufferType.getElementType());
441  auto vec = b.create<vector::SplatOp>(loc, vecType, xferOp.getPadding());
442  b.create<memref::StoreOp>(loc, vec, buffer, storeIndices);
443 
444  return Value();
445  }
446 
447  /// Cleanup after rewriting the op.
448  static void cleanup(PatternRewriter &rewriter, TransferReadOp xferOp,
449  scf::ForOp /*forOp*/) {
450  rewriter.eraseOp(getStoreOp(xferOp));
451  rewriter.eraseOp(xferOp);
452  }
453 
454  /// Return the initial loop state for the generated scf.for loop.
455  static Value initialLoopState(TransferReadOp xferOp) { return Value(); }
456 };
457 
458 /// Codegen strategy for vector TransferWriteOp.
459 template <>
460 struct Strategy<TransferWriteOp> {
461  /// Find the temporary buffer allocation. All labeled TransferWriteOps are
462  /// used like this, where %buf is either the buffer allocation or a type cast
463  /// of the buffer allocation:
464  /// ```
465  /// %vec = memref.load %buf[...] ...
466  /// vector.transfer_write %vec ... { __vector_to_scf_lowering__ } ...
467  /// ```
468  static Value getBuffer(TransferWriteOp xferOp) {
469  auto loadOp = xferOp.getVector().getDefiningOp<memref::LoadOp>();
470  assert(loadOp && "Expected transfer op vector produced by LoadOp");
471  return loadOp.getMemRef();
472  }
473 
474  /// Retrieve the indices of the current LoadOp that loads from the buffer.
475  static void getBufferIndices(TransferWriteOp xferOp,
476  SmallVector<Value, 8> &indices) {
477  auto loadOp = xferOp.getVector().getDefiningOp<memref::LoadOp>();
478  auto prevIndices = memref::LoadOpAdaptor(loadOp).getIndices();
479  indices.append(prevIndices.begin(), prevIndices.end());
480  }
481 
482  /// Rewrite the TransferWriteOp, assuming that there are no out-of-bounds
483  /// accesses on the to-be-unpacked dimension.
484  ///
485  /// 1. Load an (N-1)-d vector from the (already `vector.type_cast`ed) buffer,
486  /// using the loop iteration variable `iv`.
487  /// 2. Generate a new (N-1)-d TransferWriteOp, writing the loaded vector back
488  /// to memory.
489  ///
490  /// Note: For more details, see comments on Strategy<TransferReadOp>.
491  static TransferWriteOp rewriteOp(OpBuilder &b,
492                                   VectorTransferToSCFOptions options,
493                                   TransferWriteOp xferOp, Value buffer,
494                                   Value iv, ValueRange loopState) {
495  SmallVector<Value, 8> loadIndices;
496  getBufferIndices(xferOp, loadIndices);
497  loadIndices.push_back(iv);
498 
499  SmallVector<Value, 8> xferIndices;
500  getXferIndices(b, xferOp, iv, xferIndices);
501 
502  Location loc = xferOp.getLoc();
503  auto vec = b.create<memref::LoadOp>(loc, buffer, loadIndices);
504  auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
505  auto source = loopState.empty() ? xferOp.getSource() : loopState[0];
506  Type type = isTensorOp(xferOp) ? xferOp.getShapedType() : Type();
507  auto newXferOp = b.create<vector::TransferWriteOp>(
508  loc, type, vec, source, xferIndices,
509  AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), Value(),
510  inBoundsAttr);
511 
512  maybeApplyPassLabel(b, newXferOp, options.targetRank);
513 
514  return newXferOp;
515  }
516 
517  /// Handle out-of-bounds accesses on the to-be-unpacked dimension.
518  static Value handleOutOfBoundsDim(OpBuilder &b, TransferWriteOp xferOp,
519  Value buffer, Value iv,
520  ValueRange loopState) {
521  return isTensorOp(xferOp) ? loopState[0] : Value();
522  }
523 
524  /// Cleanup after rewriting the op.
525  static void cleanup(PatternRewriter &rewriter, TransferWriteOp xferOp,
526  scf::ForOp forOp) {
527  if (isTensorOp(xferOp)) {
528  assert(forOp->getNumResults() == 1 && "Expected one for loop result");
529  rewriter.replaceOp(xferOp, forOp->getResult(0));
530  } else {
531  rewriter.eraseOp(xferOp);
532  }
533  }
534 
535  /// Return the initial loop state for the generated scf.for loop.
536  static Value initialLoopState(TransferWriteOp xferOp) {
537  return isTensorOp(xferOp) ? xferOp.getSource() : Value();
538  }
539 };
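// Illustrative example (not part of the original source), mirroring the
// TransferReadOp strategy above: a labeled write fed by a buffer load
// ```
// %vec = memref.load %buf[%i] : memref<5xvector<4x3xf32>>
// vector.transfer_write %vec, %A[%a + %i, %b, %c]
//     : vector<4x3xf32>, memref<?x?x?xf32>
// ```
// is rewritten (by TransferOpConversion below) to roughly:
// ```
// %casted = vector.type_cast %buf
//     : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
// for %j = 0 to 4 {
//   %v = memref.load %casted[%i, %j] : memref<5x4xvector<3xf32>>
//   vector.transfer_write %v, %A[%a + %i, %b + %j, %c]
//       : vector<3xf32>, memref<?x?x?xf32>
// }
// ```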
540 
541 template <typename OpTy>
542 LogicalResult checkPrepareXferOp(OpTy xferOp,
543                                  VectorTransferToSCFOptions options) {
544  if (xferOp->hasAttr(kPassLabel))
545  return failure();
546  if (xferOp.getVectorType().getRank() <= options.targetRank)
547  return failure();
548  // Currently the unpacking of the leading dimension into the memref is not
549  // supported for scalable dimensions.
550  if (xferOp.getVectorType().getScalableDims().front())
551  return failure();
552  if (isTensorOp(xferOp) && !options.lowerTensors)
553  return failure();
554  // Transfer ops that modify the element type are not supported atm.
555  if (xferOp.getVectorType().getElementType() !=
556  xferOp.getShapedType().getElementType())
557  return failure();
558  return success();
559 }
560 
561 /// Prepare a TransferReadOp for progressive lowering.
562 ///
563 /// 1. Allocate a temporary buffer.
564 /// 2. Label the TransferReadOp, marking it eligible for progressive lowering.
565 /// 3. Store the result of the TransferReadOp into the temporary buffer.
566 /// 4. Load the result from the temporary buffer and replace all uses of the
567 /// original TransferReadOp with this load.
568 ///
569 /// E.g.:
570 /// ```
571 /// %vec = vector.transfer_read %A[%a, %b, %c], %cst
572 /// : vector<5x4xf32>, memref<?x?x?xf32>
573 /// ```
574 /// is rewritten to:
575 /// ```
576 /// %0 = memref.alloca() : memref<vector<5x4xf32>>
577 /// %1 = vector.transfer_read %A[%a, %b, %c], %cst
578 /// { __vector_to_scf_lowering__ } : vector<5x4xf32>, memref<?x?x?xf32>
579 /// memref.store %1, %0[] : memref<vector<5x4xf32>>
580 /// %vec = memref.load %0[] : memref<vector<5x4xf32>>
581 /// ```
582 ///
583 /// Note: A second temporary buffer may be allocated for the `mask` operand.
584 struct PrepareTransferReadConversion
585  : public VectorToSCFPattern<TransferReadOp> {
586  using VectorToSCFPattern<TransferReadOp>::VectorToSCFPattern;
587 
588  LogicalResult matchAndRewrite(TransferReadOp xferOp,
589  PatternRewriter &rewriter) const override {
590  if (checkPrepareXferOp(xferOp, options).failed())
591  return failure();
592 
593  auto buffers = allocBuffers(rewriter, xferOp);
594  auto *newXfer = rewriter.clone(*xferOp.getOperation());
595  newXfer->setAttr(kPassLabel, rewriter.getUnitAttr());
596  if (xferOp.getMask()) {
597  dyn_cast<TransferReadOp>(newXfer).getMaskMutable().assign(
598  buffers.maskBuffer);
599  }
600 
601  Location loc = xferOp.getLoc();
602  rewriter.create<memref::StoreOp>(loc, newXfer->getResult(0),
603  buffers.dataBuffer);
604  rewriter.replaceOpWithNewOp<memref::LoadOp>(xferOp, buffers.dataBuffer);
605 
606  return success();
607  }
608 };
609 
610 /// Prepare a TransferWriteOp for progressive lowering.
611 ///
612 /// 1. Allocate a temporary buffer.
613 /// 2. Store the vector into the buffer.
614 /// 3. Load the vector from the buffer again.
615 /// 4. Use the loaded vector as a TransferWriteOp operand and label the op,
616 /// marking it eligible for progressive lowering via TransferOpConversion.
617 ///
618 /// E.g.:
619 /// ```
620 /// vector.transfer_write %vec, %A[%a, %b, %c]
621 /// : vector<5x4xf32>, memref<?x?x?xf32>
622 /// ```
623 /// is rewritten to:
624 /// ```
625 /// %0 = memref.alloca() : memref<vector<5x4xf32>>
626 /// memref.store %vec, %0[] : memref<vector<5x4xf32>>
627 /// %1 = memref.load %0[] : memref<vector<5x4xf32>>
628 /// vector.transfer_write %1, %A[%a, %b, %c] { __vector_to_scf_lowering__ }
629 /// : vector<5x4xf32>, memref<?x?x?xf32>
630 /// ```
631 ///
632 /// Note: A second temporary buffer may be allocated for the `mask` operand.
633 struct PrepareTransferWriteConversion
634  : public VectorToSCFPattern<TransferWriteOp> {
635  using VectorToSCFPattern<TransferWriteOp>::VectorToSCFPattern;
636 
637  LogicalResult matchAndRewrite(TransferWriteOp xferOp,
638  PatternRewriter &rewriter) const override {
639  if (checkPrepareXferOp(xferOp, options).failed())
640  return failure();
641 
642  Location loc = xferOp.getLoc();
643  auto buffers = allocBuffers(rewriter, xferOp);
644  rewriter.create<memref::StoreOp>(loc, xferOp.getVector(),
645  buffers.dataBuffer);
646  auto loadedVec = rewriter.create<memref::LoadOp>(loc, buffers.dataBuffer);
647  rewriter.updateRootInPlace(xferOp, [&]() {
648  xferOp.getVectorMutable().assign(loadedVec);
649  xferOp->setAttr(kPassLabel, rewriter.getUnitAttr());
650  });
651 
652  if (xferOp.getMask()) {
653  rewriter.updateRootInPlace(xferOp, [&]() {
654  xferOp.getMaskMutable().assign(buffers.maskBuffer);
655  });
656  }
657 
658  return success();
659  }
660 };
661 
662 /// Decompose an n-D PrintOp into a loop of elementary/scalar prints. This allows
663 /// printing both 1D scalable vectors and n-D fixed-size vectors.
664 ///
665 /// E.g.:
666 /// ```
667 /// vector.print %v : vector<[4]xi32>
668 /// ```
669 /// is rewritten to:
670 /// ```
671 /// %c0 = arith.constant 0 : index
672 /// %c4 = arith.constant 4 : index
673 /// %c1 = arith.constant 1 : index
674 /// %vscale = vector.vscale
675 /// %length = arith.muli %vscale, %c4 : index
676 /// %lastIndex = arith.subi %length, %c1 : index
677 /// vector.print punctuation <open>
678 /// scf.for %i = %c0 to %length step %c1 {
679 /// %el = vector.extractelement %v[%i : index] : vector<[4]xi32>
680 /// vector.print %el : i32 punctuation <no_punctuation>
681 /// %notLastIndex = arith.cmpi ult, %i, %lastIndex : index
682 /// scf.if %notLastIndex {
683 /// vector.print punctuation <comma>
684 /// }
685 /// }
686 /// vector.print punctuation <close>
687 /// vector.print
688 /// ```
689 struct DecomposePrintOpConversion : public VectorToSCFPattern<vector::PrintOp> {
690  using VectorToSCFPattern<vector::PrintOp>::VectorToSCFPattern;
691  LogicalResult matchAndRewrite(vector::PrintOp printOp,
692  PatternRewriter &rewriter) const override {
693  if (!printOp.getSource())
694  return failure();
695 
696  VectorType vectorType = dyn_cast<VectorType>(printOp.getPrintType());
697  if (!vectorType)
698  return failure();
699 
700  // Currently >= 2D scalable vectors are not supported.
701  // These can't be lowered to LLVM (as LLVM does not support scalable vectors
702  // of scalable vectors), and due to limitations of current ops can't be
703  // indexed with SSA values or flattened. This may change after
704  // https://reviews.llvm.org/D155034, though there still needs to be a path
705  // for lowering to LLVM.
706  if (vectorType.getRank() > 1 && vectorType.isScalable())
707  return failure();
708 
709  auto loc = printOp.getLoc();
710  auto value = printOp.getSource();
711 
712  if (auto intTy = dyn_cast<IntegerType>(vectorType.getElementType())) {
713  // Oddly sized integers are (somewhat) buggy on a lot of backends, so to
714  // avoid issues extend them to a more standard size.
715  // https://github.com/llvm/llvm-project/issues/30613
716  auto width = intTy.getWidth();
717  auto legalWidth = llvm::NextPowerOf2(std::max(8u, width) - 1);
718  auto legalIntTy = IntegerType::get(rewriter.getContext(), legalWidth,
719  intTy.getSignedness());
720  // arith can only take signless integers, so we must cast back and forth.
721  auto signlessSourceVectorType =
722  vectorType.cloneWith({}, getIntTypeWithSignlessSemantics(intTy));
723  auto signlessTargetVectorType =
724  vectorType.cloneWith({}, getIntTypeWithSignlessSemantics(legalIntTy));
725  auto targetVectorType = vectorType.cloneWith({}, legalIntTy);
726  value = rewriter.create<vector::BitCastOp>(loc, signlessSourceVectorType,
727  value);
728  if (width == 1 || intTy.isUnsigned())
729  value = rewriter.create<arith::ExtUIOp>(loc, signlessTargetVectorType,
730  value);
731  else
732  value = rewriter.create<arith::ExtSIOp>(loc, signlessTargetVectorType,
733  value);
734  value = rewriter.create<vector::BitCastOp>(loc, targetVectorType, value);
735  vectorType = targetVectorType;
736  }
737 
738  auto scalableDimensions = vectorType.getScalableDims();
739  auto shape = vectorType.getShape();
740  constexpr int64_t singletonShape[] = {1};
741  if (vectorType.getRank() == 0)
742  shape = singletonShape;
743 
744  if (vectorType.getRank() != 1) {
745  // Flatten n-D vectors to 1D. This is done to allow indexing with a
746  // non-constant value (which can currently only be done via
747  // vector.extractelement for 1D vectors).
748  auto flatLength = std::accumulate(shape.begin(), shape.end(), 1,
749  std::multiplies<int64_t>());
750  auto flatVectorType =
751  VectorType::get({flatLength}, vectorType.getElementType());
752  value = rewriter.create<vector::ShapeCastOp>(loc, flatVectorType, value);
753  }
754 
755  vector::PrintOp firstClose;
756  SmallVector<Value, 8> loopIndices;
757  for (unsigned d = 0; d < shape.size(); d++) {
758  // Setup loop bounds and step.
759  Value lowerBound = rewriter.create<arith::ConstantIndexOp>(loc, 0);
760  Value upperBound = rewriter.create<arith::ConstantIndexOp>(loc, shape[d]);
761  Value step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
762  if (!scalableDimensions.empty() && scalableDimensions[d]) {
763  auto vscale = rewriter.create<vector::VectorScaleOp>(
764  loc, rewriter.getIndexType());
765  upperBound = rewriter.create<arith::MulIOp>(loc, upperBound, vscale);
766  }
767  auto lastIndex = rewriter.create<arith::SubIOp>(loc, upperBound, step);
768 
769  // Create a loop to print the elements surrounded by parentheses.
770  rewriter.create<vector::PrintOp>(loc, vector::PrintPunctuation::Open);
771  auto loop =
772  rewriter.create<scf::ForOp>(loc, lowerBound, upperBound, step);
773  auto printClose = rewriter.create<vector::PrintOp>(
774  loc, vector::PrintPunctuation::Close);
775  if (!firstClose)
776  firstClose = printClose;
777 
778  auto loopIdx = loop.getInductionVar();
779  loopIndices.push_back(loopIdx);
780 
781  // Print a comma after all but the last element.
782  rewriter.setInsertionPointToStart(loop.getBody());
783  auto notLastIndex = rewriter.create<arith::CmpIOp>(
784  loc, arith::CmpIPredicate::ult, loopIdx, lastIndex);
785  rewriter.create<scf::IfOp>(loc, notLastIndex,
786  [&](OpBuilder &builder, Location loc) {
787  builder.create<vector::PrintOp>(
788  loc, vector::PrintPunctuation::Comma);
789  builder.create<scf::YieldOp>(loc);
790  });
791 
792  rewriter.setInsertionPointToStart(loop.getBody());
793  }
794 
795  // Compute the flattened index.
796  // Note: For vectors of rank > 1, this assumes that all dimensions are non-scalable.
797  Value flatIndex;
798  auto currentStride = 1;
799  for (int d = shape.size() - 1; d >= 0; d--) {
800  auto stride = rewriter.create<arith::ConstantIndexOp>(loc, currentStride);
801  auto index = rewriter.create<arith::MulIOp>(loc, stride, loopIndices[d]);
802  if (flatIndex)
803  flatIndex = rewriter.create<arith::AddIOp>(loc, flatIndex, index);
804  else
805  flatIndex = index;
806  currentStride *= shape[d];
807  }
808 
809  // Print the scalar elements in the innermost loop.
810  auto element =
811  rewriter.create<vector::ExtractElementOp>(loc, value, flatIndex);
812  rewriter.create<vector::PrintOp>(loc, element,
813  vector::PrintPunctuation::NoPunctuation);
814 
815  rewriter.setInsertionPointAfter(firstClose);
816  rewriter.create<vector::PrintOp>(loc, printOp.getPunctuation());
817  rewriter.eraseOp(printOp);
818  return success();
819  }
820 
821  static IntegerType getIntTypeWithSignlessSemantics(IntegerType intTy) {
822  return IntegerType::get(intTy.getContext(), intTy.getWidth(),
823  IntegerType::Signless);
824  };
825 };
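// Illustrative sketch (not part of the original source) for the n-D fixed-size
// path: printing %v : vector<2x2xi32> is flattened and then lowered to roughly
// ```
// %flat = vector.shape_cast %v : vector<2x2xi32> to vector<4xi32>
// vector.print punctuation <open>
// scf.for %i = %c0 to %c2 step %c1 {
//   vector.print punctuation <open>
//   scf.for %j = %c0 to %c2 step %c1 {
//     // flattened index %idx = %i * 2 + %j
//     %el = vector.extractelement %flat[%idx : index] : vector<4xi32>
//     vector.print %el : i32 punctuation <no_punctuation>
//     // print <comma> unless %j is the last index
//   }
//   vector.print punctuation <close>
//   // print <comma> unless %i is the last index
// }
// vector.print punctuation <close>
// vector.print
// ```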
826 
827 /// Progressive lowering of vector transfer ops: Unpack one dimension.
828 ///
829 /// 1. Unpack one dimension from the current buffer type and cast the buffer
830 /// to that new type. E.g.:
831 /// ```
832 /// %vec = memref.load %0[%1] : memref<5xvector<4x3xf32>>
833 /// vector.transfer_write %vec ...
834 /// ```
835 /// The following cast is generated:
836 /// ```
837 /// %casted = vector.type_cast %0
838 /// : memref<5xvector<4x3xf32>> to memref<5x4xvector<3xf32>>
839 /// ```
840 /// 2. Generate a for loop and rewrite the transfer op according to the
841 /// corresponding Strategy<OpTy>. If the to-be-unpacked dimension can be
842 /// out-of-bounds, generate an if-check and handle both cases separately.
843 /// 3. Clean up according to the corresponding Strategy<OpTy>.
844 ///
845 /// Note: If the transfer op is a TransferWriteOp and operates on a tensor
846 /// source (as opposed to a memref source), then each iteration of the generated
847 /// scf.for loop yields the new tensor value. E.g.:
848 /// ```
849 /// %result = scf.for i = 0 to 5 {
850 /// %0 = memref.load %buffer[i] : memref<5xvector<4x3xf32>>
851 /// %1 = vector.transfer_write %0, %source[...]
852 /// : vector<4x3xf32>, tensor<5x4x3xf32>
853 /// scf.yield %1 : tensor<5x4x3xf32>
854 /// }
855 /// ```
856 template <typename OpTy>
857 struct TransferOpConversion : public VectorToSCFPattern<OpTy> {
858  using VectorToSCFPattern<OpTy>::VectorToSCFPattern;
859 
860  void initialize() {
861  // This pattern recursively unpacks one dimension at a time. The recursion
862  // is bounded because the rank is strictly decreasing.
863  this->setHasBoundedRewriteRecursion();
864  }
865 
866  LogicalResult matchAndRewrite(OpTy xferOp,
867  PatternRewriter &rewriter) const override {
868  if (!xferOp->hasAttr(kPassLabel))
869  return failure();
870 
871  // Find and cast data buffer. How the buffer can be found depends on OpTy.
872  ImplicitLocOpBuilder locB(xferOp.getLoc(), rewriter);
873  auto dataBuffer = Strategy<OpTy>::getBuffer(xferOp);
874  auto dataBufferType = dyn_cast<MemRefType>(dataBuffer.getType());
875  auto castedDataType = unpackOneDim(dataBufferType);
876  if (failed(castedDataType))
877  return failure();
878 
879  auto castedDataBuffer =
880  locB.create<vector::TypeCastOp>(*castedDataType, dataBuffer);
881 
882  // If the xferOp has a mask: Find and cast mask buffer.
883  Value castedMaskBuffer;
884  if (xferOp.getMask()) {
885  auto maskBuffer = getMaskBuffer(xferOp);
886  auto maskBufferType = dyn_cast<MemRefType>(maskBuffer.getType());
887  if (xferOp.isBroadcastDim(0) || xferOp.getMaskType().getRank() == 1) {
888  // Do not unpack a dimension of the mask, if:
889  // * To-be-unpacked transfer op dimension is a broadcast.
890  // * Mask is 1D, i.e., the mask cannot be further unpacked.
891  // (That means that all remaining dimensions of the transfer op must
892  // be broadcasted.)
893  castedMaskBuffer = maskBuffer;
894  } else {
895  // It's safe to assume the mask buffer can be unpacked if the data
896  // buffer was unpacked.
897  auto castedMaskType = *unpackOneDim(maskBufferType);
898  castedMaskBuffer =
899  locB.create<vector::TypeCastOp>(castedMaskType, maskBuffer);
900  }
901  }
902 
903  // Loop bounds and step.
904  auto lb = locB.create<arith::ConstantIndexOp>(0);
905  auto ub = locB.create<arith::ConstantIndexOp>(
906  castedDataType->getDimSize(castedDataType->getRank() - 1));
907  auto step = locB.create<arith::ConstantIndexOp>(1);
908  // TransferWriteOps that operate on tensors return the modified tensor and
909  // require a loop state.
910  auto loopState = Strategy<OpTy>::initialLoopState(xferOp);
911 
912  // Generate for loop.
913  auto result = locB.create<scf::ForOp>(
914  lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
915  [&](OpBuilder &b, Location loc, Value iv, ValueRange loopState) {
916  Type stateType = loopState.empty() ? Type() : loopState[0].getType();
917 
918  auto result = generateInBoundsCheck(
919  b, xferOp, iv, unpackedDim(xferOp),
920  stateType ? TypeRange(stateType) : TypeRange(),
921  /*inBoundsCase=*/
922  [&](OpBuilder &b, Location loc) {
923  // Create new transfer op.
924  OpTy newXfer = Strategy<OpTy>::rewriteOp(
925  b, this->options, xferOp, castedDataBuffer, iv, loopState);
926 
927  // If old transfer op has a mask: Set mask on new transfer op.
928  // Special case: If the mask of the old transfer op is 1D and the
929  // unpacked dim is not a broadcast, no mask is needed on the new
930  // transfer op.
932  if (xferOp.getMask() && (xferOp.isBroadcastDim(0) ||
933  xferOp.getMaskType().getRank() > 1)) {
934  OpBuilder::InsertionGuard guard(b);
935  b.setInsertionPoint(newXfer); // Insert load before newXfer.
936 
937  SmallVector<Value, 8> loadIndices;
938  Strategy<OpTy>::getBufferIndices(xferOp, loadIndices);
939  // In case of broadcast: Use same indices to load from memref
940  // as before.
941  if (!xferOp.isBroadcastDim(0))
942  loadIndices.push_back(iv);
943 
944  auto mask = b.create<memref::LoadOp>(loc, castedMaskBuffer,
945  loadIndices);
946  rewriter.updateRootInPlace(newXfer, [&]() {
947  newXfer.getMaskMutable().assign(mask);
948  });
949  }
950 
951  return loopState.empty() ? Value() : newXfer->getResult(0);
952  },
953  /*outOfBoundsCase=*/
954  [&](OpBuilder &b, Location /*loc*/) {
955  return Strategy<OpTy>::handleOutOfBoundsDim(
956  b, xferOp, castedDataBuffer, iv, loopState);
957  });
958 
959  maybeYieldValue(b, loc, !loopState.empty(), result);
960  });
961 
962  Strategy<OpTy>::cleanup(rewriter, xferOp, result);
963  return success();
964  }
965 };
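// End-to-end illustration (simplified, not part of the original source): with
// targetRank = 1, the labeled 2-D read produced by PrepareTransferReadConversion
// ```
// %0 = memref.alloca() : memref<vector<5x4xf32>>
// %1 = vector.transfer_read %A[%a, %b], %cst { __vector_to_scf_lowering__ }
//     : memref<?x?xf32>, vector<5x4xf32>
// memref.store %1, %0[] : memref<vector<5x4xf32>>
// ```
// is rewritten by TransferOpConversion to roughly:
// ```
// %casted = vector.type_cast %0
//     : memref<vector<5x4xf32>> to memref<5xvector<4xf32>>
// scf.for %i = %c0 to %c5 step %c1 {
//   %r = vector.transfer_read %A[%a + %i, %b], %cst
//       : memref<?x?xf32>, vector<4xf32>
//   memref.store %r, %casted[%i] : memref<5xvector<4xf32>>
// }
// ```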
966 
967 } // namespace lowering_n_d
968 
969 namespace lowering_n_d_unrolled {
970 
971 /// If the original transfer op has a mask, compute the mask of the new transfer
972 /// op (for the current iteration `i`) and assign it.
973 template <typename OpTy>
974 static void maybeAssignMask(OpBuilder &b, OpTy xferOp, OpTy newXferOp,
975  int64_t i) {
976  if (!xferOp.getMask())
977  return;
978 
979  if (xferOp.isBroadcastDim(0)) {
980  // To-be-unpacked dimension is a broadcast, which does not have a
981  // corresponding mask dimension. Mask attribute remains unchanged.
982  newXferOp.getMaskMutable().assign(xferOp.getMask());
983  return;
984  }
985 
986  if (xferOp.getMaskType().getRank() > 1) {
987  // Unpack one dimension of the mask.
988  OpBuilder::InsertionGuard guard(b);
989  b.setInsertionPoint(newXferOp); // Insert load before newXfer.
990 
991  llvm::SmallVector<int64_t, 1> indices({i});
992  Location loc = xferOp.getLoc();
993  auto newMask = b.create<vector::ExtractOp>(loc, xferOp.getMask(), indices);
994  newXferOp.getMaskMutable().assign(newMask);
995  }
996 
997  // If we end up here: The mask of the old transfer op is 1D and the unpacked
998  // dim is not a broadcast, so no mask is needed on the new transfer op.
999  // `generateInBoundsCheck` will have evaluated the mask already.
1000 }
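// Illustrative example (not part of the original source): for a mask of type
// vector<5x4xi1> and unrolled iteration i, the new (N-1)-D transfer op receives
// ```
// %new_mask = vector.extract %mask[i] : vector<4xi1> from vector<5x4xi1>
// ```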
1001 
1002 /// Progressive lowering of vector TransferReadOp with unrolling: Unpack one
1003 /// dimension. This is similar to TransferOpConversion<TransferReadOp>, but no
1004 /// memref buffer is allocated and the SCF loop is fully unrolled.
1005 ///
1006 /// E.g.:
1007 /// ```
1009 /// %vec = vector.transfer_read %A[%a, %b, %c], %padding
1010 /// : memref<?x?x?xf32>, vector<5x4xf32>
1011 /// ```
1012 /// is rewritten to IR such as (simplified):
1013 /// ```
1014 /// %v_init = splat %padding : vector<5x4xf32>
1015 /// %tmp0 = vector.transfer_read %A[%a, %b, %c], %padding
1016 /// : memref<?x?x?xf32>, vector<4xf32>
1017 /// %v0 = vector.insert %tmp0, %v_init[0] : vector<4xf32> into vector<5x4xf32>
1018 /// %tmp1 = vector.transfer_read %A[%a, %b + 1, %c], %padding
1019 /// : memref<?x?x?xf32>, vector<4xf32>
1020 /// %v1 = vector.insert %tmp1, %v0[1] : vector<4xf32> into vector<5x4xf32>
1021 /// ...
1022 /// %tmp4 = vector.transfer_read %A[%a, %b + 4, %c], %padding
1023 /// : memref<?x?x?xf32>, vector<4xf32>
1024 /// %vec = vector.insert %tmp4, %v3[4] : vector<4xf32> into vector<5x4xf32>
1025 /// ```
1026 ///
1027 /// Note: As an optimization, if the result of the original TransferReadOp
1028 /// was directly inserted into another vector, no new %v_init vector is created.
1029 /// Instead, the new TransferReadOp results are inserted into that vector.
1030 struct UnrollTransferReadConversion
1031  : public VectorToSCFPattern<TransferReadOp> {
1032  using VectorToSCFPattern<TransferReadOp>::VectorToSCFPattern;
1033 
1034  void initialize() {
1035  // This pattern recursively unpacks one dimension at a time. The recursion
1036  // is bounded because the rank is strictly decreasing.
1037  setHasBoundedRewriteRecursion();
1038  }
1039 
1040  /// Return the vector into which the newly created TransferReadOp results
1041  /// are inserted.
1042  Value getResultVector(TransferReadOp xferOp,
1043  PatternRewriter &rewriter) const {
1044  if (auto insertOp = getInsertOp(xferOp))
1045  return insertOp.getDest();
1046  Location loc = xferOp.getLoc();
1047  return rewriter.create<vector::SplatOp>(loc, xferOp.getVectorType(),
1048  xferOp.getPadding());
1049  }
1050 
1051  /// If the result of the TransferReadOp has exactly one user, which is a
1052  /// vector::InsertOp, return that operation.
1053  vector::InsertOp getInsertOp(TransferReadOp xferOp) const {
1054  if (xferOp->hasOneUse()) {
1055  Operation *xferOpUser = *xferOp->getUsers().begin();
1056  if (auto insertOp = dyn_cast<vector::InsertOp>(xferOpUser))
1057  return insertOp;
1058  }
1059 
1060  return vector::InsertOp();
1061  }
1062 
1063  /// If the result of the TransferReadOp has exactly one user, which is a
1064  /// vector::InsertOp, return that operation's indices.
1065  void getInsertionIndices(TransferReadOp xferOp,
1066  SmallVectorImpl<OpFoldResult> &indices) const {
1067  if (auto insertOp = getInsertOp(xferOp)) {
1068  auto pos = insertOp.getMixedPosition();
1069  indices.append(pos.begin(), pos.end());
1070  }
1071  }
1072 
1073  /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
1074  /// accesses, and broadcasts and transposes in permutation maps.
1075  LogicalResult matchAndRewrite(TransferReadOp xferOp,
1076  PatternRewriter &rewriter) const override {
1077  if (xferOp.getVectorType().getRank() <= options.targetRank)
1078  return failure();
1079  if (isTensorOp(xferOp) && !options.lowerTensors)
1080  return failure();
1081  // Transfer ops that modify the element type are not supported atm.
1082  if (xferOp.getVectorType().getElementType() !=
1083  xferOp.getShapedType().getElementType())
1084  return failure();
1085 
1086  auto insertOp = getInsertOp(xferOp);
1087  auto vec = getResultVector(xferOp, rewriter);
1088  auto vecType = dyn_cast<VectorType>(vec.getType());
1089  auto xferVecType = xferOp.getVectorType();
1090 
1091  if (xferVecType.getScalableDims()[0]) {
1092  // Cannot unroll a scalable dimension at compile time.
1093  return failure();
1094  }
1095 
1096  VectorType newXferVecType = VectorType::Builder(xferVecType).dropDim(0);
1097 
1098  int64_t dimSize = xferVecType.getShape()[0];
1099 
1100  // Generate fully unrolled loop of transfer ops.
1101  Location loc = xferOp.getLoc();
1102  for (int64_t i = 0; i < dimSize; ++i) {
1103  Value iv = rewriter.create<arith::ConstantIndexOp>(loc, i);
1104 
1105  vec = generateInBoundsCheck(
1106  rewriter, xferOp, iv, unpackedDim(xferOp), TypeRange(vecType),
1107  /*inBoundsCase=*/
1108  [&](OpBuilder &b, Location loc) {
1109  // Indices for the new transfer op.
1110  SmallVector<Value, 8> xferIndices;
1111  getXferIndices(b, xferOp, iv, xferIndices);
1112 
1113  // Indices for the new vector.insert op.
1114  SmallVector<OpFoldResult, 8> insertionIndices;
1115  getInsertionIndices(xferOp, insertionIndices);
1116  insertionIndices.push_back(rewriter.getIndexAttr(i));
1117 
1118  auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
1119  auto newXferOp = b.create<vector::TransferReadOp>(
1120  loc, newXferVecType, xferOp.getSource(), xferIndices,
1121  AffineMapAttr::get(unpackedPermutationMap(b, xferOp)),
1122  xferOp.getPadding(), Value(), inBoundsAttr);
1123  maybeAssignMask(b, xferOp, newXferOp, i);
1124  return b.create<vector::InsertOp>(loc, newXferOp, vec,
1125  insertionIndices);
1126  },
1127  /*outOfBoundsCase=*/
1128  [&](OpBuilder &b, Location loc) {
1129  // Loop through original (unmodified) vector.
1130  return vec;
1131  });
1132  }
1133 
1134  if (insertOp) {
1135  // Rewrite single user of the old TransferReadOp, which was an InsertOp.
1136  rewriter.replaceOp(insertOp, vec);
1137  rewriter.eraseOp(xferOp);
1138  } else {
1139  rewriter.replaceOp(xferOp, vec);
1140  }
1141 
1142  return success();
1143  }
1144 };
1145 
1146 /// Progressive lowering of vector TransferWriteOp with unrolling: Unpack one
1147 /// dimension. This is similar to TransferOpConversion<TransferWriteOp>, but no
1148 /// memref buffer is allocated and the SCF loop is fully unrolled.
1149 ///
1150 /// E.g.:
1151 /// ```
1153 /// vector.transfer_write %vec, %A[%a, %b, %c]
1154 /// : vector<5x4xf32>, memref<?x?x?xf32>
1155 /// ```
1156 /// is rewritten to IR such as (simplified):
1157 /// ```
1158 /// %v0 = vector.extract %vec[0] : vector<4xf32> from vector<5x4xf32>
1159 /// vector.transfer_write %v0, %A[%a, %b, %c] : vector<4xf32>, memref<...>
1160 /// %v1 = vector.extract %vec[1] : vector<4xf32> from vector<5x4xf32>
1161 /// vector.transfer_write %v1, %A[%a, %b + 1, %c] : vector<4xf32>, memref<...>
1162 /// ...
1163 /// %v4 = vector.extract %vec[4] : vector<4xf32> from vector<5x4xf32>
1164 /// vector.transfer_write %v4, %A[%a, %b + 4, %c] : vector<4xf32>, memref<...>
1165 /// ```
1166 ///
1167 /// Note: As an optimization, if the vector of the original TransferWriteOp
1168 /// was directly extracted from another vector via an ExtractOp `a`, extract
1169 /// the vectors for the newly generated TransferWriteOps from `a`'s input. By
1170 /// doing so, `a` may become dead, and the number of ExtractOps generated during
1171 /// recursive application of this pattern will be minimal.
1172 struct UnrollTransferWriteConversion
1173  : public VectorToSCFPattern<TransferWriteOp> {
1174  using VectorToSCFPattern<TransferWriteOp>::VectorToSCFPattern;
1175 
1176  void initialize() {
1177  // This pattern recursively unpacks one dimension at a time. The recursion
1178  // is bounded because the rank is strictly decreasing.
1179  setHasBoundedRewriteRecursion();
1180  }
1181 
1182  /// Return the vector from which newly generated ExtractOps will extract.
1183  Value getDataVector(TransferWriteOp xferOp) const {
1184  if (auto extractOp = getExtractOp(xferOp))
1185  return extractOp.getVector();
1186  return xferOp.getVector();
1187  }
1188 
1189  /// If the input of the given TransferWriteOp is an ExtractOp, return it.
1190  vector::ExtractOp getExtractOp(TransferWriteOp xferOp) const {
1191  if (auto *op = xferOp.getVector().getDefiningOp())
1192  return dyn_cast<vector::ExtractOp>(op);
1193  return vector::ExtractOp();
1194  }
1195 
1196  /// If the input of the given TransferWriteOp is an ExtractOp, return its
1197  /// indices.
1198  void getExtractionIndices(TransferWriteOp xferOp,
1199  SmallVectorImpl<OpFoldResult> &indices) const {
1200  if (auto extractOp = getExtractOp(xferOp)) {
1201  auto pos = extractOp.getMixedPosition();
1202  indices.append(pos.begin(), pos.end());
1203  }
1204  }
1205 
1206  /// Rewrite the op: Unpack one dimension. Can handle masks, out-of-bounds
1207  /// accesses, and broadcasts and transposes in permutation maps.
1208  LogicalResult matchAndRewrite(TransferWriteOp xferOp,
1209  PatternRewriter &rewriter) const override {
1210  if (xferOp.getVectorType().getRank() <= options.targetRank)
1211  return failure();
1212  if (isTensorOp(xferOp) && !options.lowerTensors)
1213  return failure();
1214  // Transfer ops that modify the element type are not supported atm.
1215  if (xferOp.getVectorType().getElementType() !=
1216  xferOp.getShapedType().getElementType())
1217  return failure();
1218 
1219  auto vec = getDataVector(xferOp);
1220  auto xferVecType = xferOp.getVectorType();
1221  int64_t dimSize = xferVecType.getShape()[0];
1222  Value source = xferOp.getSource(); // memref or tensor to be written to.
1223  auto sourceType = isTensorOp(xferOp) ? xferOp.getShapedType() : Type();
1224 
1225  // Generate fully unrolled loop of transfer ops.
1226  Location loc = xferOp.getLoc();
1227  for (int64_t i = 0; i < dimSize; ++i) {
1228  Value iv = rewriter.create<arith::ConstantIndexOp>(loc, i);
1229 
1230  auto updatedSource = generateInBoundsCheck(
1231  rewriter, xferOp, iv, unpackedDim(xferOp),
1232  isTensorOp(xferOp) ? TypeRange(sourceType) : TypeRange(),
1233  /*inBoundsCase=*/
1234  [&](OpBuilder &b, Location loc) {
1235  // Indices for the new transfer op.
1236  SmallVector<Value, 8> xferIndices;
1237  getXferIndices(b, xferOp, iv, xferIndices);
1238 
1239  // Indices for the new vector.extract op.
1240  SmallVector<OpFoldResult, 8> extractionIndices;
1241  getExtractionIndices(xferOp, extractionIndices);
1242  extractionIndices.push_back(b.getI64IntegerAttr(i));
1243 
1244  auto extracted =
1245  b.create<vector::ExtractOp>(loc, vec, extractionIndices);
1246  auto inBoundsAttr = dropFirstElem(b, xferOp.getInBoundsAttr());
1247  auto newXferOp = b.create<vector::TransferWriteOp>(
1248  loc, sourceType, extracted, source, xferIndices,
1249  AffineMapAttr::get(unpackedPermutationMap(b, xferOp)), Value(),
1250  inBoundsAttr);
1251 
1252  maybeAssignMask(b, xferOp, newXferOp, i);
1253 
1254  return isTensorOp(xferOp) ? newXferOp->getResult(0) : Value();
1255  },
1256  /*outOfBoundsCase=*/
1257  [&](OpBuilder &b, Location loc) {
1258  return isTensorOp(xferOp) ? source : Value();
1259  });
1260 
1261  if (isTensorOp(xferOp))
1262  source = updatedSource;
1263  }
1264 
1265  if (isTensorOp(xferOp))
1266  rewriter.replaceOp(xferOp, source);
1267  else
1268  rewriter.eraseOp(xferOp);
1269 
1270  return success();
1271  }
1272 };
1273 
1274 } // namespace lowering_n_d_unrolled
1275 
1276 namespace lowering_1_d {
1277 
1278 /// Compute the indices into the memref for the LoadOp/StoreOp generated as
1279 /// part of TransferOp1dConversion. Return the memref dimension on which
1280 /// the transfer is operating. A return value of std::nullopt indicates a
1281 /// broadcast.
1282 template <typename OpTy>
1283 static std::optional<int64_t>
1284 get1dMemrefIndices(OpBuilder &b, OpTy xferOp, Value iv,
1285  SmallVector<Value, 8> &memrefIndices) {
1286  auto indices = xferOp.getIndices();
1287  auto map = xferOp.getPermutationMap();
1288  assert(xferOp.getTransferRank() > 0 && "unexpected 0-d transfer");
1289 
1290  memrefIndices.append(indices.begin(), indices.end());
1291  assert(map.getNumResults() == 1 &&
1292  "Expected 1 permutation map result for 1D transfer");
1293  if (auto expr = map.getResult(0).template dyn_cast<AffineDimExpr>()) {
1294  Location loc = xferOp.getLoc();
1295  auto dim = expr.getPosition();
1296  AffineExpr d0, d1;
1297  bindDims(xferOp.getContext(), d0, d1);
1298  Value offset = memrefIndices[dim];
1299  memrefIndices[dim] =
1300  affine::makeComposedAffineApply(b, loc, d0 + d1, {offset, iv});
1301  return dim;
1302  }
1303 
1304  assert(xferOp.isBroadcastDim(0) &&
1305  "Expected AffineDimExpr or AffineConstantExpr");
1306  return std::nullopt;
1307 }
1308 
1309 /// Codegen strategy for TransferOp1dConversion, depending on the
1310 /// operation.
1311 template <typename OpTy>
1312 struct Strategy1d;
1313 
1314 /// Codegen strategy for TransferReadOp.
1315 template <>
1316 struct Strategy1d<TransferReadOp> {
1317  static void generateForLoopBody(OpBuilder &b, Location loc,
1318  TransferReadOp xferOp, Value iv,
1319  ValueRange loopState) {
1320  SmallVector<Value, 8> indices;
1321  auto dim = get1dMemrefIndices(b, xferOp, iv, indices);
1322  auto vec = loopState[0];
1323 
1324  // In case of out-of-bounds access, leave `vec` as is (was initialized with
1325  // padding value).
1326  auto nextVec = generateInBoundsCheck(
1327  b, xferOp, iv, dim, TypeRange(xferOp.getVectorType()),
1328  /*inBoundsCase=*/
1329  [&](OpBuilder &b, Location loc) {
1330  Value val =
1331  b.create<memref::LoadOp>(loc, xferOp.getSource(), indices);
1332  return b.create<vector::InsertElementOp>(loc, val, vec, iv);
1333  },
1334  /*outOfBoundsCase=*/
1335  [&](OpBuilder & /*b*/, Location loc) { return vec; });
1336  b.create<scf::YieldOp>(loc, nextVec);
1337  }
1338 
1339  static Value initialLoopState(OpBuilder &b, TransferReadOp xferOp) {
1340  // Initialize the vector with the padding value.
1341  Location loc = xferOp.getLoc();
1342  return b.create<vector::SplatOp>(loc, xferOp.getVectorType(),
1343  xferOp.getPadding());
1344  }
1345 };
1346 
1347 /// Codegen strategy for TransferWriteOp.
1348 template <>
1349 struct Strategy1d<TransferWriteOp> {
1350  static void generateForLoopBody(OpBuilder &b, Location loc,
1351  TransferWriteOp xferOp, Value iv,
1352  ValueRange /*loopState*/) {
1353  SmallVector<Value, 8> indices;
1354  auto dim = get1dMemrefIndices(b, xferOp, iv, indices);
1355 
1356  // Nothing to do in case of out-of-bounds access.
1357  generateInBoundsCheck(
1358  b, xferOp, iv, dim,
1359  /*inBoundsCase=*/[&](OpBuilder &b, Location loc) {
1360  auto val =
1361  b.create<vector::ExtractElementOp>(loc, xferOp.getVector(), iv);
1362  b.create<memref::StoreOp>(loc, val, xferOp.getSource(), indices);
1363  });
1364  b.create<scf::YieldOp>(loc);
1365  }
1366 
1367  static Value initialLoopState(OpBuilder &b, TransferWriteOp xferOp) {
1368  return Value();
1369  }
1370 };
1371 
1372 /// Lower a 1D vector transfer op to SCF using scalar loads/stores. This is
1373 /// necessary in cases where a 1D vector transfer op cannot be lowered into
1374 /// vector load/stores due to non-unit strides or broadcasts:
1375 ///
1376 /// * Transfer dimension is not the last memref dimension
1377 /// * Transfer dimension is a broadcast (i.e., scalar load + broadcast)
1378 /// * Memref has a layout map with non-unit stride on the last dimension
1379 ///
1380 /// This pattern generates IR as follows:
1381 ///
1382 /// 1. Generate a for loop iterating over each vector element.
1383 /// 2. Inside the loop, generate an InsertElementOp or ExtractElementOp,
1384 /// depending on OpTy.
1385 ///
1386 /// TODO: In some cases (no masking, etc.), LLVM::MatrixColumnMajorLoadOp
1387 /// can be generated instead of TransferOp1dConversion. Add such a pattern
1388 /// to ConvertVectorToLLVM.
1389 ///
1390 /// E.g.:
1391 /// ```
1392 /// vector.transfer_write %vec, %A[%a, %b]
1393 /// {permutation_map = affine_map<(d0, d1) -> (d0)>, in_bounds = [true]}
1394 /// : vector<9xf32>, memref<?x?xf32>
1395 /// ```
1396 /// Is rewritten to approximately the following pseudo-IR:
1397 /// ```
1398 /// for i = 0 to 9 {
1399 /// %t = vector.extractelement %vec[i] : vector<9xf32>
1400 /// memref.store %t, %arg0[%a + i, %b] : memref<?x?xf32>
1401 /// }
1402 /// ```
1403 template <typename OpTy>
1404 struct TransferOp1dConversion : public VectorToSCFPattern<OpTy> {
1405  using VectorToSCFPattern<OpTy>::VectorToSCFPattern;
1406 
1407  LogicalResult matchAndRewrite(OpTy xferOp,
1408  PatternRewriter &rewriter) const override {
1409  // TODO: support 0-d corner case.
1410  if (xferOp.getTransferRank() == 0)
1411  return failure();
1412  auto map = xferOp.getPermutationMap();
1413  auto memRefType = dyn_cast<MemRefType>(xferOp.getShapedType());
1414 
1415  if (!memRefType)
1416  return failure();
1417  if (xferOp.getVectorType().getRank() != 1)
1418  return failure();
1419  if (map.isMinorIdentity() && isLastMemrefDimUnitStride(memRefType))
1420  return failure(); // Handled by ConvertVectorToLLVM
1421 
1422  // Loop bounds, step, state...
1423  Location loc = xferOp.getLoc();
1424  auto vecType = xferOp.getVectorType();
1425  auto lb = rewriter.create<arith::ConstantIndexOp>(loc, 0);
1426  Value ub =
1427  rewriter.create<arith::ConstantIndexOp>(loc, vecType.getDimSize(0));
1428  if (vecType.isScalable()) {
1429  Value vscale =
1430  rewriter.create<vector::VectorScaleOp>(loc, rewriter.getIndexType());
1431  ub = rewriter.create<arith::MulIOp>(loc, ub, vscale);
1432  }
1433  auto step = rewriter.create<arith::ConstantIndexOp>(loc, 1);
1434  auto loopState = Strategy1d<OpTy>::initialLoopState(rewriter, xferOp);
1435 
1436  // Generate for loop.
1437  rewriter.replaceOpWithNewOp<scf::ForOp>(
1438  xferOp, lb, ub, step, loopState ? ValueRange(loopState) : ValueRange(),
1439  [&](OpBuilder &b, Location loc, Value iv, ValueRange loopState) {
1440  Strategy1d<OpTy>::generateForLoopBody(b, loc, xferOp, iv, loopState);
1441  });
1442 
1443  return success();
1444  }
1445 };
1446 
1447 } // namespace lowering_1_d
1448 } // namespace
1449 
1450 void mlir::populateVectorToSCFConversionPatterns(
1451     RewritePatternSet &patterns, const VectorTransferToSCFOptions &options) {
1452  if (options.unroll) {
1453  patterns.add<lowering_n_d_unrolled::UnrollTransferReadConversion,
1454  lowering_n_d_unrolled::UnrollTransferWriteConversion>(
1455  patterns.getContext(), options);
1456  } else {
1457  patterns.add<lowering_n_d::PrepareTransferReadConversion,
1458  lowering_n_d::PrepareTransferWriteConversion,
1459  lowering_n_d::TransferOpConversion<TransferReadOp>,
1460  lowering_n_d::TransferOpConversion<TransferWriteOp>>(
1461  patterns.getContext(), options);
1462  }
1463 
1464  if (options.targetRank == 1) {
1465  patterns.add<lowering_1_d::TransferOp1dConversion<TransferReadOp>,
1466  lowering_1_d::TransferOp1dConversion<TransferWriteOp>>(
1467  patterns.getContext(), options);
1468  }
1469  patterns.add<lowering_n_d::DecomposePrintOpConversion>(patterns.getContext(),
1470  options);
1471 }
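// Usage sketch (illustrative only; `op`, `opts`, and `patterns` are
// placeholder names): clients that do not want to run the full pass below can
// populate and apply these patterns directly.
//
//   RewritePatternSet patterns(op->getContext());
//   VectorTransferToSCFOptions opts;
//   opts.targetRank = 1;  // keep unpacking transfers down to rank 1
//   opts.unroll = false;  // generate scf.for loops instead of fully unrolling
//   populateVectorToSCFConversionPatterns(patterns, opts);
//   (void)applyPatternsAndFoldGreedily(op, std::move(patterns));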
1472 
1473 namespace {
1474 
1475 struct ConvertVectorToSCFPass
1476  : public impl::ConvertVectorToSCFBase<ConvertVectorToSCFPass> {
1477  ConvertVectorToSCFPass() = default;
1478  ConvertVectorToSCFPass(const VectorTransferToSCFOptions &options) {
1479  this->fullUnroll = options.unroll;
1480  this->targetRank = options.targetRank;
1481  this->lowerTensors = options.lowerTensors;
1482  }
1483 
1484  void runOnOperation() override {
1485  VectorTransferToSCFOptions options;
1486  options.unroll = fullUnroll;
1487  options.targetRank = targetRank;
1488  options.lowerTensors = lowerTensors;
1489 
1490  // Lower permutation maps first.
1491  RewritePatternSet lowerTransferPatterns(&getContext());
1492  mlir::vector::populateVectorTransferPermutationMapLoweringPatterns(
1493  lowerTransferPatterns);
1494  (void)applyPatternsAndFoldGreedily(getOperation(),
1495  std::move(lowerTransferPatterns));
1496 
1497  RewritePatternSet patterns(&getContext());
1498  populateVectorToSCFConversionPatterns(patterns, options);
1499  (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
1500  }
1501 };
1502 
1503 } // namespace
1504 
1505 std::unique_ptr<Pass>
1506 mlir::createConvertVectorToSCFPass(const VectorTransferToSCFOptions &options) {
1507  return std::make_unique<ConvertVectorToSCFPass>(options);
1508 }
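// Pipeline usage sketch (illustrative; the textual pass option names are
// assumptions derived from the pass options above):
//
//   mlir-opt input.mlir -convert-vector-to-scf="full-unroll=true target-rank=1"
//
// or, from C++, typically nested on functions:
//
//   VectorTransferToSCFOptions options;
//   options.unroll = true;
//   pm.addNestedPass<func::FuncOp>(createConvertVectorToSCFPass(options));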