XeGPUUnroll.cpp
1 //===- XeGPUUnroll.cpp - patterns to do unrolling ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains patterns for unrolling XeGPU operations. It follows a
10 // concept and design similar to the vector unroll patterns, serving as a
11 // complement to them.
12 //
13 //===----------------------------------------------------------------------===//
14 
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/Support/DebugLog.h"
21 
22 namespace mlir {
23 namespace xegpu {
24 #define GEN_PASS_DEF_XEGPUUNROLL
25 #include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
26 } // namespace xegpu
27 } // namespace mlir
28 
29 #define DEBUG_TYPE "xegpu-unroll"
30 
31 using namespace mlir;
32 
33 namespace {
34 
35 template <typename SourceOp>
36 struct UnrollPattern : public OpRewritePattern<SourceOp> {
37  UnrollPattern(MLIRContext *context, const xegpu::UnrollOptions &options,
38  PatternBenefit benefit = 1)
39  : OpRewritePattern<SourceOp>(context, benefit), options(options) {}
40 
41 protected:
42  /// Return the target shape for the given `op`. Return std::nullopt if the
43  /// op shouldn't be or cannot be unrolled.
44  std::optional<SmallVector<int64_t>> getTargetShape(Operation *op) const {
45  LDBG() << "Get unroll shape for: " << *op;
46 
47  if (options.filterConstraint && failed(options.filterConstraint(op))) {
48  LDBG() << "--filter constraint failed -> BAIL";
49  return std::nullopt;
50  }
51 
52  assert(options.nativeShape &&
53  "expects the nativeShape callback to be provided.");
54  auto nativeShape = options.nativeShape(op);
55  return nativeShape;
56  }
57 
58  SmallVector<Type> getUnrolledTypes(ShapedType type,
59  ArrayRef<int64_t> tileShape,
60  bool returnSingleType = false) const {
61  return options.getUnrolledTypes(type, tileShape, returnSingleType);
62  }
63 
64  /// Emulate the unpack behavior using insert_strided_slice for VectorType
65  /// values and unrealized_conversion_cast for TensorDescType values.
66  Value unpack(ValueRange srcs, Type destTy, ArrayRef<int64_t> blockSize,
67  Location loc, PatternRewriter &rewriter) const {
68  if (auto vecTy = dyn_cast<VectorType>(destTy)) {
69  auto shape = vecTy.getShape();
70  return xegpu::createVectorWithShapeFromValues(rewriter, loc, srcs, shape);
71  }
72 
73  if (isa<xegpu::TensorDescType>(destTy)) {
74  auto attr = NamedAttribute(rewriter.getStringAttr(unpackAttrName),
75  rewriter.getUnitAttr());
76  auto blkAttr = NamedAttribute(rewriter.getStringAttr(blockAttrName),
77  rewriter.getDenseI64ArrayAttr(blockSize));
78  auto castOp = UnrealizedConversionCastOp::create(
79  rewriter, loc, destTy, srcs,
80  ArrayRef<NamedAttribute>({attr, blkAttr}));
81  return castOp.getResult(0);
82  }
83 
84  llvm_unreachable("Unexpected destTy.");
85  return Value();
86  }
87 
88  /// Emulate the pack behavior using extract_strided_slice for VectorType
89  /// values and unrealized_conversion_cast for TensorDescType values.
90  SmallVector<Value> pack(Value src, TypeRange destTypes,
91  ArrayRef<int64_t> blockSize, Location loc,
92  PatternRewriter &rewriter) const {
93  if (auto vecTy = dyn_cast<VectorType>(src.getType())) {
94  return xegpu::extractVectorsWithShapeFromValue(rewriter, loc, src,
95  blockSize);
96  }
97 
98  if (isa<xegpu::TensorDescType>(src.getType())) {
99  auto attr = NamedAttribute(rewriter.getStringAttr(packAttrName),
100  rewriter.getUnitAttr());
101  auto blkAttr = NamedAttribute(rewriter.getStringAttr(blockAttrName),
102  rewriter.getDenseI64ArrayAttr(blockSize));
103  auto castOp = UnrealizedConversionCastOp::create(
104  rewriter, loc, destTypes, src,
105  ArrayRef<NamedAttribute>({attr, blkAttr}));
106  return castOp.getResults();
107  }
108 
109  llvm_unreachable("Unexpected src type.");
110  return SmallVector<Value>();
111  }
112 
113 private:
114  const char *const packAttrName = "__xegpu_blocking_pack__";
115  const char *const unpackAttrName = "__xegpu_blocking_unpack__";
116  const char *const blockAttrName = "__xegpu_blocking_tile_shape__";
117 
118  xegpu::UnrollOptions options;
119 };
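
// Illustrative sketch (not part of the pass): pack() splits a value of shape
// `shape` into tiles of shape `tileShape`, and unpack() stitches the resulting
// pieces back together. The number of pieces is the product of the
// per-dimension ratios, which is what computeShapeRatio/computeProduct compute
// further below. A plain-integer version of that counting, assuming `shape` is
// evenly divisible by `tileShape`, could look like this; the helper name is
// made up for illustration and is not used anywhere in the pass.
static int64_t exampleNumUnrolledPieces(ArrayRef<int64_t> shape,
                                        ArrayRef<int64_t> tileShape) {
  // E.g. shape = {16, 32} with tileShape = {8, 16} gives a 2x2 grid of tiles,
  // so pack() produces 4 values and unpack() consumes 4 values.
  int64_t count = 1;
  for (auto [dim, tile] : llvm::zip_equal(shape, tileShape))
    count *= dim / tile;
  return count;
}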
120 
121 // Generic helper function for unrolling operations with offsets.
122 //
123 // Iterates over tile offsets within the tensor descriptor shape and calls
124 // the provided createOp function for each computed offset. This is used by
125 // operations like LoadNd, StoreNd, CreateNdDesc, and PrefetchNd when they
126 // have explicit offsets that need to be adjusted for each unrolled tile.
127 SmallVector<Value> computeUnrolledOffsets(
128  SmallVector<OpFoldResult> mixedOffsets, xegpu::TensorDescType tdescTy,
129  ArrayRef<int64_t> targetShape,
130  const std::function<Value(SmallVector<OpFoldResult>)> &createOp,
131  Location loc, PatternRewriter &rewriter) {
132  int64_t rank = tdescTy.getRank();
133  ArrayRef<int64_t> shape = tdescTy.getShape();
134 
135  auto addi = [&](OpFoldResult a, int64_t b) -> Value {
136  std::optional<int64_t> maybeInt = getConstantIntValue(a);
137  if (maybeInt) {
138  return arith::ConstantIndexOp::create(rewriter, loc, *maybeInt + b);
139  } else {
140  auto aV = llvm::cast<Value>(a);
141  auto bV = arith::ConstantIndexOp::create(rewriter, loc, b);
142  return rewriter.createOrFold<arith::AddIOp>(loc, aV, bV);
143  }
144  };
145 
146  SmallVector<OpFoldResult> oldOffsets = llvm::to_vector(
147  llvm::drop_begin(mixedOffsets, mixedOffsets.size() - rank));
148  auto validIdxes =
149  llvm::seq<int64_t>(mixedOffsets.size() - rank, mixedOffsets.size());
150 
151  SmallVector<Value> newOps;
152  for (SmallVector<int64_t> offsets :
153  StaticTileOffsetRange(shape, targetShape)) {
154 
155  for (auto [idx, oldOff, offset] :
156  llvm::zip(validIdxes, oldOffsets, offsets))
157  mixedOffsets[idx] = addi(oldOff, offset);
158 
159  auto newOp = createOp(mixedOffsets);
160  newOps.push_back(newOp);
161  }
162  return newOps;
163 }
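
// Illustrative sketch (not part of the pass): the helper above walks the tile
// origins produced by StaticTileOffsetRange in row-major order and adds each
// origin to the trailing original offsets. For constant offsets, the same
// index math in plain integers looks as follows; the function name and the
// shapes in the comment are made up for illustration.
static SmallVector<SmallVector<int64_t>>
exampleUnrolledConstOffsets(ArrayRef<int64_t> baseOffsets,
                            ArrayRef<int64_t> shape,
                            ArrayRef<int64_t> tileShape) {
  // For shape = {8, 32}, tileShape = {8, 16}, baseOffsets = {2, 4}, this yields
  // {2, 4} and {2, 20}: one entry per unrolled tile, in row-major order.
  SmallVector<SmallVector<int64_t>> result;
  SmallVector<int64_t> tileOrigin(shape.size(), 0);
  bool done = false;
  while (!done) {
    SmallVector<int64_t> offsets;
    for (size_t d = 0; d < shape.size(); ++d)
      offsets.push_back(baseOffsets[d] + tileOrigin[d]);
    result.push_back(offsets);
    // Advance the tile origin like an odometer, innermost dimension first.
    done = true;
    for (int64_t d = static_cast<int64_t>(shape.size()) - 1; d >= 0; --d) {
      tileOrigin[d] += tileShape[d];
      if (tileOrigin[d] < shape[d]) {
        done = false;
        break;
      }
      tileOrigin[d] = 0;
    }
  }
  return result;
}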
164 
165 struct UnrollCreateNdOp : public UnrollPattern<xegpu::CreateNdDescOp> {
166  using UnrollPattern<xegpu::CreateNdDescOp>::UnrollPattern;
167  LogicalResult matchAndRewrite(xegpu::CreateNdDescOp op,
168  PatternRewriter &rewriter) const override {
169  Location loc = op.getLoc();
170  xegpu::TensorDescType tdescTy = op.getType();
171 
172  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
173  if (!targetShape)
174  return failure();
175 
176  SmallVector<Value> newOps;
177 
178  auto newTdescTy = getUnrolledTypes(tdescTy, *targetShape)[0];
179  bool hasOffsets = op.getMixedOffsets().size() != 0;
180  if (!hasOffsets) {
181  auto newOp = xegpu::CreateNdDescOp::create(
182  rewriter, loc, newTdescTy, op.getSource(), op.getMixedSizes(),
183  op.getMixedStrides());
184  newOps.push_back(newOp);
185  } else {
186  auto createOp = [&](SmallVector<OpFoldResult> offsets) -> Value {
187  return xegpu::CreateNdDescOp::create(
188  rewriter, loc, newTdescTy, op.getSource(), offsets,
189  op.getMixedSizes(), op.getMixedStrides());
190  };
191 
192  newOps = computeUnrolledOffsets(op.getMixedOffsets(), tdescTy,
193  *targetShape, createOp, loc, rewriter);
194  }
195  Value castOp = unpack(newOps, tdescTy, *targetShape, loc, rewriter);
196  rewriter.replaceOp(op, castOp);
197 
198  return success();
199  }
200 };
201 
202 struct UnrollUpdateNdOffsetOp : public UnrollPattern<xegpu::UpdateNdOffsetOp> {
203  using UnrollPattern<xegpu::UpdateNdOffsetOp>::UnrollPattern;
204  LogicalResult matchAndRewrite(xegpu::UpdateNdOffsetOp op,
205  PatternRewriter &rewriter) const override {
206  Location loc = op.getLoc();
207  xegpu::TensorDescType tdescTy = op.getTensorDescType();
208 
209  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
210  if (!targetShape)
211  return failure();
212 
213  SmallVector<Type> convertedTdescTypes =
214  getUnrolledTypes(tdescTy, *targetShape);
215  SmallVector<Value> convertedTdesc = pack(
216  op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
217 
218  SmallVector<Value> newOps;
219  for (auto t : convertedTdesc) {
220  auto newOp = xegpu::UpdateNdOffsetOp::create(
221  rewriter, loc, t.getType(), t, op.getOffsets(), op.getConstOffsets());
222  newOps.push_back(newOp);
223  }
224  Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
225  rewriter.replaceOp(op, castOp);
226  return success();
227  }
228 };
229 
230 struct UnrollPrefetchNdOp : public UnrollPattern<xegpu::PrefetchNdOp> {
231  using UnrollPattern<xegpu::PrefetchNdOp>::UnrollPattern;
232  LogicalResult matchAndRewrite(xegpu::PrefetchNdOp op,
233  PatternRewriter &rewriter) const override {
234  Location loc = op.getLoc();
235  xegpu::TensorDescType tdescTy = op.getTensorDescType();
236 
237  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
238  if (!targetShape)
239  return failure();
240 
241  int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
242  bool hasOffsets = (offsetSize != 0) || op.getConstOffsetsAttr();
243 
244  SmallVector<Type> convertedTdescTypes = getUnrolledTypes(
245  tdescTy, *targetShape, /*returnSingleType*/ hasOffsets);
246 
247  SmallVector<Value> convertedTdesc = pack(
248  op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
249 
250  if (!hasOffsets) {
251  for (auto t : convertedTdesc)
252  xegpu::PrefetchNdOp::create(rewriter, loc, TypeRange(), t,
253  op->getAttrs());
254  } else {
255  auto createPrefetch = [&](SmallVector<OpFoldResult> offsets) -> Value {
256  xegpu::PrefetchNdOp::create(rewriter, loc, convertedTdesc[0], offsets,
257  op.getL1HintAttr(), op.getL2HintAttr(),
258  op.getL3HintAttr());
259  // Return a dummy Value to satisfy the callback's signature.
260  return nullptr;
261  };
262 
263  computeUnrolledOffsets(op.getMixedOffsets(), tdescTy, *targetShape,
264  createPrefetch, loc, rewriter);
265  }
266 
267  rewriter.eraseOp(op);
268  return success();
269  }
270 };
271 
272 struct UnrollLoadNdOp : public UnrollPattern<xegpu::LoadNdOp> {
273  using UnrollPattern<xegpu::LoadNdOp>::UnrollPattern;
274  LogicalResult matchAndRewrite(xegpu::LoadNdOp op,
275  PatternRewriter &rewriter) const override {
276 
277  Location loc = op.getLoc();
278  VectorType valueTy = op.getType();
279  xegpu::TensorDescType tdescTy = op.getTensorDescType();
280 
281  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
282  if (!targetShape)
283  return failure();
284 
285  int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
286  bool hasOffsets = (offsetSize != 0) || op.getConstOffsetsAttr();
287 
288  Type elemTy = tdescTy.getElementType();
289  VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
290 
291  SmallVector<Type> convertedTdescTypes = getUnrolledTypes(
292  tdescTy, *targetShape, /*returnSingleType*/ hasOffsets);
293 
294  SmallVector<Value> convertedTdescs = pack(
295  op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
296  SmallVector<Value> newOps;
297 
298  if (!hasOffsets) {
299  for (auto t : convertedTdescs) {
300  auto newOp = xegpu::LoadNdOp::create(rewriter, loc, newValueTy, t,
301  op->getAttrs());
302  newOps.push_back(newOp);
303  }
304  } else {
305  auto createLoad = [&](SmallVector<OpFoldResult> offsets) {
306  return xegpu::LoadNdOp::create(
307  rewriter, loc, newValueTy, convertedTdescs[0], offsets,
308  op.getPackedAttr(), op.getTransposeAttr(), op.getL1HintAttr(),
309  op.getL2HintAttr(), op.getL3HintAttr());
310  };
311  newOps = computeUnrolledOffsets(op.getMixedOffsets(), tdescTy,
312  *targetShape, createLoad, loc, rewriter);
313  }
314 
315  Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
316 
317  rewriter.replaceOp(op, castOp);
318  return success();
319  }
320 };
321 
322 struct UnrollStoreNdOp : public UnrollPattern<xegpu::StoreNdOp> {
323  using UnrollPattern<xegpu::StoreNdOp>::UnrollPattern;
324  LogicalResult matchAndRewrite(xegpu::StoreNdOp op,
325  PatternRewriter &rewriter) const override {
326  Location loc = op.getLoc();
327  VectorType valueTy = op.getValueType();
328  xegpu::TensorDescType tdescTy = op.getTensorDescType();
329 
330  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
331  if (!targetShape)
332  return failure();
333 
334  int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
335  bool hasOffsets = (offsetSize != 0) || op.getConstOffsetsAttr();
336 
337  SmallVector<Type> convertedValTypes =
338  getUnrolledTypes(valueTy, *targetShape);
339  SmallVector<Type> convertedTdescTypes = getUnrolledTypes(
340  tdescTy, *targetShape, /*returnSingleType*/ hasOffsets);
341 
342  SmallVector<Value> convertedTdescs = pack(
343  op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
344 
345  SmallVector<Value> convertedValues =
346  pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
347  if (!hasOffsets) {
348  for (auto [v, t] : llvm::zip(convertedValues, convertedTdescs))
349  xegpu::StoreNdOp::create(rewriter, loc, v, t, op.getL1HintAttr(),
350  op.getL2HintAttr(), op.getL3HintAttr());
351  } else {
352  size_t valueIndex = 0;
353  auto createStore = [&](SmallVector<OpFoldResult> offsets) {
354  xegpu::StoreNdOp::create(rewriter, loc, convertedValues[valueIndex++],
355  convertedTdescs[0], offsets,
356  op.getL1HintAttr(), op.getL2HintAttr(),
357  op.getL3HintAttr());
358  // Return a dummy Value to satisfy the callback's signature.
359  return nullptr;
360  };
361 
362  computeUnrolledOffsets(op.getMixedOffsets(), tdescTy, *targetShape,
363  createStore, loc, rewriter);
364  }
365 
366  rewriter.eraseOp(op);
367  return success();
368  }
369 };
370 
371 struct UnrollDpasOp : public UnrollPattern<xegpu::DpasOp> {
372  using UnrollPattern<xegpu::DpasOp>::UnrollPattern;
373  LogicalResult matchAndRewrite(xegpu::DpasOp op,
374  PatternRewriter &rewriter) const override {
375  Location loc = op.getLoc();
376 
377  // Expect every operand to be a 2D vector.
378  if (llvm::any_of(op->getOperandTypes(), [&](Type type) {
379  auto vecTy = dyn_cast<VectorType>(type);
380  return !vecTy || vecTy.getRank() != 2;
381  }))
382  return failure();
383 
384  // A vector of 3 elements should be returned, representing M, K, N
385  // respectively.
386  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
387  if (!targetShape || targetShape->size() != 3)
388  return failure();
389  auto M = (*targetShape)[0];
390  auto K = (*targetShape)[1];
391  auto N = (*targetShape)[2];
392 
393  int64_t aBlockSize[2] = {M, K};
394  int64_t bBlockSize[2] = {K, N};
395  int64_t cBlockSize[2] = {M, N};
396 
397  auto packWrapper = [&](TypedValue<VectorType> val,
398  ArrayRef<int64_t> blockSize) {
399  VectorType type = val.getType();
400  std::optional<SmallVector<int64_t>> grids =
401  computeShapeRatio(type.getShape(), blockSize);
402  assert(grids && "Expecting grids to be computed.");
403  auto numNewOps = computeProduct(*grids);
404  if (numNewOps == 1)
405  return SmallVector<Value>({val});
406  VectorType newVecTy = type.cloneWith(blockSize, type.getElementType());
407  SmallVector<Type> convertedTypes(numNewOps, newVecTy);
408  SmallVector<Value> values =
409  pack(val, convertedTypes, blockSize, loc, rewriter);
410  return values;
411  };
412 
413  auto a = op.getLhs();
414  auto b = op.getRhs();
415  auto c = op.getAcc();
416 
417  auto aShape = a.getType().getShape();
418  auto bShape = b.getType().getShape();
419 
420  SmallVector<Value> aVals, bVals, cVals;
421  aVals = packWrapper(a, aBlockSize);
422  bVals = packWrapper(b, bBlockSize);
423 
424  if (c)
425  cVals = packWrapper(c, cBlockSize);
426 
427  // Skip the operation if any operand has an invalid blocking size (empty),
428  // or if every operand's shape already matches its blocking size (size == 1).
429  auto ranges = c ? SmallVector<ValueRange>({aVals, bVals, cVals})
430  : SmallVector<ValueRange>({aVals, bVals});
431  if (llvm::any_of(ranges, [](auto &v) { return v.size() == 0; }) ||
432  llvm::all_of(ranges, [](auto &v) { return v.size() == 1; }))
433  return failure();
434 
435  VectorType resultTy = op.getResult().getType();
436  auto vecTy = VectorType::get(cBlockSize, resultTy.getElementType());
437 
438  int64_t mIters = aShape[0] / M;
439  int64_t kIters = aShape[1] / K;
440  int64_t nIters = bShape[1] / N;
441 
442  SmallVector<Value> newOps;
443  for (int64_t i = 0; i < mIters; ++i) {
444  for (int64_t j = 0; j < nIters; ++j) {
445  Value tmpC;
446  if (c)
447  tmpC = cVals[i * nIters + j]; // init with acc
448 
449  for (int64_t k = 0; k < kIters; ++k) {
450  Value aVec = aVals[i * kIters + k];
451  Value bVec = bVals[k * nIters + j];
452  SmallVector<Value> operands({aVec, bVec});
453  if (tmpC)
454  operands.push_back(tmpC);
455 
456  tmpC = xegpu::DpasOp::create(rewriter, loc, vecTy, operands,
457  op->getAttrs());
458  }
459  newOps.push_back(tmpC);
460  }
461  }
462  Value castOp = unpack(newOps, resultTy, cBlockSize, loc, rewriter);
463  rewriter.replaceOp(op, castOp);
464  return success();
465  }
466 };
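
// Illustrative sketch (not part of the pass): the i/j/k loop above performs a
// blocked matrix multiply over the pre-packed tiles, chaining the accumulator
// across the K dimension. With plain doubles standing in for the MxK, KxN and
// MxN tiles, the indexing looks as follows; all names are made up for
// illustration and are not used anywhere in the pass.
static SmallVector<double>
exampleBlockedDpas(ArrayRef<double> aTiles, ArrayRef<double> bTiles,
                   ArrayRef<double> cTiles, int64_t mIters, int64_t nIters,
                   int64_t kIters) {
  SmallVector<double> results;
  for (int64_t i = 0; i < mIters; ++i) {
    for (int64_t j = 0; j < nIters; ++j) {
      // Initialize the accumulator with the matching C tile, if provided.
      double acc = cTiles.empty() ? 0.0 : cTiles[i * nIters + j];
      for (int64_t k = 0; k < kIters; ++k)
        // Each step plays the role of one small DpasOp: A(i,k) * B(k,j) + acc.
        acc = aTiles[i * kIters + k] * bTiles[k * nIters + j] + acc;
      // One result tile per (i, j), in the same row-major order used by unpack.
      results.push_back(acc);
    }
  }
  return results;
}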
467 
468 struct UnrollCreateDescOp : public UnrollPattern<xegpu::CreateDescOp> {
469  using UnrollPattern<xegpu::CreateDescOp>::UnrollPattern;
470  LogicalResult matchAndRewrite(xegpu::CreateDescOp op,
471  PatternRewriter &rewriter) const override {
472  Location loc = op.getLoc();
473  xegpu::TensorDescType tdescTy = op.getType();
474  TypedValue<::mlir::VectorType> indiceVec = op.getOffsets();
475  VectorType indiceVecTy = indiceVec.getType();
476 
477  if (!tdescTy.isScattered())
478  return failure();
479 
480  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
481  if (!targetShape)
482  return failure();
483 
484  SmallVector<int64_t> targetIndiceShape(*targetShape);
485  int64_t originalChunkSize = tdescTy.getChunkSizeAsInt();
486  // The index vector has one fewer dimension than tdescTy when chunkSize > 1.
487  if (originalChunkSize > 1)
488  targetIndiceShape.pop_back();
489 
490  auto newTdescTy = getUnrolledTypes(tdescTy, *targetShape)[0];
491  SmallVector<Type> convertedIndiceTypes =
492  getUnrolledTypes(indiceVecTy, targetIndiceShape);
493  SmallVector<Value> convertedIndiceVec =
494  pack(indiceVec, convertedIndiceTypes, targetIndiceShape, loc, rewriter);
495 
496  SmallVector<Value> newOps;
497 
498  // More indices are needed when chunkSize > 1, since a large load from one
499  // address may be broken into multiple smaller loads.
500  if (originalChunkSize > 1) {
501  int64_t blockedChunkSize = targetShape->back();
502  int64_t numNewChunks = originalChunkSize / blockedChunkSize;
503 
504  for (auto [indice, indiceType] :
505  llvm::zip(convertedIndiceVec, convertedIndiceTypes)) {
506  for (int64_t i = 0; i < numNewChunks; ++i) {
507  // Compute the offset
508  Value inc = arith::ConstantIndexOp::create(rewriter, loc,
509  i * blockedChunkSize);
510  Value incVec =
511  vector::BroadcastOp::create(rewriter, loc, indiceType, inc);
512  Value offsetIndice =
513  arith::AddIOp::create(rewriter, loc, indice, incVec);
514 
515  auto newOp = xegpu::CreateDescOp::create(
516  rewriter, loc, newTdescTy, op.getSource(), offsetIndice);
517 
518  newOps.push_back(newOp);
519  }
520  }
521  } else {
522  for (auto indice : convertedIndiceVec) {
523  auto newOp = xegpu::CreateDescOp::create(rewriter, loc, newTdescTy,
524  op.getSource(), indice);
525  newOps.push_back(newOp);
526  }
527  }
528 
529  Value castOp = unpack(newOps, tdescTy, *targetShape, loc, rewriter);
530  rewriter.replaceOp(op, castOp);
531 
532  return success();
533  }
534 };
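
// Illustrative sketch (not part of the pass): when the original chunk size is
// split into smaller blocks, every unrolled index is reused numNewChunks
// times, each time shifted by a multiple of the blocked chunk size. This is
// the scalar form of the broadcast-and-add performed above; the helper name
// and the values in the comment are made up for illustration.
static SmallVector<int64_t>
exampleSplitChunkIndices(ArrayRef<int64_t> baseIndices,
                         int64_t originalChunkSize, int64_t blockedChunkSize) {
  int64_t numNewChunks = originalChunkSize / blockedChunkSize;
  SmallVector<int64_t> result;
  for (int64_t base : baseIndices)
    for (int64_t i = 0; i < numNewChunks; ++i)
      // E.g. base = 32 with originalChunkSize = 8 and blockedChunkSize = 2
      // yields 32, 34, 36, 38: one starting index per smaller load.
      result.push_back(base + i * blockedChunkSize);
  return result;
}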
535 
536 struct UnrollLoadGatherOp : public UnrollPattern<xegpu::LoadGatherOp> {
537  using UnrollPattern<xegpu::LoadGatherOp>::UnrollPattern;
538  LogicalResult matchAndRewrite(xegpu::LoadGatherOp op,
539  PatternRewriter &rewriter) const override {
540 
541  Location loc = op.getLoc();
542  VectorType valueTy = llvm::dyn_cast<VectorType>(op.getValue().getType());
543  xegpu::TensorDescType tdescTy = op.getTensorDescType();
544 
545  // TODO: handle the unstructured source case (!tdescTy).
546  if (!tdescTy || op.getOffsets())
547  return failure();
548 
549  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
550  if (!targetShape)
551  return failure();
552 
553  SmallVector<int64_t> targetMaskShape(*targetShape);
554  int64_t originalChunkSize = tdescTy.getChunkSizeAsInt();
555 
556  VectorType maskTy = llvm::dyn_cast<VectorType>(op.getMask().getType());
557 
558  Type elemTy = tdescTy.getElementType();
559  VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
560 
561  SmallVector<Type> convertedTdescTypes =
562  getUnrolledTypes(tdescTy, *targetShape);
563  SmallVector<Value> convertedTdescs = pack(
564  op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
565 
566  SmallVector<Type> convertedMaskTypes;
567  SmallVector<Value> convertedMasks;
568 
569  if (originalChunkSize > 1) {
570  targetMaskShape.pop_back();
571  convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
572  int64_t blockedChunkSize = targetShape->back();
573  int64_t numNewChunks = originalChunkSize / blockedChunkSize;
574 
575  // the mask is reused across the chunk_size dimension
576  for (auto mask : pack(op.getMask(), convertedMaskTypes, targetMaskShape,
577  loc, rewriter))
578  convertedMasks.append(numNewChunks, mask);
579 
580  newValueTy = valueTy.cloneWith(*targetShape, elemTy);
581  } else {
582  convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
583  convertedMasks = pack(op.getMask(), convertedMaskTypes, targetMaskShape,
584  loc, rewriter);
585  }
586 
587  SmallVector<Value> newOps;
588  for (auto [t, m] : llvm::zip(convertedTdescs, convertedMasks)) {
589  auto newOp = xegpu::LoadGatherOp::create(
590  rewriter, loc, newValueTy, t, m, op.getL1HintAttr(),
591  op.getL2HintAttr(), op.getL3HintAttr());
592  newOps.push_back(newOp);
593  }
594 
595  Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
596  rewriter.replaceOp(op, castOp);
597  return success();
598  }
599 };
600 
601 /// This pattern handles the unrolling of LoadGatherOp with offsets (gathered
602 /// load).
603 /// It unrolls the offsets and mask operands accordingly and creates multiple
604 /// LoadGatherOps with the unrolled operands.
605 struct UnrollLoadGatherOpWithOffset
606  : public UnrollPattern<xegpu::LoadGatherOp> {
607  using UnrollPattern<xegpu::LoadGatherOp>::UnrollPattern;
608  LogicalResult matchAndRewrite(xegpu::LoadGatherOp op,
609  PatternRewriter &rewriter) const override {
610  Location loc = op.getLoc();
611  VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
612  Value offsets = op.getOffsets();
613  Value mask = op.getMask();
614 
615  // Only handle the case where offsets are present (scattered load)
616  if (!offsets)
617  return failure();
618 
619  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
620  if (!targetShape)
621  return failure();
622 
623  SmallVector<int64_t> targetMaskShape(*targetShape);
624  int64_t chunkSize = 1;
625  if (auto chunkSizeAttr = op->getAttr("chunk_size")) {
626  if (auto intAttr = llvm::dyn_cast<IntegerAttr>(chunkSizeAttr))
627  chunkSize = intAttr.getInt();
628  }
629 
630  // Unroll mask and offsets with correct shape
631  VectorType maskTy = llvm::dyn_cast<VectorType>(mask.getType());
632  VectorType offsetsTy = llvm::dyn_cast<VectorType>(offsets.getType());
633  Type elemTy = valueTy.getElementType();
634  VectorType newValueTy = VectorType::get(*targetShape, elemTy);
635 
636  SmallVector<Type> convertedMaskTypes;
637  SmallVector<Value> convertedMasks;
638  SmallVector<Type> convertedOffsetTypes;
639  SmallVector<Value> convertedOffsets;
640 
641  if (chunkSize > 1) {
642  // For chunked loads, mask and offsets have one less dimension
643  targetMaskShape.pop_back();
644  int64_t blockedChunkSize = targetShape->back();
645  int64_t numNewChunks = chunkSize / blockedChunkSize;
646  chunkSize = blockedChunkSize;
647 
648  convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
649  convertedOffsetTypes = getUnrolledTypes(offsetsTy, targetMaskShape);
650 
651  SmallVector<Value> convertedMasksBase =
652  pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
653  SmallVector<Value> convertedOffsetsBase =
654  pack(offsets, convertedOffsetTypes, targetMaskShape, loc, rewriter);
655 
656  for (auto maskVal : convertedMasksBase)
657  convertedMasks.append(numNewChunks, maskVal);
658 
659  for (auto [baseOffset, offsetType] :
660  llvm::zip(convertedOffsetsBase, convertedOffsetTypes)) {
661  for (int64_t i = 0; i < numNewChunks; ++i) {
662  Value inc = arith::ConstantIndexOp::create(rewriter, loc,
663  i * blockedChunkSize);
664  Value incVec =
665  vector::BroadcastOp::create(rewriter, loc, offsetType, inc);
666  Value offsetVal =
667  arith::AddIOp::create(rewriter, loc, baseOffset, incVec);
668  convertedOffsets.push_back(offsetVal);
669  }
670  }
671  } else {
672  convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
673  convertedMasks =
674  pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
675 
676  convertedOffsetTypes = getUnrolledTypes(offsetsTy, *targetShape);
677  convertedOffsets =
678  pack(offsets, convertedOffsetTypes, *targetShape, loc, rewriter);
679  }
680 
681  SmallVector<Value> newOps;
682  for (auto [o, m] : llvm::zip(convertedOffsets, convertedMasks)) {
683  auto newOp = xegpu::LoadGatherOp::create(
684  rewriter, loc, newValueTy, op.getSource(), o, m,
685  rewriter.getI64IntegerAttr(chunkSize), op.getL1HintAttr(),
686  op.getL2HintAttr(), op.getL3HintAttr());
687  newOps.push_back(newOp);
688  }
689 
690  Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
691  rewriter.replaceOp(op, castOp);
692  return success();
693  }
694 };
695 
696 /// This pattern handles the unrolling of StoreScatterOp with offsets (scattered
697 /// store).
698 /// It unrolls the offsets and mask operands accordingly and creates multiple
699 /// StoreScatterOps with the unrolled operands.
700 struct UnrollStoreScatterOpWithOffsets
701  : public UnrollPattern<xegpu::StoreScatterOp> {
702  using UnrollPattern<xegpu::StoreScatterOp>::UnrollPattern;
703  LogicalResult matchAndRewrite(xegpu::StoreScatterOp op,
704  PatternRewriter &rewriter) const override {
705  Location loc = op.getLoc();
706  VectorType valueTy = llvm::dyn_cast<VectorType>(op.getValue().getType());
707  Value offsets = op.getOffsets();
708  Value mask = op.getMask();
709 
710  // Only handle the case where offsets are present (scattered store)
711  if (!offsets)
712  return failure();
713 
714  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
715  if (!targetShape)
716  return failure();
717 
718  int64_t chunkSize = 1;
719  if (auto chunkSizeAttr = op->getAttr("chunk_size")) {
720  if (auto intAttr = llvm::dyn_cast<IntegerAttr>(chunkSizeAttr))
721  chunkSize = intAttr.getInt();
722  }
723 
724  SmallVector<int64_t> targetMaskShape(*targetShape);
725  VectorType maskTy = llvm::dyn_cast<VectorType>(mask.getType());
726  VectorType offsetsTy = llvm::dyn_cast<VectorType>(offsets.getType());
727 
728  SmallVector<Type> convertedMaskTypes;
729  SmallVector<Value> convertedMasks;
730  SmallVector<Type> convertedOffsetTypes;
731  SmallVector<Value> convertedOffsets;
732 
733  if (chunkSize > 1) {
734  targetMaskShape.pop_back();
735  int64_t blockedChunkSize = targetShape->back();
736  int64_t numNewChunks = chunkSize / blockedChunkSize;
737  chunkSize = blockedChunkSize;
738 
739  convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
740  convertedOffsetTypes = getUnrolledTypes(offsetsTy, targetMaskShape);
741 
742  SmallVector<Value> convertedMasksBase =
743  pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
744  SmallVector<Value> convertedOffsetsBase =
745  pack(offsets, convertedOffsetTypes, targetMaskShape, loc, rewriter);
746 
747  for (auto maskVal : convertedMasksBase)
748  convertedMasks.append(numNewChunks, maskVal);
749 
750  for (auto [baseOffset, offsetType] :
751  llvm::zip(convertedOffsetsBase, convertedOffsetTypes)) {
752  for (int64_t i = 0; i < numNewChunks; ++i) {
753  Value inc = arith::ConstantIndexOp::create(rewriter, loc,
754  i * blockedChunkSize);
755  Value incVec =
756  vector::BroadcastOp::create(rewriter, loc, offsetType, inc);
757  Value offsetVal =
758  arith::AddIOp::create(rewriter, loc, baseOffset, incVec);
759  convertedOffsets.push_back(offsetVal);
760  }
761  }
762  } else {
763  convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
764  convertedMasks =
765  pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
766 
767  convertedOffsetTypes = getUnrolledTypes(offsetsTy, *targetShape);
768  convertedOffsets =
769  pack(offsets, convertedOffsetTypes, *targetShape, loc, rewriter);
770  }
771 
772  SmallVector<Type> convertedValTypes =
773  getUnrolledTypes(valueTy, *targetShape);
774  SmallVector<Value> convertedValues =
775  pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
776 
777  for (auto [v, o, m] :
778  llvm::zip(convertedValues, convertedOffsets, convertedMasks)) {
779  xegpu::StoreScatterOp::create(rewriter, loc, v, op.getDest(), o, m,
780  rewriter.getI64IntegerAttr(chunkSize),
781  op.getL1HintAttr(), op.getL2HintAttr(),
782  op.getL3HintAttr());
783  }
784 
785  rewriter.eraseOp(op);
786  return success();
787  }
788 };
789 
790 struct UnrollPrefetchOp : public UnrollPattern<xegpu::PrefetchOp> {
791  using UnrollPattern<xegpu::PrefetchOp>::UnrollPattern;
792  LogicalResult matchAndRewrite(xegpu::PrefetchOp op,
793  PatternRewriter &rewriter) const override {
794  Location loc = op.getLoc();
795  xegpu::TensorDescType tdescTy = op.getTensorDescType();
796 
797  // TODO: handle the unstructured source case (!tdescTy).
798  if (!tdescTy || op.getOffsets())
799  return failure();
800 
801  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
802  if (!targetShape)
803  return failure();
804 
805  SmallVector<Type> convertedTdescTypes =
806  getUnrolledTypes(tdescTy, *targetShape);
807  SmallVector<Value> convertedTdesc = pack(
808  op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
809 
810  for (auto t : convertedTdesc)
811  xegpu::PrefetchOp::create(rewriter, loc, TypeRange(), t, op->getAttrs());
812 
813  rewriter.eraseOp(op);
814  return success();
815  }
816 };
817 
818 struct UnrollStoreScatterOp : public UnrollPattern<xegpu::StoreScatterOp> {
819  using UnrollPattern<xegpu::StoreScatterOp>::UnrollPattern;
820  LogicalResult matchAndRewrite(xegpu::StoreScatterOp op,
821  PatternRewriter &rewriter) const override {
822 
823  Location loc = op.getLoc();
824  VectorType valueTy = llvm::dyn_cast<VectorType>(op.getValue().getType());
825  xegpu::TensorDescType tdescTy = op.getTensorDescType();
826 
827  // TODO: handle the unstructured source case (!tdescTy).
828  if (!tdescTy || op.getOffsets())
829  return failure();
830 
831  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
832  if (!targetShape)
833  return failure();
834 
835  SmallVector<int64_t> targetMaskShape(*targetShape);
836  int64_t originalChunkSize = tdescTy.getChunkSizeAsInt();
837 
838  VectorType maskTy = llvm::dyn_cast<VectorType>(op.getMask().getType());
839 
840  SmallVector<Type> convertedTdescTypes =
841  getUnrolledTypes(tdescTy, *targetShape);
842  SmallVector<Value> convertedTdescs = pack(
843  op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
844 
845  SmallVector<Type> convertedMaskTypes;
846  SmallVector<Value> convertedMasks;
847 
848  if (originalChunkSize > 1) {
849  targetMaskShape.pop_back();
850  int64_t blockedChunkSize = targetShape->back();
851  int64_t numNewChunks = originalChunkSize / blockedChunkSize;
852  convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
853 
854  // the mask is reused across the chunk_size dimension
855  for (auto mask : pack(op.getMask(), convertedMaskTypes, targetMaskShape,
856  loc, rewriter))
857  convertedMasks.append(numNewChunks, mask);
858  } else {
859  convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
860  convertedMasks = pack(op.getMask(), convertedMaskTypes, targetMaskShape,
861  loc, rewriter);
862  }
863 
864  SmallVector<Type> convertedValTypes =
865  getUnrolledTypes(valueTy, *targetShape);
866  SmallVector<Value> convertedValues =
867  pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
868 
869  for (size_t i = 0; i < convertedValues.size(); ++i) {
870  Value v = convertedValues[i];
871  Value t = convertedTdescs[i];
872  Value m = op.getMask() ? convertedMasks[i] : nullptr;
873  xegpu::StoreScatterOp::create(rewriter, loc, v, t, m, op.getL1HintAttr(),
874  op.getL2HintAttr(), op.getL3HintAttr());
875  }
876 
877  rewriter.eraseOp(op);
878  return success();
879  }
880 };
881 
882 struct UnrollUpdateOffsetOp : public UnrollPattern<xegpu::UpdateOffsetOp> {
883  using UnrollPattern<xegpu::UpdateOffsetOp>::UnrollPattern;
884  LogicalResult matchAndRewrite(xegpu::UpdateOffsetOp op,
885  PatternRewriter &rewriter) const override {
886  Location loc = op.getLoc();
887  xegpu::TensorDescType tdescTy = op.getTensorDescType();
888 
889  if (!tdescTy.isScattered())
890  return failure();
891 
892  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
893  if (!targetShape)
894  return failure();
895 
896  SmallVector<Type> convertedTdescTypes =
897  getUnrolledTypes(tdescTy, *targetShape);
898  SmallVector<Value> convertedTdesc = pack(
899  op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
900 
901  TypedValue<::mlir::VectorType> offsetVec = op.getOffsets();
902  VectorType offsetVecTy = offsetVec.getType();
903  SmallVector<Type> convertedOffsetTypes;
904  SmallVector<Value> convertedOffsetVec;
905  SmallVector<Value> newOps;
906  int64_t originalChunkSize = tdescTy.getChunkSizeAsInt();
907  if (originalChunkSize > 1) {
908  auto targetOffsetShape = ArrayRef<int64_t>(*targetShape).drop_back();
909  convertedOffsetTypes = getUnrolledTypes(offsetVecTy, targetOffsetShape);
910 
911  int64_t blockedChunkSize = targetShape->back();
912  int64_t numNewChunks = originalChunkSize / blockedChunkSize;
913  // the offset is reused across the chunk_size dimension
914  for (auto offset : pack(offsetVec, convertedOffsetTypes,
915  targetOffsetShape, loc, rewriter))
916  convertedOffsetVec.append(numNewChunks, offset);
917 
918  } else {
919  convertedOffsetTypes = getUnrolledTypes(offsetVecTy, *targetShape);
920  convertedOffsetVec =
921  pack(offsetVec, convertedOffsetTypes, *targetShape, loc, rewriter);
922  }
923 
924  for (auto [t, o] : llvm::zip(convertedTdesc, convertedOffsetVec)) {
925  auto newOp =
926  xegpu::UpdateOffsetOp::create(rewriter, loc, t.getType(), t, o);
927  newOps.push_back(newOp);
928  }
929  Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
930  rewriter.replaceOp(op, castOp);
931  return success();
932  }
933 };
934 
935 struct UnrollLoadMatrixOp : public UnrollPattern<xegpu::LoadMatrixOp> {
936  using UnrollPattern<xegpu::LoadMatrixOp>::UnrollPattern;
937  LogicalResult matchAndRewrite(xegpu::LoadMatrixOp op,
938  PatternRewriter &rewriter) const override {
939  Location loc = op.getLoc();
940  VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
941  assert(valueTy && "the value type must be vector type!");
942 
943  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
944  if (!targetShape || targetShape->size() != (size_t)valueTy.getRank())
945  return failure();
946 
947  Type elemTy = valueTy.getElementType();
948  ArrayRef<int64_t> shape = valueTy.getShape();
949  auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
950 
951  VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
952 
953  SmallVector<OpFoldResult> mixedOffsets = op.getMixedOffsets();
954  SmallVector<SmallVector<OpFoldResult>> offsetsList;
955  for (SmallVector<int64_t> offsets :
956  StaticTileOffsetRange(shape, *targetShape)) {
957  auto adds = xegpu::addElementwise(
958  rewriter, loc, mixedOffsets,
959  getAsIndexOpFoldResult(op.getContext(), offsets));
960  offsetsList.push_back(adds);
961  }
962 
963  SmallVector<Value> newOps;
964  layout = layout.dropInstData();
965  for (SmallVector<OpFoldResult> offsets : offsetsList) {
966  auto newOp = xegpu::LoadMatrixOp::create(
967  rewriter, op.getLoc(), newValueTy, op.getMemDesc(), offsets, layout);
968  newOps.push_back(newOp);
969  }
970  Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
971  rewriter.replaceOp(op, castOp);
972  return success();
973  }
974 };
975 
976 struct UnrollStoreMatrixOp : public UnrollPattern<xegpu::StoreMatrixOp> {
977  using UnrollPattern<xegpu::StoreMatrixOp>::UnrollPattern;
978  LogicalResult matchAndRewrite(xegpu::StoreMatrixOp op,
979  PatternRewriter &rewriter) const override {
980  std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
981  if (!targetShape)
982  return failure();
983 
984  Location loc = op.getLoc();
985  VectorType valueTy = llvm::dyn_cast<VectorType>(op.getData().getType());
986  assert(valueTy && "the value type must be vector type!");
987  ArrayRef<int64_t> shape = valueTy.getShape();
988  auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
989 
990  SmallVector<Type> convertedValTypes =
991  getUnrolledTypes(valueTy, *targetShape);
992  SmallVector<Value> convertedValues =
993  pack(op.getData(), convertedValTypes, *targetShape, loc, rewriter);
994 
995  SmallVector<OpFoldResult> mixedOffsets = op.getMixedOffsets();
996  SmallVector<SmallVector<OpFoldResult>> offsetsList;
997  for (SmallVector<int64_t> offsets :
998  StaticTileOffsetRange(shape, *targetShape)) {
999  auto adds = xegpu::addElementwise(
1000  rewriter, loc, mixedOffsets,
1001  getAsIndexOpFoldResult(op.getContext(), offsets));
1002  offsetsList.push_back(adds);
1003  }
1004 
1005  for (auto [v, offsets] : llvm::zip_equal(convertedValues, offsetsList))
1006  xegpu::StoreMatrixOp::create(rewriter, loc, v, op.getMemDesc(), offsets,
1007  layout.dropInstData());
1008 
1009  rewriter.eraseOp(op);
1010  return success();
1011  }
1012 };
1013 
1014 } // namespace
1015 
1016 void mlir::xegpu::populateXeGPUUnrollPatterns(
1017     RewritePatternSet &patterns, const UnrollOptions &options) {
1018  patterns
1019  .add<UnrollCreateNdOp, UnrollUpdateNdOffsetOp, UnrollPrefetchNdOp,
1020  UnrollLoadNdOp, UnrollStoreNdOp, UnrollDpasOp, UnrollCreateDescOp,
1021  UnrollLoadGatherOp, UnrollStoreScatterOp, UnrollPrefetchOp,
1022  UnrollUpdateOffsetOp, UnrollLoadMatrixOp, UnrollStoreMatrixOp,
1023  UnrollLoadGatherOpWithOffset, UnrollStoreScatterOpWithOffsets>(
1024  patterns.getContext(), options);
1025 }
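
// Usage sketch (assumptions, not part of this file): a pass might configure
// xegpu::UnrollOptions and then apply these patterns with the greedy driver.
// The setter names (setNativeShapeFn, setUnrolledTypesFn) and the driver entry
// point applyPatternsGreedily are assumptions based on similar unroll
// utilities; see XeGPU/Transforms/Transforms.h and the XeGPUBlocking pass for
// the actual API. Kept as a comment so the listing stays self-contained.
//
//   void runOnOperation() {
//     MLIRContext *ctx = &getContext();
//     xegpu::UnrollOptions options;
//     // Assumed setter: use a fixed 8x16 native tile for every op.
//     options.setNativeShapeFn(
//         [](Operation *op) -> std::optional<SmallVector<int64_t>> {
//           return SmallVector<int64_t>{8, 16};
//         });
//     // Assumed setter: derive per-tile types from the tile shape.
//     // (A real callback would also need to handle xegpu::TensorDescType
//     // layout and scatter attributes; this is simplified.)
//     options.setUnrolledTypesFn([](ShapedType type, ArrayRef<int64_t> tile,
//                                   bool returnSingleType) {
//       Type newTy = type.cloneWith(tile, type.getElementType());
//       int64_t numTiles =
//           returnSingleType
//               ? 1
//               : computeProduct(*computeShapeRatio(type.getShape(), tile));
//       return SmallVector<Type>(numTiles, newTy);
//     });
//     RewritePatternSet patterns(ctx);
//     xegpu::populateXeGPUUnrollPatterns(patterns, options);
//     (void)applyPatternsGreedily(getOperation(), std::move(patterns));
//   }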