XeGPUUnroll.cpp
1//===- XeGPUUnroll.cpp - patterns to do unrolling ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains patterns for unrolling XeGPU operations. It follows a
10// similar concept and design as vector unroll patterns, serving as a complement
11// to them.
12//
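// For illustration only (a rough sketch with made-up shapes, not taken from
// the original header): with a native tile shape of 8x16, a load such as
//
//   %v = xegpu.load_nd %tdesc : !xegpu.tensor_desc<16x32xf16> -> vector<16x32xf16>
//
// is rewritten into four loads on 8x16 tensor descriptors, and the four
// vector<8x16xf16> results are recombined into a single vector<16x32xf16>
// value with vector.insert_strided_slice.
//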
13//===----------------------------------------------------------------------===//
14
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/DebugLog.h"
21
22namespace mlir {
23namespace xegpu {
24#define GEN_PASS_DEF_XEGPUUNROLL
25#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
26} // namespace xegpu
27} // namespace mlir
28
29#define DEBUG_TYPE "xegpu-unroll"
30
31using namespace mlir;
32
33namespace {
34
35template <typename SourceOp>
36struct UnrollPattern : public OpRewritePattern<SourceOp> {
37 UnrollPattern(MLIRContext *context, const xegpu::UnrollOptions &options,
38 PatternBenefit benefit = 1)
39 : OpRewritePattern<SourceOp>(context, benefit), options(options) {}
40
41protected:
42 /// Return the target shape for the given `op`. Return std::nullopt if the
43 /// op shouldn't be or cannot be unrolled.
44 std::optional<SmallVector<int64_t>> getTargetShape(Operation *op) const {
45 LDBG() << "Get unroll shape for: " << *op;
46
47 if (options.filterConstraint && failed(options.filterConstraint(op))) {
48 LDBG() << "--filter constraint failed -> BAIL";
49 return std::nullopt;
50 }
51
52 assert(options.nativeShape &&
53 "expects the nativeShape callback to be provided in options.");
54 auto nativeShape = options.nativeShape(op);
55 return nativeShape;
56 }
57
58 SmallVector<Type> getUnrolledTypes(ShapedType type,
59 ArrayRef<int64_t> tileShape,
60 bool returnSingleType = false) const {
61 return options.getUnrolledTypes(type, tileShape, returnSingleType);
62 }
63
64 /// Emulate the unpack behavior using vector.insert_strided_slice for VectorType
65 /// values and unrealized_conversion_cast for TensorDescType values.
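  /// For example (an illustrative sketch): given four vector<8x16xf16> tiles
  /// and a vector<16x32xf16> destination type, this emits
  /// vector.insert_strided_slice ops to rebuild the large vector; for a
  /// TensorDescType destination it instead emits a
  /// builtin.unrealized_conversion_cast tagged with the pack/unpack attributes
  /// below, which is expected to be resolved later.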
66 Value unpack(ValueRange srcs, Type destTy, ArrayRef<int64_t> blockSize,
67 Location loc, PatternRewriter &rewriter) const {
68 if (auto vecTy = dyn_cast<VectorType>(destTy)) {
69 auto shape = vecTy.getShape();
70 return xegpu::createVectorWithShapeFromValues(rewriter, loc, srcs, shape);
71 }
72
73 if (isa<xegpu::TensorDescType>(destTy)) {
74 auto attr = NamedAttribute(rewriter.getStringAttr(unpackAttrName),
75 rewriter.getUnitAttr());
76 auto blkAttr = NamedAttribute(rewriter.getStringAttr(blockAttrName),
77 rewriter.getDenseI64ArrayAttr(blockSize));
78 auto castOp = UnrealizedConversionCastOp::create(
79 rewriter, loc, destTy, srcs,
80 ArrayRef<NamedAttribute>({attr, blkAttr}));
81 return castOp.getResult(0);
82 }
83
84 llvm_unreachable("Unexpected destTy.");
85 return Value();
86 }
87
88 /// Emulate the pack behavior using vector.extract_strided_slice for VectorType
89 /// values and unrealized_conversion_cast for TensorDescType values.
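  /// For example (an illustrative sketch): packing a vector<16x32xf16> value
  /// into 8x16 blocks yields four vector<8x16xf16> values produced by
  /// vector.extract_strided_slice; a TensorDescType source is instead routed
  /// through a tagged builtin.unrealized_conversion_cast.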
90 SmallVector<Value> pack(Value src, TypeRange destTypes,
91 ArrayRef<int64_t> blockSize, Location loc,
92 PatternRewriter &rewriter) const {
93 if (auto vecTy = dyn_cast<VectorType>(src.getType())) {
94 return xegpu::extractVectorsWithShapeFromValue(rewriter, loc, src,
95 blockSize);
96 }
97
98 if (isa<xegpu::TensorDescType>(src.getType())) {
99 auto attr = NamedAttribute(rewriter.getStringAttr(packAttrName),
100 rewriter.getUnitAttr());
101 auto blkAttr = NamedAttribute(rewriter.getStringAttr(blockAttrName),
102 rewriter.getDenseI64ArrayAttr(blockSize));
103 auto castOp = UnrealizedConversionCastOp::create(
104 rewriter, loc, destTypes, src,
105 ArrayRef<NamedAttribute>({attr, blkAttr}));
106 return castOp.getResults();
107 }
108
109 llvm_unreachable("Unexpected src type.");
110 return SmallVector<Value>();
111 }
112
113private:
114 const char *const packAttrName = "__xegpu_blocking_pack__";
115 const char *const unpackAttrName = "__xegpu_blocking_unpack__";
116 const char *const blockAttrName = "__xegpu_blocking_tile_shape__";
117
118 xegpu::UnrollOptions options;
119};
120
121// Generic helper function for unrolling operations with offsets.
122//
123// Iterates over tile offsets within the tensor descriptor shape and calls
124// the provided createOp function for each computed offset. This is used by
125// operations like LoadNd, StoreNd, CreateNdDesc, and PrefetchNd when they
126// have explicit offsets that need to be adjusted for each unrolled tile.
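//
// For example (hypothetical values): for a tensor descriptor of shape 16x32,
// a target shape of 8x16, and base offsets (%x, %y), createOp is invoked once
// per tile with offsets (%x+0, %y+0), (%x+0, %y+16), (%x+8, %y+0), and
// (%x+8, %y+16).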
127SmallVector<Value> computeUnrolledOffsets(
128 SmallVector<OpFoldResult> mixedOffsets, xegpu::TensorDescType tdescTy,
129 ArrayRef<int64_t> targetShape,
130 const std::function<Value(SmallVector<OpFoldResult>)> &createOp,
131 Location loc, PatternRewriter &rewriter) {
132 int64_t rank = tdescTy.getRank();
133 ArrayRef<int64_t> shape = tdescTy.getShape();
134
135 auto addi = [&](OpFoldResult a, int64_t b) -> Value {
136 std::optional<int64_t> maybeInt = getConstantIntValue(a);
137 if (maybeInt) {
138 return arith::ConstantIndexOp::create(rewriter, loc, *maybeInt + b);
139 } else {
140 auto aV = llvm::cast<Value>(a);
141 auto bV = arith::ConstantIndexOp::create(rewriter, loc, b);
142 return rewriter.createOrFold<arith::AddIOp>(loc, aV, bV);
143 }
144 };
145
146 SmallVector<OpFoldResult> oldOffsets = llvm::to_vector(
147 llvm::drop_begin(mixedOffsets, mixedOffsets.size() - rank));
148 auto validIdxes =
149 llvm::seq<int64_t>(mixedOffsets.size() - rank, mixedOffsets.size());
150
151 SmallVector<Value> newOps;
152 for (SmallVector<int64_t> offsets :
153 StaticTileOffsetRange(shape, targetShape)) {
154
155 for (auto [idx, oldOff, offset] :
156 llvm::zip(validIdxes, oldOffsets, offsets))
157 mixedOffsets[idx] = addi(oldOff, offset);
158
159 auto newOp = createOp(mixedOffsets);
160 newOps.push_back(newOp);
161 }
162 return newOps;
163}
164
165struct UnrollCreateNdOp : public UnrollPattern<xegpu::CreateNdDescOp> {
166 using UnrollPattern<xegpu::CreateNdDescOp>::UnrollPattern;
167 LogicalResult matchAndRewrite(xegpu::CreateNdDescOp op,
168 PatternRewriter &rewriter) const override {
169 Location loc = op.getLoc();
170 xegpu::TensorDescType tdescTy = op.getType();
171
172 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
173 if (!targetShape)
174 return failure();
175
176 SmallVector<Value> newOps;
177
178 auto newTdescTy = getUnrolledTypes(tdescTy, *targetShape)[0];
179 bool hasOffsets = op.getMixedOffsets().size() != 0;
180 if (!hasOffsets) {
181 auto newOp = xegpu::CreateNdDescOp::create(
182 rewriter, loc, newTdescTy, op.getSource(), op.getMixedSizes(),
183 op.getMixedStrides());
184 newOps.push_back(newOp);
185 } else {
186 auto createOp = [&](SmallVector<OpFoldResult> offsets) -> Value {
187 return xegpu::CreateNdDescOp::create(
188 rewriter, loc, newTdescTy, op.getSource(), offsets,
189 op.getMixedSizes(), op.getMixedStrides());
190 };
191
192 newOps = computeUnrolledOffsets(op.getMixedOffsets(), tdescTy,
193 *targetShape, createOp, loc, rewriter);
194 }
195 Value castOp = unpack(newOps, tdescTy, *targetShape, loc, rewriter);
196 rewriter.replaceOp(op, castOp);
197
198 return success();
199 }
200};
201
202struct UnrollUpdateNdOffsetOp : public UnrollPattern<xegpu::UpdateNdOffsetOp> {
203 using UnrollPattern<xegpu::UpdateNdOffsetOp>::UnrollPattern;
204 LogicalResult matchAndRewrite(xegpu::UpdateNdOffsetOp op,
205 PatternRewriter &rewriter) const override {
206 Location loc = op.getLoc();
207 xegpu::TensorDescType tdescTy = op.getTensorDescType();
208
209 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
210 if (!targetShape)
211 return failure();
212
213 SmallVector<Type> convertedTdescTypes =
214 getUnrolledTypes(tdescTy, *targetShape);
215 SmallVector<Value> convertedTdesc = pack(
216 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
217
218 SmallVector<Value> newOps;
219 for (auto t : convertedTdesc) {
220 auto newOp = xegpu::UpdateNdOffsetOp::create(
221 rewriter, loc, t.getType(), t, op.getOffsets(), op.getConstOffsets());
222 newOps.push_back(newOp);
223 }
224 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
225 rewriter.replaceOp(op, castOp);
226 return success();
227 }
228};
229
230struct UnrollPrefetchNdOp : public UnrollPattern<xegpu::PrefetchNdOp> {
231 using UnrollPattern<xegpu::PrefetchNdOp>::UnrollPattern;
232 LogicalResult matchAndRewrite(xegpu::PrefetchNdOp op,
233 PatternRewriter &rewriter) const override {
234 Location loc = op.getLoc();
235 xegpu::TensorDescType tdescTy = op.getTensorDescType();
236
237 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
238 if (!targetShape)
239 return failure();
240
241 xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
242 if (layout)
243 layout = layout.dropInstData();
244 int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
245 bool hasOffsets = (offsetSize != 0) || op.getConstOffsetsAttr();
246
247 SmallVector<Type> convertedTdescTypes = getUnrolledTypes(
248 tdescTy, *targetShape, /*returnSingleType*/ hasOffsets);
249
250 SmallVector<Value> convertedTdesc = pack(
251 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
252
253 if (!hasOffsets) {
254 for (auto t : convertedTdesc)
255 xegpu::PrefetchNdOp::create(rewriter, loc, TypeRange(), t,
256 xegpu::dropInstDataOnAttrs(op->getAttrs()));
257 } else {
258 auto createPrefetch = [&](SmallVector<OpFoldResult> offsets) -> Value {
259 xegpu::PrefetchNdOp::create(rewriter, loc, convertedTdesc[0], offsets,
260 op.getL1HintAttr(), op.getL2HintAttr(),
261 op.getL3HintAttr(), layout);
262 // Return a dummy Value to satisfy the function's signature.
263 return nullptr;
264 };
265
266 computeUnrolledOffsets(op.getMixedOffsets(), tdescTy, *targetShape,
267 createPrefetch, loc, rewriter);
268 }
269
270 rewriter.eraseOp(op);
271 return success();
272 }
273};
274
275struct UnrollLoadNdOp : public UnrollPattern<xegpu::LoadNdOp> {
276 using UnrollPattern<xegpu::LoadNdOp>::UnrollPattern;
277 LogicalResult matchAndRewrite(xegpu::LoadNdOp op,
278 PatternRewriter &rewriter) const override {
279
280 Location loc = op.getLoc();
281 VectorType valueTy = op.getType();
282 xegpu::TensorDescType tdescTy = op.getTensorDescType();
283
284 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
285 if (!targetShape)
286 return failure();
287
288 xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
289 if (layout)
290 layout = layout.dropInstData();
291 int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
292 bool hasOffsets = (offsetSize != 0) || op.getConstOffsetsAttr();
293
294 Type elemTy = tdescTy.getElementType();
295 VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
296
297 SmallVector<Type> convertedTdescTypes = getUnrolledTypes(
298 tdescTy, *targetShape, /*returnSingleType*/ hasOffsets);
299
300 SmallVector<Value> convertedTdescs = pack(
301 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
302 SmallVector<Value> newOps;
303
304 if (!hasOffsets) {
305 for (auto t : convertedTdescs) {
306 auto newOp =
307 xegpu::LoadNdOp::create(rewriter, loc, newValueTy, t,
308 xegpu::dropInstDataOnAttrs(op->getAttrs()));
309 newOps.push_back(newOp);
310 }
311 } else {
312 auto createLoad = [&](SmallVector<OpFoldResult> offsets) {
313 return xegpu::LoadNdOp::create(
314 rewriter, loc, newValueTy, convertedTdescs[0], offsets,
315 op.getPackedAttr(), op.getTransposeAttr(), op.getL1HintAttr(),
316 op.getL2HintAttr(), op.getL3HintAttr(), layout);
317 };
318 newOps = computeUnrolledOffsets(op.getMixedOffsets(), tdescTy,
319 *targetShape, createLoad, loc, rewriter);
320 }
321
322 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
323
324 rewriter.replaceOp(op, castOp);
325 return success();
326 }
327};
328
329struct UnrollStoreNdOp : public UnrollPattern<xegpu::StoreNdOp> {
330 using UnrollPattern<xegpu::StoreNdOp>::UnrollPattern;
331 LogicalResult matchAndRewrite(xegpu::StoreNdOp op,
332 PatternRewriter &rewriter) const override {
333 Location loc = op.getLoc();
334 VectorType valueTy = op.getValueType();
335 xegpu::TensorDescType tdescTy = op.getTensorDescType();
336
337 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
338 if (!targetShape)
339 return failure();
340
341 xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
342 if (layout)
343 layout = layout.dropInstData();
344 int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
345 bool hasOffsets = (offsetSize != 0) || op.getConstOffsetsAttr();
346
347 SmallVector<Type> convertedValTypes =
348 getUnrolledTypes(valueTy, *targetShape);
349 SmallVector<Type> convertedTdescTypes = getUnrolledTypes(
350 tdescTy, *targetShape, /*returnSingleType*/ hasOffsets);
351
352 SmallVector<Value> convertedTdescs = pack(
353 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
354
355 SmallVector<Value> convertedValues =
356 pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
357 if (!hasOffsets) {
358 for (auto [v, t] : llvm::zip(convertedValues, convertedTdescs))
359 xegpu::StoreNdOp::create(rewriter, loc, v, t, op.getL1HintAttr(),
360 op.getL2HintAttr(), op.getL3HintAttr());
361 } else {
362 size_t valueIndex = 0;
363 auto createStore = [&](SmallVector<OpFoldResult> offsets) {
364 xegpu::StoreNdOp::create(rewriter, loc, convertedValues[valueIndex++],
365 convertedTdescs[0], offsets,
366 op.getL1HintAttr(), op.getL2HintAttr(),
367 op.getL3HintAttr(), layout);
368 // Return a dummy Value to satisfy the function's signature.
369 return nullptr;
370 };
371
372 computeUnrolledOffsets(op.getMixedOffsets(), tdescTy, *targetShape,
373 createStore, loc, rewriter);
374 }
375
376 rewriter.eraseOp(op);
377 return success();
378 }
379};
380
381struct UnrollDpasOp : public UnrollPattern<xegpu::DpasOp> {
382 using UnrollPattern<xegpu::DpasOp>::UnrollPattern;
383 LogicalResult matchAndRewrite(xegpu::DpasOp op,
384 PatternRewriter &rewriter) const override {
385 Location loc = op.getLoc();
386
387 // Expect every operand to be a 2D vector.
388 if (llvm::any_of(op->getOperandTypes(), [&](Type type) {
389 auto vecTy = dyn_cast<VectorType>(type);
390 return !vecTy || vecTy.getRank() != 2;
391 }))
392 return failure();
393
394 // A vector of 3 elements should be returned, representing M, K, N
395 // respectively.
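    // For example (hypothetical sizes): a 32x32 * 32x32 dpas with target shape
    // [8, 16, 16] is unrolled into a 4x2 grid of 8x16 result tiles, each
    // accumulated over two 8x16 * 16x16 dpas ops along the K dimension.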
396 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
397 if (!targetShape || targetShape->size() != 3)
398 return failure();
399 auto M = (*targetShape)[0];
400 auto K = (*targetShape)[1];
401 auto N = (*targetShape)[2];
402
403 int64_t aBlockSize[2] = {M, K};
404 int64_t bBlockSize[2] = {K, N};
405 int64_t cBlockSize[2] = {M, N};
406
407 auto packWrapper = [&](TypedValue<VectorType> val,
408 ArrayRef<int64_t> blockSize) {
409 VectorType type = val.getType();
410 std::optional<SmallVector<int64_t>> grids =
411 computeShapeRatio(type.getShape(), blockSize);
412 assert(grids && "Expecting grids to be computed.");
413 auto numNewOps = computeProduct(*grids);
414 if (numNewOps == 1)
415 return SmallVector<Value>({val});
416 VectorType newVecTy = type.cloneWith(blockSize, type.getElementType());
417 SmallVector<Type> convertedTypes(numNewOps, newVecTy);
418 SmallVector<Value> values =
419 pack(val, convertedTypes, blockSize, loc, rewriter);
420 return values;
421 };
422
423 auto a = op.getLhs();
424 auto b = op.getRhs();
425 auto c = op.getAcc();
426
427 auto aShape = a.getType().getShape();
428 auto bShape = b.getType().getShape();
429
430 SmallVector<Value> aVals, bVals, cVals;
431 aVals = packWrapper(a, aBlockSize);
432 bVals = packWrapper(b, bBlockSize);
433
434 if (c)
435 cVals = packWrapper(c, cBlockSize);
436
437 // Skip the operation if any operand has an invalid blocking size (empty),
438 // or if every operand already matches its blocking size (size == 1).
439 auto ranges = c ? SmallVector<ValueRange>({aVals, bVals, cVals})
440 : SmallVector<ValueRange>({aVals, bVals});
441 if (llvm::any_of(ranges, [](auto &v) { return v.size() == 0; }) ||
442 llvm::all_of(ranges, [](auto &v) { return v.size() == 1; }))
443 return failure();
444
445 VectorType resultTy = op.getResult().getType();
446 auto vecTy = VectorType::get(cBlockSize, resultTy.getElementType());
447
448 int64_t mIters = aShape[0] / M;
449 int64_t kIters = aShape[1] / K;
450 int64_t nIters = bShape[1] / N;
451
452 SmallVector<Value> newOps;
453 for (int64_t i = 0; i < mIters; ++i) {
454 for (int64_t j = 0; j < nIters; ++j) {
455 Value tmpC;
456 if (c)
457 tmpC = cVals[i * nIters + j]; // init with acc
458
459 for (int64_t k = 0; k < kIters; ++k) {
460 Value aVec = aVals[i * kIters + k];
461 Value bVec = bVals[k * nIters + j];
462 SmallVector<Value> operands({aVec, bVec});
463 if (tmpC)
464 operands.push_back(tmpC);
465
466 tmpC =
467 xegpu::DpasOp::create(rewriter, loc, vecTy, operands,
468 xegpu::dropInstDataOnAttrs(op->getAttrs()));
469 }
470 newOps.push_back(tmpC);
471 }
472 }
473 Value castOp = unpack(newOps, resultTy, cBlockSize, loc, rewriter);
474 rewriter.replaceOp(op, castOp);
475 return success();
476 }
477};
478
479struct UnrollCreateDescOp : public UnrollPattern<xegpu::CreateDescOp> {
480 using UnrollPattern<xegpu::CreateDescOp>::UnrollPattern;
481 LogicalResult matchAndRewrite(xegpu::CreateDescOp op,
482 PatternRewriter &rewriter) const override {
483 Location loc = op.getLoc();
484 xegpu::TensorDescType tdescTy = op.getType();
485 TypedValue<::mlir::VectorType> indiceVec = op.getOffsets();
486 VectorType indiceVecTy = indiceVec.getType();
487
488 if (!tdescTy.isScattered())
489 return failure();
490
491 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
492 if (!targetShape)
493 return failure();
494
495 SmallVector<int64_t> targetIndiceShape(*targetShape);
496 int64_t originalChunkSize = tdescTy.getChunkSizeAsInt();
497 // indiceVec has one fewer dimension than tdescTy when chunkSize is larger than 1.
498 if (originalChunkSize > 1)
499 targetIndiceShape.pop_back();
500
501 auto newTdescTy = getUnrolledTypes(tdescTy, *targetShape)[0];
502 SmallVector<Type> convertedIndiceTypes =
503 getUnrolledTypes(indiceVecTy, targetIndiceShape);
504 SmallVector<Value> convertedIndiceVec =
505 pack(indiceVec, convertedIndiceTypes, targetIndiceShape, loc, rewriter);
506
507 SmallVector<Value> newOps;
508
509 // More indices are needed when chunkSize > 1, since a large load from one
510 // address may be broken into multiple smaller loads.
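    // For example (hypothetical sizes): with an original chunk size of 8 and a
    // blocked chunk size of 2, each converted index tile yields four new
    // descriptors whose indices are shifted by 0, 2, 4, and 6 elements.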
511 if (originalChunkSize > 1) {
512 int64_t blockedChunkSize = targetShape->back();
513 int64_t numNewChunks = originalChunkSize / blockedChunkSize;
514
515 for (auto [indice, indiceType] :
516 llvm::zip(convertedIndiceVec, convertedIndiceTypes)) {
517 for (int64_t i = 0; i < numNewChunks; ++i) {
518 // Compute the offset
519 Value inc = arith::ConstantIndexOp::create(rewriter, loc,
520 i * blockedChunkSize);
521 Value incVec =
522 vector::BroadcastOp::create(rewriter, loc, indiceType, inc);
523 Value offsetIndice =
524 arith::AddIOp::create(rewriter, loc, indice, incVec);
525
526 auto newOp = xegpu::CreateDescOp::create(
527 rewriter, loc, newTdescTy, op.getSource(), offsetIndice);
528
529 newOps.push_back(newOp);
530 }
531 }
532 } else {
533 for (auto indice : convertedIndiceVec) {
534 auto newOp = xegpu::CreateDescOp::create(rewriter, loc, newTdescTy,
535 op.getSource(), indice);
536 newOps.push_back(newOp);
537 }
538 }
539
540 Value castOp = unpack(newOps, tdescTy, *targetShape, loc, rewriter);
541 rewriter.replaceOp(op, castOp);
542
543 return success();
544 }
545};
546
547struct UnrollLoadGatherOp : public UnrollPattern<xegpu::LoadGatherOp> {
548 using UnrollPattern<xegpu::LoadGatherOp>::UnrollPattern;
549 LogicalResult matchAndRewrite(xegpu::LoadGatherOp op,
550 PatternRewriter &rewriter) const override {
551
552 Location loc = op.getLoc();
553 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getValue().getType());
554 xegpu::TensorDescType tdescTy = op.getTensorDescType();
555
556 // TODO: handle the unstructured source case (!tdescTy)
557 if (!tdescTy || op.getOffsets())
558 return failure();
559
560 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
561 if (!targetShape)
562 return failure();
563
564 SmallVector<int64_t> targetMaskShape(*targetShape);
565 int64_t originalChunkSize = tdescTy.getChunkSizeAsInt();
566
567 VectorType maskTy = llvm::dyn_cast<VectorType>(op.getMask().getType());
568
569 Type elemTy = tdescTy.getElementType();
570 VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
571
572 SmallVector<Type> convertedTdescTypes =
573 getUnrolledTypes(tdescTy, *targetShape);
574 SmallVector<Value> convertedTdescs = pack(
575 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
576
577 SmallVector<Type> convertedMaskTypes;
578 SmallVector<Value> convertedMasks;
579
580 if (originalChunkSize > 1) {
581 targetMaskShape.pop_back();
582 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
583 int64_t blockedChunkSize = targetShape->back();
584 int64_t numNewChunks = originalChunkSize / blockedChunkSize;
585
586 // the mask is reused across the chunk_size dimension
587 for (auto mask : pack(op.getMask(), convertedMaskTypes, targetMaskShape,
588 loc, rewriter))
589 convertedMasks.append(numNewChunks, mask);
590
591 newValueTy = valueTy.cloneWith(*targetShape, elemTy);
592 } else {
593 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
594 convertedMasks = pack(op.getMask(), convertedMaskTypes, targetMaskShape,
595 loc, rewriter);
596 }
597
598 SmallVector<Value> newOps;
599 for (auto [t, m] : llvm::zip(convertedTdescs, convertedMasks)) {
600 auto newOp = xegpu::LoadGatherOp::create(
601 rewriter, loc, newValueTy, t, m, op.getL1HintAttr(),
602 op.getL2HintAttr(), op.getL3HintAttr());
603 newOps.push_back(newOp);
604 }
605
606 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
607 rewriter.replaceOp(op, castOp);
608 return success();
609 }
610};
611
612/// This pattern handles the unrolling of LoadGatherOp with offsets (gathered
613/// load).
614/// It unrolls the offsets and mask operands accordingly, and creates multiple
615/// LoadGatherOp with the unrolled operands.
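/// For example (an illustrative sketch): with chunk_size 8 and a target shape
/// whose innermost dimension is 4, each unrolled offset tile is emitted twice,
/// shifted by 0 and by 4, while the corresponding mask tile is reused for both
/// resulting loads.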
616struct UnrollLoadGatherOpWithOffset
617 : public UnrollPattern<xegpu::LoadGatherOp> {
618 using UnrollPattern<xegpu::LoadGatherOp>::UnrollPattern;
619 LogicalResult matchAndRewrite(xegpu::LoadGatherOp op,
620 PatternRewriter &rewriter) const override {
621 Location loc = op.getLoc();
622 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
623 Value offsets = op.getOffsets();
624 Value mask = op.getMask();
625
626 // Only handle the case where offsets are present (scattered load)
627 if (!offsets)
628 return failure();
629
630 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
631 if (!targetShape)
632 return failure();
633
634 SmallVector<int64_t> targetMaskShape(*targetShape);
635 int64_t chunkSize = 1;
636 if (auto chunkSizeAttr = op->getAttr("chunk_size")) {
637 if (auto intAttr = llvm::dyn_cast<IntegerAttr>(chunkSizeAttr))
638 chunkSize = intAttr.getInt();
639 }
640
641 // Unroll mask and offsets with correct shape
642 VectorType maskTy = llvm::dyn_cast<VectorType>(mask.getType());
643 VectorType offsetsTy = llvm::dyn_cast<VectorType>(offsets.getType());
644 Type elemTy = valueTy.getElementType();
645 VectorType newValueTy = VectorType::get(*targetShape, elemTy);
646
647 SmallVector<Type> convertedMaskTypes;
648 SmallVector<Value> convertedMasks;
649 SmallVector<Type> convertedOffsetTypes;
650 SmallVector<Value> convertedOffsets;
651
652 if (chunkSize > 1) {
653 // For chunked loads, mask and offsets have one less dimension
654 targetMaskShape.pop_back();
655 int64_t blockedChunkSize = targetShape->back();
656 int64_t numNewChunks = chunkSize / blockedChunkSize;
657 chunkSize = blockedChunkSize;
658
659 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
660 convertedOffsetTypes = getUnrolledTypes(offsetsTy, targetMaskShape);
661
662 SmallVector<Value> convertedMasksBase =
663 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
664 SmallVector<Value> convertedOffsetsBase =
665 pack(offsets, convertedOffsetTypes, targetMaskShape, loc, rewriter);
666
667 for (auto maskVal : convertedMasksBase)
668 convertedMasks.append(numNewChunks, maskVal);
669
670 for (auto [baseOffset, offsetType] :
671 llvm::zip(convertedOffsetsBase, convertedOffsetTypes)) {
672 for (int64_t i = 0; i < numNewChunks; ++i) {
673 Value inc = arith::ConstantIndexOp::create(rewriter, loc,
674 i * blockedChunkSize);
675 Value incVec =
676 vector::BroadcastOp::create(rewriter, loc, offsetType, inc);
677 Value offsetVal =
678 arith::AddIOp::create(rewriter, loc, baseOffset, incVec);
679 convertedOffsets.push_back(offsetVal);
680 }
681 }
682 } else {
683 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
684 convertedMasks =
685 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
686
687 convertedOffsetTypes = getUnrolledTypes(offsetsTy, *targetShape);
688 convertedOffsets =
689 pack(offsets, convertedOffsetTypes, *targetShape, loc, rewriter);
690 }
691
692 auto layout = op.getLayoutAttr();
693 if (layout)
694 layout = layout.dropInstData();
695
696 SmallVector<Value> newOps;
697 for (auto [o, m] : llvm::zip(convertedOffsets, convertedMasks)) {
698 auto newOp = xegpu::LoadGatherOp::create(
699 rewriter, loc, newValueTy, op.getSource(), o, m,
700 rewriter.getI64IntegerAttr(chunkSize), op.getL1HintAttr(),
701 op.getL2HintAttr(), op.getL3HintAttr(), layout);
702 newOps.push_back(newOp);
703 }
704
705 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
706 rewriter.replaceOp(op, castOp);
707 return success();
708 }
709};
710
711/// This pattern handles the unrolling of StoreScatterOp with offsets (scattered
712/// store).
713/// It unrolls the offsets and mask operands accordingly, and creates multiple
714/// StoreScatterOp with the unrolled operands.
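/// The chunk_size handling mirrors the gathered-load case above: for
/// chunk_size > 1 the mask tiles are reused across the chunk dimension, and
/// the offset tiles are shifted by multiples of the blocked chunk size.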
715struct UnrollStoreScatterOpWithOffsets
716 : public UnrollPattern<xegpu::StoreScatterOp> {
717 using UnrollPattern<xegpu::StoreScatterOp>::UnrollPattern;
718 LogicalResult matchAndRewrite(xegpu::StoreScatterOp op,
719 PatternRewriter &rewriter) const override {
720 Location loc = op.getLoc();
721 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getValue().getType());
722 Value offsets = op.getOffsets();
723 Value mask = op.getMask();
724
725 // Only handle the case where offsets are present (scattered store)
726 if (!offsets)
727 return failure();
728
729 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
730 if (!targetShape)
731 return failure();
732
733 int64_t chunkSize = 1;
734 if (auto chunkSizeAttr = op->getAttr("chunk_size")) {
735 if (auto intAttr = llvm::dyn_cast<IntegerAttr>(chunkSizeAttr))
736 chunkSize = intAttr.getInt();
737 }
738
739 SmallVector<int64_t> targetMaskShape(*targetShape);
740 VectorType maskTy = llvm::dyn_cast<VectorType>(mask.getType());
741 VectorType offsetsTy = llvm::dyn_cast<VectorType>(offsets.getType());
742
743 SmallVector<Type> convertedMaskTypes;
744 SmallVector<Value> convertedMasks;
745 SmallVector<Type> convertedOffsetTypes;
746 SmallVector<Value> convertedOffsets;
747
748 if (chunkSize > 1) {
749 targetMaskShape.pop_back();
750 int64_t blockedChunkSize = targetShape->back();
751 int64_t numNewChunks = chunkSize / blockedChunkSize;
752 chunkSize = blockedChunkSize;
753
754 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
755 convertedOffsetTypes = getUnrolledTypes(offsetsTy, targetMaskShape);
756
757 SmallVector<Value> convertedMasksBase =
758 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
759 SmallVector<Value> convertedOffsetsBase =
760 pack(offsets, convertedOffsetTypes, targetMaskShape, loc, rewriter);
761
762 for (auto maskVal : convertedMasksBase)
763 convertedMasks.append(numNewChunks, maskVal);
764
765 for (auto [baseOffset, offsetType] :
766 llvm::zip(convertedOffsetsBase, convertedOffsetTypes)) {
767 for (int64_t i = 0; i < numNewChunks; ++i) {
768 Value inc = arith::ConstantIndexOp::create(rewriter, loc,
769 i * blockedChunkSize);
770 Value incVec =
771 vector::BroadcastOp::create(rewriter, loc, offsetType, inc);
772 Value offsetVal =
773 arith::AddIOp::create(rewriter, loc, baseOffset, incVec);
774 convertedOffsets.push_back(offsetVal);
775 }
776 }
777 } else {
778 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
779 convertedMasks =
780 pack(mask, convertedMaskTypes, targetMaskShape, loc, rewriter);
781
782 convertedOffsetTypes = getUnrolledTypes(offsetsTy, *targetShape);
783 convertedOffsets =
784 pack(offsets, convertedOffsetTypes, *targetShape, loc, rewriter);
785 }
786
787 SmallVector<Type> convertedValTypes =
788 getUnrolledTypes(valueTy, *targetShape);
789 SmallVector<Value> convertedValues =
790 pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
791
792 auto layout = op.getLayoutAttr();
793 if (layout)
794 layout = layout.dropInstData();
795
796 for (auto [v, o, m] :
797 llvm::zip(convertedValues, convertedOffsets, convertedMasks)) {
798 xegpu::StoreScatterOp::create(rewriter, loc, v, op.getDest(), o, m,
799 rewriter.getI64IntegerAttr(chunkSize),
800 op.getL1HintAttr(), op.getL2HintAttr(),
801 op.getL3HintAttr(), layout);
802 }
803
804 rewriter.eraseOp(op);
805 return success();
806 }
807};
808
809struct UnrollPrefetchOp : public UnrollPattern<xegpu::PrefetchOp> {
810 using UnrollPattern<xegpu::PrefetchOp>::UnrollPattern;
811 LogicalResult matchAndRewrite(xegpu::PrefetchOp op,
812 PatternRewriter &rewriter) const override {
813 Location loc = op.getLoc();
814 xegpu::TensorDescType tdescTy = op.getTensorDescType();
815
816 // TODO: handle the unstructured source case (!tdescTy)
817 if (!tdescTy || op.getOffsets())
818 return failure();
819
820 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
821 if (!targetShape)
822 return failure();
823
824 SmallVector<Type> convertedTdescTypes =
825 getUnrolledTypes(tdescTy, *targetShape);
826 SmallVector<Value> convertedTdesc = pack(
827 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
828
829 for (auto t : convertedTdesc)
830 xegpu::PrefetchOp::create(rewriter, loc, TypeRange(), t,
831 xegpu::dropInstDataOnAttrs(op->getAttrs()));
832
833 rewriter.eraseOp(op);
834 return success();
835 }
836};
837
838struct UnrollStoreScatterOp : public UnrollPattern<xegpu::StoreScatterOp> {
839 using UnrollPattern<xegpu::StoreScatterOp>::UnrollPattern;
840 LogicalResult matchAndRewrite(xegpu::StoreScatterOp op,
841 PatternRewriter &rewriter) const override {
842
843 Location loc = op.getLoc();
844 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getValue().getType());
845 xegpu::TensorDescType tdescTy = op.getTensorDescType();
846
847 // TODO: handle the unstructured source case (!tdescTy)
848 if (!tdescTy || op.getOffsets())
849 return failure();
850
851 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
852 if (!targetShape)
853 return failure();
854
855 SmallVector<int64_t> targetMaskShape(*targetShape);
856 int64_t originalChunkSize = tdescTy.getChunkSizeAsInt();
857
858 VectorType maskTy = llvm::dyn_cast<VectorType>(op.getMask().getType());
859
860 SmallVector<Type> convertedTdescTypes =
861 getUnrolledTypes(tdescTy, *targetShape);
862 SmallVector<Value> convertedTdescs = pack(
863 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
864
865 SmallVector<Type> convertedMaskTypes;
866 SmallVector<Value> convertedMasks;
867
868 if (originalChunkSize > 1) {
869 targetMaskShape.pop_back();
870 int64_t blockedChunkSize = targetShape->back();
871 int64_t numNewChunks = originalChunkSize / blockedChunkSize;
872 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
873
874 // the mask is reused across the chunk_size dimension
875 for (auto mask : pack(op.getMask(), convertedMaskTypes, targetMaskShape,
876 loc, rewriter))
877 convertedMasks.append(numNewChunks, mask);
878 } else {
879 convertedMaskTypes = getUnrolledTypes(maskTy, targetMaskShape);
880 convertedMasks = pack(op.getMask(), convertedMaskTypes, targetMaskShape,
881 loc, rewriter);
882 }
883
884 SmallVector<Type> convertedValTypes =
885 getUnrolledTypes(valueTy, *targetShape);
886 SmallVector<Value> convertedValues =
887 pack(op.getValue(), convertedValTypes, *targetShape, loc, rewriter);
888
889 for (size_t i = 0; i < convertedValues.size(); ++i) {
890 Value v = convertedValues[i];
891 Value t = convertedTdescs[i];
892 Value m = op.getMask() ? convertedMasks[i] : nullptr;
893 xegpu::StoreScatterOp::create(rewriter, loc, v, t, m, op.getL1HintAttr(),
894 op.getL2HintAttr(), op.getL3HintAttr());
895 }
896
897 rewriter.eraseOp(op);
898 return success();
899 }
900};
901
902struct UnrollUpdateOffsetOp : public UnrollPattern<xegpu::UpdateOffsetOp> {
903 using UnrollPattern<xegpu::UpdateOffsetOp>::UnrollPattern;
904 LogicalResult matchAndRewrite(xegpu::UpdateOffsetOp op,
905 PatternRewriter &rewriter) const override {
906 Location loc = op.getLoc();
907 xegpu::TensorDescType tdescTy = op.getTensorDescType();
908
909 if (!tdescTy.isScattered())
910 return failure();
911
912 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
913 if (!targetShape)
914 return failure();
915
916 SmallVector<Type> convertedTdescTypes =
917 getUnrolledTypes(tdescTy, *targetShape);
918 SmallVector<Value> convertedTdesc = pack(
919 op.getTensorDesc(), convertedTdescTypes, *targetShape, loc, rewriter);
920
921 TypedValue<::mlir::VectorType> offsetVec = op.getOffsets();
922 VectorType offsetVecTy = offsetVec.getType();
923 SmallVector<Type> convertedOffsetTypes;
924 SmallVector<Value> convertedOffsetVec;
925 SmallVector<Value> newOps;
926 int64_t originalChunkSize = tdescTy.getChunkSizeAsInt();
927 if (originalChunkSize > 1) {
928 auto targetOffsetShape = ArrayRef<int64_t>(*targetShape).drop_back();
929 convertedOffsetTypes = getUnrolledTypes(offsetVecTy, targetOffsetShape);
930
931 int64_t blockedChunkSize = targetShape->back();
932 int64_t numNewChunks = originalChunkSize / blockedChunkSize;
933 // the offset is reused across the chunk_size dimension
934 for (auto offset : pack(offsetVec, convertedOffsetTypes,
935 targetOffsetShape, loc, rewriter))
936 convertedOffsetVec.append(numNewChunks, offset);
937
938 } else {
939 convertedOffsetTypes = getUnrolledTypes(offsetVecTy, *targetShape);
940 convertedOffsetVec =
941 pack(offsetVec, convertedOffsetTypes, *targetShape, loc, rewriter);
942 }
943
944 for (auto [t, o] : llvm::zip(convertedTdesc, convertedOffsetVec)) {
945 auto newOp =
946 xegpu::UpdateOffsetOp::create(rewriter, loc, t.getType(), t, o);
947 newOps.push_back(newOp);
948 }
949 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
950 rewriter.replaceOp(op, castOp);
951 return success();
952 }
953};
954
955struct UnrollLoadMatrixOp : public UnrollPattern<xegpu::LoadMatrixOp> {
956 using UnrollPattern<xegpu::LoadMatrixOp>::UnrollPattern;
957 LogicalResult matchAndRewrite(xegpu::LoadMatrixOp op,
958 PatternRewriter &rewriter) const override {
959 Location loc = op.getLoc();
960 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getType());
961 assert(valueTy && "the value type must be a vector type!");
962
963 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
964 if (!targetShape || targetShape->size() != (size_t)valueTy.getRank())
965 return failure();
966
967 Type elemTy = valueTy.getElementType();
968 ArrayRef<int64_t> shape = valueTy.getShape();
969 auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
970
971 VectorType newValueTy = valueTy.cloneWith(*targetShape, elemTy);
972
973 SmallVector<OpFoldResult> mixedOffsets = op.getMixedOffsets();
974 SmallVector<SmallVector<OpFoldResult>> offsetsList;
975 for (SmallVector<int64_t> offsets :
976 StaticTileOffsetRange(shape, *targetShape)) {
977 auto adds = xegpu::addElementwise(
978 rewriter, loc, mixedOffsets,
979 getAsIndexOpFoldResult(op.getContext(), offsets));
980 offsetsList.push_back(adds);
981 }
982
983 SmallVector<Value> newOps;
984 layout = layout.dropInstData();
985 for (SmallVector<OpFoldResult> offsets : offsetsList) {
986 auto newOp = xegpu::LoadMatrixOp::create(
987 rewriter, op.getLoc(), newValueTy, op.getMemDesc(), offsets, layout);
988 newOps.push_back(newOp);
989 }
990 Value castOp = unpack(newOps, op.getType(), *targetShape, loc, rewriter);
991 rewriter.replaceOp(op, castOp);
992 return success();
993 }
994};
995
996struct UnrollStoreMatrixOp : public UnrollPattern<xegpu::StoreMatrixOp> {
997 using UnrollPattern<xegpu::StoreMatrixOp>::UnrollPattern;
998 LogicalResult matchAndRewrite(xegpu::StoreMatrixOp op,
999 PatternRewriter &rewriter) const override {
1000 std::optional<SmallVector<int64_t>> targetShape = getTargetShape(op);
1001 if (!targetShape)
1002 return failure();
1003
1004 Location loc = op.getLoc();
1005 VectorType valueTy = llvm::dyn_cast<VectorType>(op.getData().getType());
1006 assert(valueTy && "the value type must be a vector type!");
1007 ArrayRef<int64_t> shape = valueTy.getShape();
1008 auto layout = dyn_cast<xegpu::LayoutAttr>(op.getLayoutAttr());
1009
1010 SmallVector<Type> convertedValTypes =
1011 getUnrolledTypes(valueTy, *targetShape);
1012 SmallVector<Value> convertedValues =
1013 pack(op.getData(), convertedValTypes, *targetShape, loc, rewriter);
1014
1015 SmallVector<OpFoldResult> mixedOffsets = op.getMixedOffsets();
1016 SmallVector<SmallVector<OpFoldResult>> offsetsList;
1017 for (SmallVector<int64_t> offsets :
1018 StaticTileOffsetRange(shape, *targetShape)) {
1019 auto adds = xegpu::addElementwise(
1020 rewriter, loc, mixedOffsets,
1021 getAsIndexOpFoldResult(op.getContext(), offsets));
1022 offsetsList.push_back(adds);
1023 }
1024
1025 for (auto [v, offsets] : llvm::zip_equal(convertedValues, offsetsList))
1026 xegpu::StoreMatrixOp::create(rewriter, loc, v, op.getMemDesc(), offsets,
1027 layout.dropInstData());
1028
1029 rewriter.eraseOp(op);
1030 return success();
1031 }
1032};
1033
1034} // namespace
1035
1036void mlir::xegpu::populateXeGPUUnrollPatterns(
1037    RewritePatternSet &patterns, const UnrollOptions &options) {
1038 patterns
1039 .add<UnrollCreateNdOp, UnrollUpdateNdOffsetOp, UnrollPrefetchNdOp,
1040 UnrollLoadNdOp, UnrollStoreNdOp, UnrollDpasOp, UnrollCreateDescOp,
1041 UnrollLoadGatherOp, UnrollStoreScatterOp, UnrollPrefetchOp,
1042 UnrollUpdateOffsetOp, UnrollLoadMatrixOp, UnrollStoreMatrixOp,
1043 UnrollLoadGatherOpWithOffset, UnrollStoreScatterOpWithOffsets>(
1044 patterns.getContext(), options);
1045}