MLIR 23.0.0git
XeGPUUtils.cpp
Go to the documentation of this file.
1//===---- XeGPUUtils.cpp - MLIR Utilities for XeGPUOps ------------------===//
2//
3// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements utility methods for working with the XeGPU dialect.
10//
11//===----------------------------------------------------------------------===//
12
20#include "mlir/IR/Builders.h"
21#include "mlir/IR/Operation.h"
22#include "mlir/IR/ValueRange.h"
25#include "llvm/Support/Casting.h"
26#include "llvm/Support/FormatVariadic.h"
27#include <cstdint>
28#include <numeric>
29
30using namespace mlir;
31
32/// convert ArrayRef<ValueRange> into SmallVector<Value>
35 for (const auto &vals : values)
36 llvm::append_range(result, vals);
37 return result;
38}
39
40FailureOr<VectorType>
41mlir::xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) {
42 auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
43 // It only works for subgroup level layout, which only has lane_layout
44 // and lane_data, and is to distribute a SIMD code into SIMT code.
45 if (!layout || !layout.isForSubgroup())
46 return failure();
47
48 SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
49 SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
50 auto tdescShape = tdescTy.getShape();
51 auto elementType = tdescTy.getElementType();
52
53 // compute sgSize by multiply elements of laneLayout
54 // e.g. for 2D layout, sgSize = laneLayout[0] * laneLayout[1]
55 // e.g. for 1D layout, sgSize = laneLayout[0]
56 int64_t sgSize = llvm::product_of(laneLayout);
57
58 // Check if the tensor descriptor shape is distributable.
59 int64_t tensorSize = 1;
60 for (auto [tdescDim, laneDim, laneDataDim] :
61 llvm::zip_equal(tdescShape, laneLayout, laneData)) {
62 assert((tdescDim % (laneDim * laneDataDim) == 0) &&
63 "tensor descriptor shape is not distributable");
64 tensorSize *= tdescDim;
65 }
66 // tensorSize must be adjusted for array_length.
67 tensorSize *= tdescTy.getArrayLength();
68
69 return VectorType::get({tensorSize / sgSize}, elementType);
70}
71
72FailureOr<VectorType>
73mlir::xegpu::getDistributedVectorType(VectorType originalType,
74 xegpu::LayoutAttr layout) {
75 int64_t rank = originalType.getRank();
76 if (rank < 1)
77 return failure();
78 ArrayRef<int64_t> shape = originalType.getShape();
79 // For rank > 2, leading dimensions are treated as batch/array dimensions.
80 // Drop them and use the product as arrayLength.
81 int arrayLength = 1;
82 while (shape.size() > 2) {
83 arrayLength *= shape[0];
84 shape = shape.drop_front();
85 }
86 // Drop matching leading dims from layout if the layout rank exceeds the
87 // remaining shape rank.
88 auto laneLayout = layout.getEffectiveLaneLayoutAsInt();
89 auto laneData = layout.getEffectiveLaneDataAsInt();
90 while (!laneLayout.empty() && laneLayout.size() > shape.size()) {
91 laneLayout.erase(laneLayout.begin());
92 laneData.erase(laneData.begin());
93 }
94 auto trimmedLayout = xegpu::LayoutAttr::get(
95 layout.getContext(),
96 SmallVector<int32_t>(laneLayout.begin(), laneLayout.end()),
97 SmallVector<int32_t>(laneData.begin(), laneData.end()));
98 auto helperTdescTy = xegpu::TensorDescType::get(
99 shape, originalType.getElementType(), arrayLength,
100 /*boundary_check=*/true,
101 /*memory_space=*/xegpu::MemorySpace::Global, trimmedLayout);
102 return xegpu::getDistributedVectorType(helperTdescTy);
103}
104
105FailureOr<VectorType>
106xegpu::getDistVecTypeBasedOnLaneLayout(xegpu::DistributeLayoutAttr layout,
107 VectorType originalType) {
108 if (!layout)
109 return failure();
110 assert((isa<xegpu::LayoutAttr>(layout) || isa<xegpu::SliceAttr>(layout)) &&
111 "Expecting a valid layout.");
112
113 int64_t vectorRank = originalType.getRank();
114 int64_t layoutRank = layout.getRank();
115 assert(vectorRank >= layoutRank && "Vector rank must be >= layout rank.");
116
117 // When the vector has more dimensions than the layout, only the trailing
118 // dimensions are distributed. Leading dimensions are preserved as-is.
119 int64_t offset = vectorRank - layoutRank;
120 ArrayRef<int64_t> fullShape = originalType.getShape();
121 SmallVector<int64_t> trailingShape(fullShape.begin() + offset,
122 fullShape.end());
123 auto distributedShapeOrFailure =
124 layout.computeDistributedShape(trailingShape);
125 if (failed(distributedShapeOrFailure))
126 return failure();
127
128 SmallVector<int64_t> resultShape(fullShape.begin(),
129 fullShape.begin() + offset);
130 resultShape.append(distributedShapeOrFailure->begin(),
131 distributedShapeOrFailure->end());
132 return VectorType::get(resultShape, originalType.getElementType());
133}
134
135std::string xegpu::getTemporaryLayoutName(const OpOperand &operand) {
136 const StringRef prefix("layout_operand_");
137 unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
138 return llvm::formatv("{0}{1}", prefix, idx).str();
139}
140
142 const StringRef prefix = "layout_result_";
143 return llvm::formatv("{0}{1}", prefix, result.getResultNumber()).str();
144}
145
146xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
147 if (!value)
148 return nullptr;
149
150 if (auto result = dyn_cast<OpResult>(value)) {
151 Operation *defOp = result.getDefiningOp();
152 assert(defOp && "result must have a defining op");
153
154 if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
155 auto layout = anchorOp.getAnchorLayout();
156 return layout;
157 }
158
159 std::string layoutName = getTemporaryLayoutName(result);
160 if (defOp->hasAttr(layoutName)) {
161 auto layout =
162 defOp->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
163 return layout;
164 }
165 }
166
167 if (auto arg = dyn_cast<BlockArgument>(value)) {
168 auto *parentOp = arg.getOwner()->getParentOp();
169 if (auto loop = dyn_cast_if_present<LoopLikeOpInterface>(parentOp)) {
170 OpOperand *tiedInit = loop.getTiedLoopInit(arg);
171 if (tiedInit)
172 return getTemporaryLayout(*tiedInit);
173 }
174 }
175
176 if (auto tdescTy =
177 dyn_cast_if_present<xegpu::TensorDescType>(value.getType()))
178 return tdescTy.getLayoutAttr();
179
180 return nullptr;
181}
182xegpu::DistributeLayoutAttr
184 Operation *op = opr.getOwner();
185 unsigned idx = const_cast<OpOperand &>(opr).getOperandNumber();
186
187 if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(op)) {
188 if (auto dpasOp = dyn_cast<xegpu::DpasOp>(op)) {
189 if (idx == 0) {
190 return dpasOp.getLayoutAAttr();
191 } else if (idx == 1) {
192 return dpasOp.getLayoutBAttr();
193 } else if (idx == 2) {
194 return dpasOp.getLayoutCdAttr();
195 }
196 }
197 if (auto dpasMxOp = dyn_cast<xegpu::DpasMxOp>(op)) {
198 // DpasMxOp has operands: a, b, optional acc, optional scale_a, optional
199 // scale_b
200 unsigned currentIdx = 0;
201
202 if (idx == currentIdx++)
203 return dpasMxOp.getLayoutAAttr();
204
205 if (idx == currentIdx++)
206 return dpasMxOp.getLayoutBAttr();
207
208 if (dpasMxOp.getAcc())
209 if (idx == currentIdx++)
210 return dpasMxOp.getLayoutCdAttr();
211
212 if (dpasMxOp.getScaleA())
213 if (idx == currentIdx++)
214 return dpasMxOp.getLayoutAScaleAttr();
215
216 if (dpasMxOp.getScaleB())
217 if (idx == currentIdx++)
218 return dpasMxOp.getLayoutBScaleAttr();
219
220 return nullptr;
221 }
222 if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
223 return convertOp.getInputLayoutAttr();
224 }
225 auto layout = anchorOp.getAnchorLayout();
226
227 if (idx == 0)
228 return layout;
229
230 // For StoreNdOp and StoreMatrixOp,
231 // the layout is valid for the first two operands: value and memref/tdesc.
232 if (isa<xegpu::StoreNdOp, xegpu::StoreMatrixOp>(op) && (idx < 2))
233 return layout;
234
235 if (isa<xegpu::StoreScatterOp>(op)) {
236 xegpu::StoreScatterOp store(op);
237 int chunkSize = store.getChunkSize().value_or(1);
238 if (layout && idx >= 2 && chunkSize > 1)
239 return layout.dropDims(llvm::to_vector(
240 llvm::seq<int64_t>(layout.getRank() - 1, layout.getRank())));
241 return layout;
242 }
243 if (isa<xegpu::LoadGatherOp>(op)) {
244 xegpu::LoadGatherOp load(op);
245 int chunkSize = load.getChunkSize().value_or(1);
246 if (layout && idx >= 1 && chunkSize > 1)
247 return layout.dropDims(llvm::to_vector(
248 llvm::seq<int64_t>(layout.getRank() - 1, layout.getRank())));
249 return layout;
250 }
251 }
252
253 std::string layoutName = xegpu::getTemporaryLayoutName(opr);
254 if (op->hasAttr(layoutName)) {
255 auto layout = op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
256 return layout;
257 }
258
259 return nullptr;
260}
261
262// Returns the permanent layout attribute for the given result if it's
263// available on the defining op. Otherwise returns the provided layout.
264xegpu::DistributeLayoutAttr
265maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
266 const OpResult &result, mlir::Operation *owner,
267 const std::string &name) {
268 xegpu::DistributeLayoutAttr candidate = layout;
269
270 if (auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
271 if (auto perm = loadOp.getLayoutAttr())
272 candidate = perm;
273 }
274
275 return candidate;
276}
277
278// Returns the permanent layout attribute for the given operand if it's
279// available on the defining op. Otherwise returns the provided layout.
280xegpu::DistributeLayoutAttr
281maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
282 const OpOperand &operand, mlir::Operation *owner,
283 const std::string &name) {
284 xegpu::DistributeLayoutAttr candidate = layout;
285 unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
286
287 if (auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
288 if (idx == 0) {
289 if (auto perm = storeOp.getLayoutAttr())
290 candidate = perm;
291 }
292 }
293
294 return candidate;
295}
296
297// TODO-LayoutRefactor: Remove this function after replacing use
298// with setTemporaryLayout or setAnchorLayout
300 const mlir::OpResult &result,
301 const mlir::xegpu::DistributeLayoutAttr layout) {
302 Operation *owner = result.getOwner();
303
304 if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
305 if (anchorOp.getAnchorLayout() == layout)
306 return;
307 anchorOp.setAnchorLayout(layout);
308 return;
309 }
310
311 std::string name = xegpu::getTemporaryLayoutName(result);
312 if (owner->hasAttrOfType<DistributeLayoutAttr>(name)) {
313 return;
314 }
315 if (layout) {
316 owner->setAttr(name, layout);
317 }
318}
319
320// TODO-LayoutRefactor: Remove this function after replacing use
321// with setTemporaryLayout or setAnchorLayout
323 const DistributeLayoutAttr layout) {
324 Operation *owner = operand.getOwner();
325 unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
326
327 if (!layout) {
328 return;
329 }
330 if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
331 if (auto dpasOp = dyn_cast<xegpu::DpasOp>(owner)) {
332 if (idx == 0) {
333 return dpasOp.setLayoutAAttr(layout);
334 } else if (idx == 1) {
335 return dpasOp.setLayoutBAttr(layout);
336 } else if (idx == 2) {
337 return dpasOp.setLayoutCdAttr(layout);
338 }
339 }
340 if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(owner)) {
341 return convertOp.setInputLayoutAttr(layout);
342 }
343
344 // For store operations (StoreScatterOp, StoreNdOp, StoreMatrixOp),
345 // the layout is valid for the first two operands: value and memref/tdesc.
346 // For other operations, the layout applies to the first operand only.
347 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
348 owner)) {
349 if (idx < 2) {
350 anchorOp.setAnchorLayout(layout);
351 }
352 } else {
353 if (idx == 0) {
354 anchorOp.setAnchorLayout(layout);
355 }
356 }
357 }
358
359 std::string name = xegpu::getTemporaryLayoutName(operand);
360 if (owner->hasAttrOfType<DistributeLayoutAttr>(name)) {
361 return;
362 }
363 if (layout) {
364 owner->setAttr(name, layout);
365 }
366}
367
368template <typename T, typename>
369xegpu::DistributeLayoutAttr
370xegpu::getTemporaryLayout(const T &operandOrResult) {
371 Operation *op = operandOrResult.getOwner();
372
373 std::string layoutName = xegpu::getTemporaryLayoutName(operandOrResult);
374 if (op->hasAttr(layoutName)) {
375 auto layout = op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
376 return layout;
377 }
378
379 return nullptr;
380}
381
382template xegpu::DistributeLayoutAttr
384template xegpu::DistributeLayoutAttr
386
387template <typename T, typename>
388void xegpu::setTemporaryLayout(const T &operandOrResult,
389 const xegpu::DistributeLayoutAttr layout) {
390 Operation *owner = operandOrResult.getOwner();
391 std::string name = xegpu::getTemporaryLayoutName(operandOrResult);
392 if (owner->hasAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
393 return;
394 }
395 if (layout) {
396 owner->setAttr(name, layout);
397 }
398}
399
401 const mlir::OpResult &result,
402 const mlir::xegpu::DistributeLayoutAttr layout);
403
405 const mlir::OpOperand &operand,
406 const mlir::xegpu::DistributeLayoutAttr layout);
407
411 auto vecTy = dyn_cast<VectorType>(value.getType());
412 if (!vecTy)
413 return {value};
414
415 ArrayRef<int64_t> srcShape = vecTy.getShape();
416 if (!computeShapeRatio(srcShape, shape))
417 return {value};
418
419 int64_t srcShapeRank = srcShape.size();
420 int64_t targetShapeRank = shape.size();
421
422 SmallVector<int64_t> adjustedTargetShape(srcShape.size());
423 int64_t rankDiff = srcShapeRank - targetShapeRank;
424 std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
425 1);
426 llvm::copy(shape, adjustedTargetShape.begin() + rankDiff);
427
429 for (SmallVector<int64_t> offsets :
430 StaticTileOffsetRange(srcShape, adjustedTargetShape)) {
431 SmallVector<int64_t> staticStrides(offsets.size(), 1);
432 Value slice = vector::ExtractStridedSliceOp::create(
433 builder, loc, value, offsets, adjustedTargetShape, staticStrides);
434
435 // Reshape to remove leading unit dims if needed
436 if (srcShapeRank > targetShapeRank) {
437 auto targetTy = VectorType::get(shape, vecTy.getElementType());
438 slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
439 }
440 result.push_back(slice);
441 }
442
443 return result;
444}
445
447 ValueRange values,
449 VectorType inputTy = dyn_cast<VectorType>(values[0].getType());
450 assert(llvm::all_of(values.getTypes(),
451 [&](Type type) { return type == inputTy; }) &&
452 "values must be of the same VectorType");
453
454 Type elemTy = inputTy.getElementType();
455 ArrayRef<int64_t> tileShape = inputTy.getShape();
456
457 VectorType resultTy = VectorType::get(shape, elemTy);
458 auto zeroAttr = builder.getZeroAttr(elemTy);
459 Value result = arith::ConstantOp::create(
460 builder, loc, resultTy, DenseElementsAttr::get(resultTy, zeroAttr));
461
462 for (auto [src, offsets] :
463 llvm::zip_equal(values, StaticTileOffsetRange(shape, tileShape))) {
464 SmallVector<int64_t> staticStrides(tileShape.size(), 1);
465 result = vector::InsertStridedSliceOp::create(builder, loc, src, result,
466 offsets, staticStrides);
467 }
468 return result;
469}
470
472 Operation *op, TypeConverter converter) {
473 MLIRContext *context = op->getContext();
474
475 auto materializeCast = [](OpBuilder &builder, Type type, ValueRange inputs,
476 Location loc) -> Value {
477 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
478 .getResult(0);
479 };
480
481 { // convert VectorType to RankedTensorType for SCF Structural ops
482 TypeConverter converter;
483 converter.addConversion([](Type type) -> Type { return type; });
484 converter.addConversion([](VectorType type) -> Type {
485 return RankedTensorType::get(type.getShape(), type.getElementType());
486 });
487 converter.addSourceMaterialization(materializeCast);
488 converter.addTargetMaterialization(materializeCast);
489
490 mlir::ConversionTarget target(*context);
491 target.addLegalOp<UnrealizedConversionCastOp>();
492
493 mlir::RewritePatternSet patterns(context);
495 target);
496 (void)mlir::applyPartialConversion(op, target, std::move(patterns));
497 }
498
499 { // propagate the layout attribute to RankedTensorType by checking
500 // BuiltInUnrealizedCastOps
501 // for VectorType to RankedTensorType cast.
502 op->walk([](UnrealizedConversionCastOp castOp) {
503 if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
504 return WalkResult::skip();
505
506 Value input = castOp.getInputs()[0];
507 Value result = castOp.getResults()[0];
508 auto inputTy = dyn_cast<VectorType>(input.getType());
509 auto resultTy = dyn_cast<RankedTensorType>(result.getType());
510
511 // Only look at ops casting from VectorType to RankedTensorType
512 if (!inputTy || !resultTy)
513 return WalkResult::skip();
514
515 xegpu::DistributeLayoutAttr layout =
517 if (!layout)
518 return WalkResult::skip();
519
520 RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
521 result.setType(newTy);
522
523 // update the arguments if user is a LoopLike op.
524 for (OpOperand &use : result.getUses()) {
525 if (auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
526 BlockArgument arg = loop.getTiedLoopRegionIterArg(&use);
527 arg.setType(newTy);
528 }
529 // whileOp has two regions, the BlockArgument of the after region
530 // is not exposed by LoopLikeOpInterface
531 if (auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
532 unsigned idx = use.getOperandNumber();
533 BlockArgument arg = whileOp.getAfterArguments()[idx];
534 arg.setType(newTy);
535 }
536 }
537 return WalkResult::advance();
538 });
539
540 // using yieldOp as anchor to update the result type of its ParentOp
541 op->walk([](scf::YieldOp yieldOp) {
542 Operation *parentOp = yieldOp->getParentOp();
543 for (OpResult r : parentOp->getOpResults()) {
544 unsigned idx = r.getResultNumber();
545 Type resultTy = r.getType();
546 Type yieldTy = yieldOp.getResults()[idx].getType();
547 if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
548 r.setType(yieldTy);
549 }
550 });
551 }
552
553 { // perform the conversion from RankedTensorType to VectorType based on the
554 // DistributeLayoutAttr
555
556 // Handle the UnrealizedConversionCastOp introduced by the first step.
557 // For vector->RankedTensorType, it will simply forward the inputs.
558 // For RankedTensorType->vector, it will update the inputs with the
559 // one from the adaptor.
560 class UnrealizedConversionCastOpPattern
561 : public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
562 using OpConversionPattern<
563 mlir::UnrealizedConversionCastOp>::OpConversionPattern;
564
565 mlir::LogicalResult
566 matchAndRewrite(mlir::UnrealizedConversionCastOp op,
567 OneToNOpAdaptor adaptor,
568 ConversionPatternRewriter &rewriter) const override {
569 auto inputs = op.getOperands();
570 auto outputs = op.getOutputs();
571
572 if (inputs.size() != 1 || outputs.size() != 1)
573 return failure();
574
575 auto inputTy = inputs[0].getType();
576 auto outputTy = outputs[0].getType();
577
578 if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
579 rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
580 return success();
581 }
582
583 if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
584 SmallVector<Value> values = xegpu::flattenValues(adaptor.getInputs());
585 auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
586 outputTy, values);
587 rewriter.replaceOp(op, newOp);
588 return success();
589 }
590 return failure();
591 }
592 };
593
594 converter.addSourceMaterialization(materializeCast);
595 converter.addTargetMaterialization([&](OpBuilder &builder, TypeRange type,
596 ValueRange inputs, Location loc) {
597 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
598 .getResults();
599 });
600
601 mlir::ConversionTarget target(*context);
602 target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
603 [](UnrealizedConversionCastOp op) {
604 auto isTensorTy = [](Type type) {
605 return isa<RankedTensorType>(type);
606 };
607 return llvm::none_of(op->getOperandTypes(), isTensorTy) &&
608 llvm::none_of(op->getResultTypes(), isTensorTy);
609 });
610 mlir::RewritePatternSet patterns(context);
611 patterns.insert<UnrealizedConversionCastOpPattern>(context);
613 target);
614 (void)mlir::applyPartialConversion(op, target, std::move(patterns));
615 }
616}
617
618std::optional<std::string> xegpu::getChipStr(Operation *op) {
619 auto gpuModuleOp = op->getParentOfType<gpu::GPUModuleOp>();
620
621 if (!gpuModuleOp)
622 return std::nullopt;
623
624 auto targetAttrs = gpuModuleOp.getTargets();
625 if (targetAttrs) {
626 for (auto &attr : *targetAttrs) {
627 auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
628 if (xevmAttr)
629 return xevmAttr.getChip().str();
630 }
631 }
632
633 return std::nullopt;
634}
635
636/// Generates element-wise addition ops of two arrays with same length.
638 Location loc,
641 assert(lhs.size() == rhs.size() && "lhs and rhs must have the same size");
643 for (auto [l, r] : llvm::zip_equal(lhs, rhs)) {
644 auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
645 auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
646 results.push_back(builder.createOrFold<arith::AddIOp>(loc, lval, rval));
647 }
648 return results;
649}
650
651/// Generates element-wise addition ops of two arrays with automatic alignment.
652/// When the input arrays have different sizes, the shorter array is
653/// right-aligned with the longer array, and the unmatched leading elements from
654/// the longer array are preserved unchanged. This is commonly used for offset
655/// computation where higher-dimensional offsets need to be added to
656/// lower-dimensional adjustments.
657///
658/// Example:
659/// lhs = [l1, l2, l3], rhs = [r1, r2]
660/// Result: [l1, l2+r1, l3+r2]
665 // ensure a is longer than b
666 ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
667 ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
668 SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
669 a = a.slice(a.size() - b.size());
670 results.append(addElementwise(builder, loc, a, b));
671 return results;
672}
673
674template <typename T>
676 ArrayRef<T> candidateMultiples) {
677 static_assert(std::is_integral<T>::value, "T must be an integer type");
678 int largest = -1;
679 SmallVector<T> multiples = {1};
680 if (!candidateMultiples.empty())
681 multiples =
682 SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
683 for (T candidate : candidates) {
684 for (T multiple : multiples) {
685 int value = static_cast<int>(candidate * multiple);
686 if (value != 0 && dim % value == 0 && value > largest)
687 largest = value;
688 }
689 }
690 return largest;
691}
692
694 vector::CombiningKind kind, uint32_t size) {
695 // First reduce on a single thread to get per lane reduction value.
696 Value laneVal = vector::ReductionOp::create(builder, loc, kind, input);
697 // Parallel reduction using butterfly shuffles.
698 for (uint64_t i = 1; i < size; i <<= 1) {
699 Value shuffled =
700 gpu::ShuffleOp::create(builder, loc, laneVal, i, /** width = **/ size,
701 /** mode = **/ gpu::ShuffleMode::XOR)
702 .getShuffleResult();
703 laneVal = makeArithReduction(builder, loc, kind, laneVal, shuffled);
704 }
705 return laneVal;
706}
707
710 vector::CombiningKind kind,
711 int64_t reductionDim, Location loc,
712 PatternRewriter &rewriter) {
713 VectorType sourceType = src.getType();
714 int64_t sourceRank = sourceType.getRank();
715 // Expecting at least a 2D source vector. Leading dimensions (all except the
716 // last two) must be unit.
717 assert(sourceRank >= 2 && "expected at least a 2D source vector");
718 for (int64_t i = 0; i < sourceRank - 2; ++i)
719 assert(sourceType.getShape()[i] == 1 &&
720 "expected leading dimensions to be unit");
721 int64_t rowIdx = sourceRank - 2;
722 int64_t columnIdx = sourceRank - 1;
723 int64_t sourceH = sourceType.getShape()[rowIdx];
724 int64_t sourceW = sourceType.getShape()[columnIdx];
725 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
726 // Create a constant vector to hold the result of the reduction.
727 TypedAttr zeroAttr = rewriter.getZeroAttr(sourceType.getElementType());
728 Value reductionResult = arith::ConstantOp::create(
729 rewriter, loc, acc.getType(),
730 DenseElementsAttr::get(acc.getType(), zeroAttr));
731 auto srcLayout = xegpu::getTemporaryLayout(dyn_cast<OpResult>(src));
732 auto accLayout = xegpu::getTemporaryLayout(dyn_cast<OpResult>(acc));
733 // Reduction result should have the same layout as the accumulator.
734 xegpu::setTemporaryLayout(cast<OpResult>(reductionResult), accLayout);
735 // For each slice of the source, extract the slice vector, do a reduction
736 // and, insert the reduced value back to the result vector.
737 int64_t accRank = acc.getType().getRank();
738 for (int i = 0; i < nSlices; ++i) {
739 // Build nD offsets, sizes, and strides. Leading unit dims get
740 // offset=0, size=1. The last two dims are set based on reductionDim.
741 SmallVector<int64_t> sliceOffsets(sourceRank, 0);
742 SmallVector<int64_t> sliceSizes(sourceRank, 1);
743 SmallVector<int64_t> strides(sourceRank, 1);
744 if (reductionDim == columnIdx) {
745 sliceOffsets[rowIdx] = i;
746 sliceSizes[columnIdx] = sourceW;
747 } else {
748 sliceOffsets[columnIdx] = i;
749 sliceSizes[rowIdx] = sourceH;
750 }
751
752 vector::ExtractStridedSliceOp extractOp =
753 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
754 sliceSizes, strides);
755 // Extract strided slice has the same layout as src.
756 xegpu::setTemporaryLayout(extractOp->getOpResult(0), srcLayout);
757
758 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
759
760 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
761 rewriter, loc,
762 VectorType::get({nSliceElements}, sourceType.getElementType()),
763 extractOp.getResult());
764
765 // Shape cast output has the same layout as the accumulator. Shape cast
766 // source has the same layout as the original reduction source.
767 xegpu::setTemporaryLayout(slice->getOpOperand(0), srcLayout);
768 xegpu::setTemporaryLayout(slice->getOpResult(0), accLayout);
769 // Extract and reduction results in scalars, so no result layout is needed.
770 // Build multi-dim index into acc (sourceRank-1 dims, i.e. source shape with
771 // the reduction dim removed). Leading unit dims get index 0.
772 SmallVector<int64_t> accIdx(accRank, 0);
773 accIdx[accRank - 1] = i;
774 Value accExtract = vector::ExtractOp::create(rewriter, loc, acc, accIdx);
775 Value reduction = vector::ReductionOp::create(
776 rewriter, loc, kind, slice.getResult(), accExtract);
777 reductionResult = vector::InsertOp::create(rewriter, loc, reduction,
778 reductionResult, accIdx);
779 // Insert op should have the same layout as the accumulator.
780 xegpu::setTemporaryLayout(cast<OpResult>(reductionResult), accLayout);
781 }
782 return reductionResult;
783}
784
787 vector::CombiningKind kind, int64_t reductionDim, int64_t reductionSize,
788 Location loc, PatternRewriter &rewriter) {
789 VectorType sourceType = src.getType();
790 int64_t sourceRank = sourceType.getRank();
791 // Expecting at least a 2D source vector. Leading dimensions (all except the
792 // last two) must be unit.
793 assert(sourceRank >= 2 && "expected at least a 2D source vector");
794 for (int64_t i = 0; i < sourceRank - 2; ++i)
795 assert(sourceType.getShape()[i] == 1 &&
796 "expected leading dimensions to be unit");
797 int64_t rowIdx = sourceRank - 2;
798 int64_t columnIdx = sourceRank - 1;
799 int64_t sourceH = sourceType.getShape()[rowIdx];
800 int64_t sourceW = sourceType.getShape()[columnIdx];
801
802 // Create a constant vector to hold the result of the reduction.
803 TypedAttr zeroAttr = rewriter.getZeroAttr(sourceType.getElementType());
804 Value reductionResult = arith::ConstantOp::create(
805 rewriter, loc, acc.getType(),
806 DenseElementsAttr::get(acc.getType(), zeroAttr));
807
808 // nSlices is the number of reduction operations needed to reduce the entire
809 // source vector. For example, if reductionDim is the row dim, we are
810 // reducing across rows, and each slice is a column. So the number of slices
811 // is the number of columns, which is sourceW.
812 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
813
814 // For each slice of the source, extract the slice vector, do a reduction
815 // and, insert the reduced value back to the result vector.
816 int64_t accRank = acc.getType().getRank();
817 for (int i = 0; i < nSlices; ++i) {
818 // Build nD offsets, sizes, and strides. Leading unit dims get
819 // offset=0, size=1. The last two dims are set based on reductionDim.
820 SmallVector<int64_t> sliceOffsets(sourceRank, 0);
821 SmallVector<int64_t> sliceSizes(sourceRank, 1);
822 SmallVector<int64_t> strides(sourceRank, 1);
823 if (reductionDim == columnIdx) {
824 sliceOffsets[rowIdx] = i;
825 sliceSizes[columnIdx] = sourceW;
826 } else {
827 sliceOffsets[columnIdx] = i;
828 sliceSizes[rowIdx] = sourceH;
829 }
830
831 vector::ExtractStridedSliceOp extractOp =
832 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
833 sliceSizes, strides);
834 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
835 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
836 rewriter, loc,
837 VectorType::get({nSliceElements}, sourceType.getElementType()),
838 extractOp.getResult());
839
840 SmallVector<int64_t> accIdx(accRank, 0);
841 accIdx[accRank - 1] = i;
842 Value accExtract = vector::ExtractOp::create(rewriter, loc, acc, accIdx);
843 Value fullReduce =
844 xegpu::subgroupReduction(loc, rewriter, slice, kind, reductionSize);
845 fullReduce =
846 vector::makeArithReduction(rewriter, loc, kind, fullReduce, accExtract);
847 reductionResult = vector::InsertOp::create(rewriter, loc, fullReduce,
848 reductionResult, accIdx);
849 }
850 return reductionResult;
851}
852
854 Type type,
855 vector::CombiningKind kind) {
856 auto vecTy = dyn_cast<VectorType>(type);
857 Type elemTy = vecTy ? vecTy.getElementType() : type;
858
859 // Helper to create either a splat vector or scalar constant from an attr.
860 auto makeConst = [&](Attribute scalarAttr) -> Value {
861 if (vecTy)
862 return arith::ConstantOp::create(
863 builder, loc, vecTy, DenseElementsAttr::get(vecTy, scalarAttr));
864 return arith::ConstantOp::create(builder, loc, cast<TypedAttr>(scalarAttr));
865 };
866
867 switch (kind) {
868 case vector::CombiningKind::ADD:
869 case vector::CombiningKind::XOR:
870 case vector::CombiningKind::OR:
871 case vector::CombiningKind::MAXUI:
872 return makeConst(builder.getZeroAttr(elemTy));
873
874 case vector::CombiningKind::MUL:
875 case vector::CombiningKind::AND:
876 return makeConst(builder.getOneAttr(elemTy));
877
878 case vector::CombiningKind::MINSI:
879 if (auto intTy = dyn_cast<IntegerType>(elemTy))
880 return makeConst(builder.getIntegerAttr(
881 elemTy, APInt::getSignedMaxValue(intTy.getWidth())));
882 return nullptr;
883
884 case vector::CombiningKind::MINUI:
885 if (auto intTy = dyn_cast<IntegerType>(elemTy))
886 return makeConst(
887 builder.getIntegerAttr(elemTy, APInt::getMaxValue(intTy.getWidth())));
888 return nullptr;
889
890 case vector::CombiningKind::MAXSI:
891 if (auto intTy = dyn_cast<IntegerType>(elemTy))
892 return makeConst(builder.getIntegerAttr(
893 elemTy, APInt::getSignedMinValue(intTy.getWidth())));
894 return nullptr;
895
896 case vector::CombiningKind::MINNUMF:
897 case vector::CombiningKind::MINIMUMF:
898 if (auto floatTy = dyn_cast<FloatType>(elemTy))
899 return makeConst(builder.getFloatAttr(
900 elemTy, APFloat::getInf(floatTy.getFloatSemantics())));
901 return nullptr;
902
903 case vector::CombiningKind::MAXNUMF:
904 case vector::CombiningKind::MAXIMUMF:
905 if (auto floatTy = dyn_cast<FloatType>(elemTy))
906 return makeConst(builder.getFloatAttr(
907 elemTy, APFloat::getInf(floatTy.getFloatSemantics(), true)));
908 return nullptr;
909 }
910 return nullptr;
911}
912
913/// Explicit instantiations
914template int xegpu::getLargestDivisor<int>(int dim, ArrayRef<int> candidates,
915 ArrayRef<int> candidateMultiples);
916template int
918 ArrayRef<unsigned> candidateMultiples);
919
920bool xegpu::requirePacked(const xegpu::DistributeLayoutAttr layout) {
921 if (!layout)
922 return false;
923 auto laneData = layout.getEffectiveLaneDataAsInt();
924 if (laneData.size() != 2)
925 return false;
926 return laneData[0] != 1;
927}
928
929bool xegpu::requireTranspose(const xegpu::DistributeLayoutAttr layout,
930 const xegpu::uArch::uArch *uArch) {
931 // Return false for unsupported targets.
932 // TODO: Add more support or move to target info.
933 if (uArch->getName().equals_insensitive("pvc") &&
934 uArch->getName().equals_insensitive("bmg") &&
935 uArch->getName().equals_insensitive("cri"))
936 return false;
937 if (!layout)
938 return false;
939 auto laneLayout = layout.getEffectiveLaneLayoutAsInt();
940 if (laneLayout.size() != 2)
941 return false;
942 return laneLayout[0] == uArch->getSubgroupSize() && laneLayout[1] == 1;
943}
944
945// Check if dst shape is an expansion of src shape by inserting unit dimensions.
946// Returns true if all dimensions in src match corresponding dimensions in dst
947// (after skipping unit dimensions), and populates expandedUnitDims with the
948// indices of the unit dimensions in dst that were added (not present in src).
949// Example: src=[2,3], dst=[1,2,3,1] -> true, expandedUnitDims=[0,3]
951 SmallVector<int64_t> &expandedUnitDims) {
952 // All unit dimensions in dst that don't appear in src are the expanded
953 // unit dimensions
954 size_t srcIdx = 0;
955 for (size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx)
956 if (srcIdx < src.size() && src[srcIdx] == dst[dstIdx])
957 srcIdx++;
958 else if (dst[dstIdx] == 1)
959 expandedUnitDims.push_back(dstIdx);
960 else
961 return false;
962 return srcIdx == src.size();
963}
964
965// Checks if dst shape is an expansion of src shape where each dimension in src
966// is split into one or more consecutive dimensions in dst whose product equals
967// the original dimension. Populates splitDimGroups with groups of dst indices
968// that correspond to each src dimension. Example: src=[6,4], dst=[2,3,2,2] ->
969// true
972 SmallVector<SmallVector<int64_t>> &splitDimGroups) {
973 // each dim in src can be mapped to one or more dims in dst whose product
974 // equals to the src dim
975 size_t srcIdx = 0;
976 int64_t accumulatedSize = 1;
977 SmallVector<int64_t> currentDstDims;
978
979 splitDimGroups.clear();
980 for (size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx) {
981 if (srcIdx >= src.size())
982 return false;
983 accumulatedSize *= dst[dstIdx];
984 currentDstDims.push_back(dstIdx);
985
986 if (accumulatedSize == src[srcIdx]) {
987 // Also collect trailing unit dims in destination, if any.
988 // Leading unit dims were implicitly collected.
989 if (srcIdx == src.size() - 1) {
990 while (++dstIdx < dst.size() && dst[dstIdx] == 1)
991 currentDstDims.push_back(dstIdx);
992 }
993 // Record the mapping: srcIdx -> currentDstDims
994 splitDimGroups.push_back(currentDstDims);
995 // move to next src dim
996 srcIdx++;
997 accumulatedSize = 1;
998 currentDstDims.clear();
999 } else if (accumulatedSize > src[srcIdx]) {
1000 return false;
1001 }
1002 }
1003 return srcIdx == src.size();
1004}
return success()
lhs
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
auto load
xegpu::DistributeLayoutAttr maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout, const OpResult &result, mlir::Operation *owner, const std::string &name)
Attributes are known-constant values of operations.
Definition Attributes.h:25
This class represents an argument of a Block.
Definition Value.h:306
IntegerAttr getIntegerAttr(Type type, int64_t value)
Definition Builders.cpp:233
FloatAttr getFloatAttr(Type type, double value)
Definition Builders.cpp:259
TypedAttr getZeroAttr(Type type)
Definition Builders.cpp:329
TypedAttr getOneAttr(Type type)
Definition Builders.cpp:347
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
This class helps build Operations.
Definition Builders.h:209
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition Builders.h:528
This class represents an operand of an operation.
Definition Value.h:254
This is a value defined by a result of an operation.
Definition Value.h:454
Operation is the basic unit of execution within MLIR.
Definition Operation.h:87
AttrClass getAttrOfType(StringAttr name)
Definition Operation.h:575
bool hasAttrOfType(NameT &&name)
Definition Operation.h:600
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Definition Operation.h:585
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition Operation.h:251
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition Operation.h:255
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
Definition Operation.h:607
operand_type_range getOperandTypes()
Definition Operation.h:422
result_type_range getResultTypes()
Definition Operation.h:453
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition Operation.h:822
result_range getOpResults()
Definition Operation.h:445
MLIRContext * getContext()
Return the context this operation is associated with.
Definition Operation.h:233
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class provides an abstraction over the various different ranges of value types.
Definition TypeRange.h:40
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:389
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
void setType(Type newType)
Mutate the type of this Value to be of the specified type.
Definition Value.h:116
Type getType() const
Return the type of this value.
Definition Value.h:105
static WalkResult skip()
Definition WalkResult.h:48
static WalkResult advance()
Definition WalkResult.h:47
Operation * getOwner() const
Return the owner of this operand.
Definition UseDefLists.h:38
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:717
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
Value makeArithReduction(OpBuilder &b, Location loc, CombiningKind kind, Value v1, Value acc, arith::FastMathFlagsAttr fastmath=nullptr, Value mask=nullptr)
Returns the result value of reducing two scalar/vector values with the corresponding arith operation.
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_strided_slice.
bool requirePacked(const DistributeLayoutAttr layout)
Helper function to check if the layout is packed.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
Value createReductionNeutralValue(OpBuilder &builder, Location loc, Type type, vector::CombiningKind kind)
Creates a constant filled with the neutral (identity) value for the given reduction kind.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
Value subgroupReduction(Location loc, OpBuilder &builder, Value input, vector::CombiningKind kind, uint32_t size)
Given an input value representing per-lane data, this function returns the result after performing a ...
bool matchUnitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< int64_t > &expandedUnitDims)
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
Value lowerToVectorReductions(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, Location loc, PatternRewriter &rewriter)
Given the src and acc arguments from a vector::MultiDimReductionOp, lower to a set of vector::Reduc...
bool requireTranspose(const DistributeLayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
bool matchSplitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &splitDimGroups)
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structural type conversion patterns...
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
Value lowerCrossLaneReductionToShuffles(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, int64_t reductionSize, Location loc, PatternRewriter &rewriter)
Lowers cross-lane reductions to shuffle operations on a 2D vector.
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition Utils.cpp:307
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Definition Value.h:494
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition Utils.cpp:114
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
virtual int getSubgroupSize() const =0
StringRef getName() const
Definition uArchBase.h:163