//===- XeGPUDialect.cpp - MLIR XeGPU dialect implementation -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/XeGPU/IR/XeGPU.h"

#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/DialectImplementation.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Debug.h"

using std::optional;

namespace mlir {
namespace xegpu {

void XeGPUDialect::initialize() {
  addTypes<
#define GET_TYPEDEF_LIST
#include <mlir/Dialect/XeGPU/IR/XeGPUTypes.cpp.inc>
      >();
  addOperations<
#define GET_OP_LIST
#include <mlir/Dialect/XeGPU/IR/XeGPU.cpp.inc>
      >();
  addAttributes<
#define GET_ATTRDEF_LIST
#include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.cpp.inc>
      >();
}

// A `srcShape` consists of N distribution units, each being `subShapesLayout`
// x `subShape`. A `delinearizedId` identifies a particular `subShape` within
// each distribution unit.
// Example:
//   WG data is 128x256 and SG data is 16x32 in a 4x2 layout. This gives a
//   distribution unit of shape 64x64, and there are 2x4 such distribution
//   units. `delinearizedId` identifies the 16x32 subshape owned by a subgroup
//   within each distribution unit.
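// Continuing the example, subgroup (1, 1) has local offset (16, 32) within
// each 64x64 distribution unit, so it owns the coordinates (16, 32),
// (16, 96), (16, 160), (16, 224), (80, 32), (80, 96), (80, 160) and
// (80, 224) of the 128x256 WG data.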
static SmallVector<SmallVector<Value>>
genCoordinates(OpBuilder &builder, Location loc,
               SmallVector<Value> delinearizedId,
               ArrayRef<int64_t> subShapesLayout, ArrayRef<int64_t> subShape,
               ArrayRef<int64_t> srcShape) {
  SmallVector<SmallVector<Value>> coordinates;

  // A distribution unit must be less than or equal to `srcShape`.
  SmallVector<int64_t> distUnitShape = llvm::map_to_vector(
      llvm::zip_equal(srcShape,
                      computeElementwiseMul(subShapesLayout, subShape)),
      [](const auto &t) { return std::min(std::get<0>(t), std::get<1>(t)); });

  // Get the offset of `subShape` within a distribution unit.
  SmallVector<Value> distUnitLocalOffset = llvm::map_to_vector(
      llvm::zip(delinearizedId, subShape), [&](const auto &t) -> Value {
        return builder.createOrFold<arith::MulIOp>(
            loc, std::get<0>(t),
            builder.createOrFold<arith::ConstantIndexOp>(loc, std::get<1>(t)));
      });

  // For each dist unit
  for (SmallVector<int64_t> unitOffs :
       StaticTileOffsetRange(srcShape, distUnitShape)) {
    // Get dist unit offset within `srcShape`.
    SmallVector<Value> base =
        llvm::map_to_vector(unitOffs, [&](int64_t d) -> Value {
          return arith::ConstantIndexOp::create(builder, loc, d);
        });
    // Calculate `subShape` offset within `srcShape`.
    SmallVector<Value> adds =
        llvm::map_to_vector(llvm::zip_equal(base, distUnitLocalOffset),
                            [&](const auto &t) -> Value {
                              return builder.createOrFold<arith::AddIOp>(
                                  loc, std::get<0>(t), std::get<1>(t));
                            });
    // Do not go beyond `srcShape` bounds.
    SmallVector<Value> mods = llvm::map_to_vector(
        llvm::zip_equal(adds, srcShape), [&](const auto &t) -> Value {
          return builder.createOrFold<arith::RemUIOp>(
              loc, std::get<0>(t),
              arith::ConstantIndexOp::create(builder, loc, std::get<1>(t)));
        });

    coordinates.push_back(mods);
  }
  return coordinates;
}

// Checks if the given shape can be evenly distributed based on the layout
// and data factors provided by the LayoutAttr.
bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
                                         xegpu::DistributeLayoutAttr attr) {
  assert(attr && "Layout attribute is missing.");

  // Checks whether the given shape can be evenly distributed using the
  // specified layout and data attributes. If successful, it returns the work
  // size for each compute unit; otherwise, it returns `std::nullopt`. The work
  // size per compute unit is calculated as follows:
  //   - If `data` is null: newShape[i] = shape[i] / layout[i]
  //   - If `data` is not null: newShape[i] = data[i]
  // When round-robin distribution (`rr`) is enabled, `shape[i]` can be
  // smaller than `layout[i] * data[i]`, allowing multiple compute units to
  // share the data.
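  // An illustrative walkthrough:
  //   shape = [256, 128], layout = [8, 4], data = [32, 32]:
  //     shape / layout = [32, 32] and newShape / data = [1, 1], so the
  //     distribution succeeds and returns data = [32, 32].
  //   shape = [64, 128], layout = [8, 4], data = [16, 32] with rr = true:
  //     shape / layout = [8, 32]; newShape / data does not divide, but
  //     data / newShape = [2, 1] does, so compute units share the data
  //     round-robin and [16, 32] is returned.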
  auto tryDistribute = [&](llvm::ArrayRef<int64_t> shape,
                           llvm::ArrayRef<int64_t> layout,
                           llvm::ArrayRef<int64_t> data,
                           bool rr = true) -> optional<SmallVector<int64_t>> {
    llvm::SmallVector<int64_t> newShape(shape);
    if (layout.size()) {
      if (layout.size() != shape.size())
        return std::nullopt;
      auto ratio = computeShapeRatio(shape, layout);
      if (ratio.has_value()) {
        newShape = ratio.value();
      } else if (!rr || !computeShapeRatio(layout, shape).has_value()) {
        return std::nullopt;
      }
      // Round-robin case: continue with the original newShape.
    }

    if (data.size()) {
      if (data.size() != shape.size())
        return std::nullopt;
      auto ratio = computeShapeRatio(newShape, data);
      if (!ratio.has_value() && rr)
        ratio = computeShapeRatio(data, newShape);
      if (!ratio.has_value())
        return std::nullopt;

      // If data is not null, we always return it for the next phase.
      newShape = data;
    }
    return newShape;
  };

  // Check the sgLayout and sgData.
  auto maybeSgShape = tryDistribute(shape, attr.getEffectiveSgLayoutAsInt(),
                                    attr.getEffectiveSgDataAsInt());
  if (!maybeSgShape)
    return false;
  auto sgShape = maybeSgShape.value();

  // Check InstData; it has neither a layout nor a need for round-robin.
  auto maybeInstShape =
      tryDistribute(sgShape, {}, attr.getEffectiveInstDataAsInt(), false);
  if (!maybeInstShape)
    return false;
  auto instShape = maybeInstShape.value();

  // Check LaneLayout and LaneData.
  auto maybeLaneShape =
      tryDistribute(instShape, attr.getEffectiveLaneLayoutAsInt(),
                    attr.getEffectiveLaneDataAsInt(), false);
  return maybeLaneShape.has_value();
}

//===----------------------------------------------------------------------===//
// XeGPU_BlockTensorDescAttr
//===----------------------------------------------------------------------===//
BlockTensorDescAttr BlockTensorDescAttr::get(mlir::MLIRContext *context,
                                             xegpu::MemorySpace memory_space,
                                             int array_length,
                                             bool boundary_check) {
  auto scopeAttr = MemorySpaceAttr::get(context, memory_space);
  auto lengthAttr =
      IntegerAttr::get(IntegerType::get(context, 64), array_length);
  auto boundaryAttr = BoolAttr::get(context, boundary_check);
  return Base::get(context, scopeAttr, lengthAttr, boundaryAttr);
}

bool BlockTensorDescAttr::hasDefaultsOnly() {
  return getMemorySpace().getValue() == xegpu::MemorySpace::Global &&
         getArrayLength().getInt() == 1 && getBoundaryCheck().getValue();
}

//===----------------------------------------------------------------------===//
// XeGPU_ScatterTensorDescAttr
//===----------------------------------------------------------------------===//
ScatterTensorDescAttr
ScatterTensorDescAttr::get(mlir::MLIRContext *context,
                           xegpu::MemorySpace memory_space, int chunk_size) {
  auto scopeAttr = MemorySpaceAttr::get(context, memory_space);
  auto chunkSizeAttr =
      IntegerAttr::get(IntegerType::get(context, 64), chunk_size);
  return Base::get(context, scopeAttr, chunkSizeAttr);
}

LogicalResult ScatterTensorDescAttr::verify(
    llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
    MemorySpaceAttr memory_space, IntegerAttr chunk_size) {
  int64_t chunkSize = chunk_size.getInt();
  if (chunkSize <= 0)
    return emitError() << "invalid chunk size";

  return success();
}

//===----------------------------------------------------------------------===//
// XeGPU_LayoutAttr
//===----------------------------------------------------------------------===//
LogicalResult
LayoutAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
                   DenseI32ArrayAttr sg_layout, DenseI32ArrayAttr sg_data,
                   DenseI32ArrayAttr inst_data, DenseI32ArrayAttr lane_layout,
                   DenseI32ArrayAttr lane_data, DenseI32ArrayAttr order) {

  // A valid layout must include at least one of sg_layout and lane_layout.
  // sg_layout is essential for Workgroup layout, while lane_layout is
  // required for Subgroup layout.
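  // For illustration, a valid workgroup-level layout might look like
  // (sketched from the parameters verified here; the exact printed form is
  // defined by the attribute's assembly format):
  //   #xegpu.layout<sg_layout = [4, 2], sg_data = [16, 32],
  //                 lane_layout = [8, 2], lane_data = [1, 1], order = [1, 0]>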
  if (!sg_layout && !inst_data && !lane_layout) {
    return emitError()
           << "expected at least one of sg_layout, inst_data or lane_layout";
  }

  // Check that sg_layout, inst_data and lane_layout have the same rank when
  // present.

  if (sg_layout && inst_data && sg_layout.size() != inst_data.size()) {
    return emitError()
           << "expected sg_layout and inst_data to have the same rank";
  }

  if (sg_layout && lane_layout && sg_layout.size() != lane_layout.size()) {
    return emitError()
           << "expected sg_layout and lane_layout to have the same rank";
  }

  if (inst_data && lane_layout && inst_data.size() != lane_layout.size()) {
    return emitError() << "expected inst_data and lane_layout to have the same "
                          "rank, got inst_data "
                       << inst_data.size() << ", lane_layout "
                       << lane_layout.size();
  }

  // sg_data is optional for Workgroup layout, but its presence requires
  // sg_layout.
  if (sg_data) {
    if (!sg_layout)
      return emitError() << "expected sg_layout being used with sg_data";
    if (sg_data.size() != sg_layout.size())
      return emitError()
             << "expected sg_data and sg_layout to have the same rank";
  }

  // lane_data is optional for Subgroup layout, but its presence requires
  // lane_layout.
  if (lane_data) {
    if (!lane_layout)
      return emitError() << "expected lane_layout being used with lane_data";
    if (lane_data.size() != lane_layout.size())
      return emitError()
             << "expected lane_data and lane_layout to have the same rank";
  }

  if (order) {
    if (!sg_layout && !lane_layout)
      return emitError()
             << "expected sg_layout/lane_layout being used with order";

    if (sg_layout && order.size() != sg_layout.size())
      return emitError()
             << "expected order and sg_layout to have the same rank";

    if (lane_layout && order.size() != lane_layout.size())
      return emitError()
             << "expected order and lane_layout to have the same rank";
  }

  return success();
}

FailureOr<SmallVector<Value>>
LayoutAttr::delinearizeId(OpBuilder &builder, Location loc, Value linearId) {

  SmallVector<int64_t> sgLayoutInt;
  if (isForWorkgroup()) {
    sgLayoutInt = getEffectiveSgLayoutAsInt();
  } else if (isForSubgroup()) {
    sgLayoutInt = getEffectiveLaneLayoutAsInt();
  } else {
    return failure();
  }

  DenseI32ArrayAttr orderAttr = getOrder();

  // Handle order attribute
  SmallVector<int64_t> order;
  if (orderAttr && !orderAttr.empty()) {
    order = llvm::to_vector(
        llvm::map_range(orderAttr.asArrayRef(),
                        [](int32_t idx) { return static_cast<int64_t>(idx); }));
  } else {
    // Default order: [1, 0] for 2D (row-major), [2, 1, 0] for 3D, etc.
    order = llvm::to_vector(
        llvm::reverse(llvm::seq<int64_t>(0, sgLayoutInt.size())));
  }

  if (order.size() != sgLayoutInt.size()) {
    return failure();
  }

  SmallVector<Value> result(sgLayoutInt.size());
  Value remaining = linearId;

  /// Process dimensions in the order they appear in the order array.
  /// The first dimension in order is the fastest-changing.
  ///
  /// Example walkthrough for linearId=22, sgLayout=[2,4,4], order=[2,1,0]:
  ///
  /// Initial: remaining=22, dimIdx = order[i], dimSize = sgLayout[dimIdx],
  /// result=[?,?,?]
  ///
  /// i=0 (process columns, dimIdx=2, dimSize=4):
  ///   result[2] = 22 % 4 = 2 (column coordinate)
  ///   remaining = 22 / 4 = 5 (5 complete groups of 4 columns processed)
  ///
  /// i=1 (process rows, dimIdx=1, dimSize=4):
  ///   result[1] = 5 % 4 = 1 (row coordinate)
  ///   remaining = 5 / 4 = 1 (1 complete group of 4 rows processed)
  ///
  /// i=2 (process layers, dimIdx=0, dimSize=2):
  ///   result[0] = 1 % 2 = 1 (layer coordinate)
  ///   (no remaining update - last iteration)
  ///
  /// Final result: [1,1,2] = Layer 1, Row 1, Column 2
  for (size_t i = 0; i < order.size(); ++i) {
    int64_t dimIdx = order[i];
    int64_t dimSize = sgLayoutInt[dimIdx];

    Value dimSizeVal =
        builder.createOrFold<arith::ConstantIndexOp>(loc, dimSize);

    /// Extract the coordinate for this dimension using a modulo operation.
    /// This gives us "how far within this dimension" we are,
    /// e.g., linearId=22, dimSize=4: 22 % 4 = 2 (we're at position 2 within
    /// this dimension).
    result[dimIdx] =
        builder.createOrFold<arith::RemUIOp>(loc, remaining, dimSizeVal);

    /// Update remaining for the next dimension by removing what we've already
    /// processed. Division tells us "how many complete groups of this
    /// dimension we've gone through", e.g., linearId=22, dimSize=4:
    /// 22 / 4 = 5 (we've completed 5 groups of 4). Skip this for the last
    /// iteration since there's no next dimension to process.
    if (i < order.size() - 1) {
      remaining =
          builder.createOrFold<arith::DivUIOp>(loc, remaining, dimSizeVal);
    }
  }
  return result;
}

/// Implements DistributeLayoutAttr::computeDistributedCoords to generate
/// instructions for computing multi-dimensional offsets when distributed by
/// LayoutAttr.
FailureOr<SmallVector<SmallVector<Value>>>
LayoutAttr::computeDistributedCoords(OpBuilder &builder, Location loc,
                                     Value linearId, ArrayRef<int64_t> shape) {
  SmallVector<int64_t> layout;
  SmallVector<int64_t> subShape;
  if (isForWorkgroup()) {
    layout = getEffectiveSgLayoutAsInt();
    subShape = getEffectiveSgDataAsInt();
  } else if (isForSubgroup()) {
    layout = getEffectiveLaneLayoutAsInt();
    subShape = getEffectiveLaneDataAsInt();
  } else {
    return failure();
  }
  if (subShape.empty()) {
    if (auto derivedShape = computeShapeRatio(shape, layout))
      subShape = derivedShape.value();
    else
      return failure();
  }

  // delinearize Ids
  auto maybeIds = delinearizeId(builder, loc, linearId);
  if (failed(maybeIds))
    return failure();
  SmallVector<Value> ids = *maybeIds;

  return genCoordinates(builder, loc, ids, layout, subShape, shape);
}

bool LayoutAttr::isEqualTo(const xegpu::DistributeLayoutAttr &other) {
  if (dyn_cast<xegpu::SliceAttr>(other))
    return false;

  return *this == dyn_cast<xegpu::LayoutAttr>(other);
}

// Set the data fields (sg_data, inst_data and lane_data) to 1 for the
// specified unit dims.
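// For example, with sg_data = [16, 32] and unitDims = {0}, the result keeps
// sg_layout but has sg_data = [1, 32]; inst_data and lane_data are adjusted
// the same way.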
DistributeLayoutAttr LayoutAttr::setUnitDimData(SetVector<int64_t> unitDims) {
  auto sgDataOpt = getSgData();
  auto instDataOpt = getInstData();
  auto laneDataOpt = getLaneData();

  SmallVector<int32_t> sgData;
  SmallVector<int32_t> instData;
  SmallVector<int32_t> laneData;

  if (sgDataOpt) {
    sgData = llvm::to_vector(sgDataOpt.asArrayRef());
  }
  if (instDataOpt) {
    instData = llvm::to_vector(instDataOpt.asArrayRef());
  }
  if (laneDataOpt) {
    laneData = llvm::to_vector(laneDataOpt.asArrayRef());
  }

  for (auto dim : unitDims) {
    if (dim < static_cast<int64_t>(sgData.size()))
      sgData[dim] = 1;
    if (dim < static_cast<int64_t>(instData.size()))
      instData[dim] = 1;
    if (dim < static_cast<int64_t>(laneData.size()))
      laneData[dim] = 1;
  }

  return LayoutAttr::get(
      getContext(), getSgLayout(),
      sgData.empty() ? DenseI32ArrayAttr()
                     : DenseI32ArrayAttr::get(getContext(), sgData),
      instData.empty() ? DenseI32ArrayAttr()
                       : DenseI32ArrayAttr::get(getContext(), instData),
      getLaneLayout(),
      laneData.empty() ? DenseI32ArrayAttr()
                       : DenseI32ArrayAttr::get(getContext(), laneData),
      getOrder());
}

// Set the layout fields (sg_layout and lane_layout) to 1 for the specified
// unit dims.
DistributeLayoutAttr LayoutAttr::setUnitDimLayout(SetVector<int64_t> unitDims) {
  auto sgLayoutOpt = getSgLayout();
  auto laneLayoutOpt = getLaneLayout();

  SmallVector<int32_t> sgLayout;
  SmallVector<int32_t> laneLayout;

  if (sgLayoutOpt) {
    sgLayout = llvm::to_vector(sgLayoutOpt.asArrayRef());
  }
  if (laneLayoutOpt) {
    laneLayout = llvm::to_vector(laneLayoutOpt.asArrayRef());
  }

  for (auto dim : unitDims) {
    if (dim < static_cast<int64_t>(sgLayout.size()))
      sgLayout[dim] = 1;
    if (dim < static_cast<int64_t>(laneLayout.size()))
      laneLayout[dim] = 1;
  }

  return LayoutAttr::get(
      getContext(),
      sgLayout.empty() ? DenseI32ArrayAttr()
                       : DenseI32ArrayAttr::get(getContext(), sgLayout),
      getSgData(), getInstData(),
      laneLayout.empty() ? DenseI32ArrayAttr()
                         : DenseI32ArrayAttr::get(getContext(), laneLayout),
      getLaneData(), getOrder());
}

//===----------------------------------------------------------------------===//
// XeGPU_SliceAttr
//===----------------------------------------------------------------------===//
LogicalResult
SliceAttr::verify(llvm::function_ref<InFlightDiagnostic()> emitError,
                  xegpu::DistributeLayoutAttr parent, DenseI64ArrayAttr dims) {
  if (!parent || !dims)
    return emitError() << "expected parent layout and dims attribute";

  int64_t rank = parent.getRank();

  // Check that every element in dims is unique and smaller than rank.
  llvm::SmallDenseSet<int64_t> seen;
  for (int64_t dim : dims.asArrayRef()) {
    if (dim < 0 || dim >= rank)
      return emitError() << "invalid dim (" << dim << ") in slice attribute.";
    if (!seen.insert(dim).second)
      return emitError() << "repeated dim (" << dim << ") in slice attribute.";
  }
  return success();
}

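// Flattens nested SliceAttrs into a single SliceAttr over the base
// LayoutAttr. For example (an illustrative trace): slicing dim 1 of a rank-3
// layout and then dim 0 of the rank-2 result leaves only base dim 2, so the
// flattened attribute slices dims [0, 1] of the base layout.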
SliceAttr SliceAttr::flatten() const {
  xegpu::DistributeLayoutAttr parent = getParent();
  SmallVector<DenseI64ArrayAttr> slicedDims({getDims()});

  while (auto sliceAttr = dyn_cast<xegpu::SliceAttr>(parent)) {
    parent = sliceAttr.getParent();
    slicedDims.push_back(sliceAttr.getDims());
  }

  auto layoutAttr = dyn_cast<xegpu::LayoutAttr>(parent);
  SmallVector<int64_t> indices =
      llvm::to_vector(llvm::seq<int64_t>(0, layoutAttr.getRank()));

  // Get the remaining (flattened) dims by applying slice ops with all
  // slicedDims.
  SmallVector<int64_t> remainingDims(indices);
  for (auto dim : llvm::reverse(slicedDims))
    remainingDims = XeGPUDialect::slice(llvm::ArrayRef<int64_t>(remainingDims),
                                        dim.asArrayRef());

  // Get the flattened sliced dims by applying slice ops with the remaining
  // dims.
  SmallVector<int64_t> flattenedDims = XeGPUDialect::slice(
      llvm::ArrayRef<int64_t>(indices), llvm::ArrayRef<int64_t>(remainingDims));

  return xegpu::SliceAttr::get(
      getContext(), layoutAttr,
      DenseI64ArrayAttr::get(getContext(), flattenedDims));
}

FailureOr<SmallVector<Value>>
SliceAttr::delinearizeId(OpBuilder &builder, Location loc, Value linearId) {
  SliceAttr attr = flatten();
  auto parent = dyn_cast<LayoutAttr>(attr.getParent());
  return parent.delinearizeId(builder, loc, linearId);
}

// Implements DistributeLayoutAttr::computeDistributedCoords to generate
// instructions for computing multi-dimensional offsets when distributed by
// LayoutAttr.
FailureOr<SmallVector<SmallVector<Value>>>
SliceAttr::computeDistributedCoords(OpBuilder &builder, Location loc,
                                    Value linearId, ArrayRef<int64_t> shape) {
  assert(getRank() == static_cast<int64_t>(shape.size()) && "invalid shape.");
  if (!isForWorkgroup())
    return failure();

  SmallVector<int64_t> layout;
  SmallVector<int64_t> subShape;
  if (isForWorkgroup()) {
    layout = getEffectiveSgLayoutAsInt();
    subShape = getEffectiveSgDataAsInt();
  } else if (isForSubgroup()) {
    layout = getEffectiveLaneLayoutAsInt();
    subShape = getEffectiveLaneDataAsInt();
  } else {
    return failure();
  }

  if (subShape.empty()) {
    if (auto derivedShape = computeShapeRatio(shape, layout))
      subShape = derivedShape.value();
    else
      return failure();
  }

  // delinearize Ids
  auto maybeIds = delinearizeId(builder, loc, linearId);
  if (failed(maybeIds))
    return failure();

  // The effective sgIds for computing offsets correspond
  // to the dims that are not sliced.
  ArrayRef<int64_t> dims = flatten().getDims().asArrayRef();
  SmallVector<Value> sgIds =
      XeGPUDialect::slice(ArrayRef<Value>(*maybeIds), dims);

  return genCoordinates(builder, loc, sgIds, layout, subShape, shape);
}

bool SliceAttr::isSliceOf(const xegpu::DistributeLayoutAttr &other) {
  auto flattenedThis = flatten();
  // If other is a LayoutAttr, just compare directly with the parent of
  // flattenedThis.
  if (auto otherLayout = dyn_cast<xegpu::LayoutAttr>(other))
    return flattenedThis.getParent() == otherLayout;
  // If other is a SliceAttr, flatten it first before comparing.
  auto flattenedOther = dyn_cast<xegpu::SliceAttr>(other).flatten();
  // Both must have a common parent LayoutAttr.
  if (flattenedThis.getParent() != flattenedOther.getParent())
    return false;
  // flattenedOther's sliced dims must be a subset of flattenedThis's sliced
  // dims.
  llvm::SmallDenseSet<int64_t> thisDims(
      flattenedThis.getDims().asArrayRef().begin(),
      flattenedThis.getDims().asArrayRef().end());
  return llvm::all_of(flattenedOther.getDims().asArrayRef(),
                      [&](int64_t dim) { return thisDims.contains(dim); });
}

bool SliceAttr::isEqualTo(const xegpu::DistributeLayoutAttr &other) {
  if (dyn_cast<xegpu::LayoutAttr>(other))
    return false;

  auto flattenedThis = flatten();
  auto flattenedOther = dyn_cast<xegpu::SliceAttr>(other).flatten();

  return ((flattenedThis.getParent() == flattenedOther.getParent()) &&
          (flattenedThis.getDims() == flattenedOther.getDims()));
}

// Helper function to adjust unit dimensions from the sliced space to the
// parent space.
static SetVector<int64_t>
adjustUnitDimsWithSliceDims(const SetVector<int64_t> &unitDims,
                            ArrayRef<int64_t> sliceDims) {
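  // For example (illustrative): with sliceDims = [0] and unitDims = {0, 1}
  // in the sliced space, the parent rank is 3, the non-sliced parent dims
  // are [1, 2], and the unit dims map to {1, 2} in the parent space.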
  // Reconstruct parent's non-sliced dimensions.
  int64_t parentRank = sliceDims.size() + unitDims.size();
  llvm::SmallDenseSet<int64_t> slicedDimsSet(sliceDims.begin(),
                                             sliceDims.end());
  SmallVector<int64_t> nonSlicedDims;
  for (int64_t i = 0; i < parentRank; ++i) {
    if (!slicedDimsSet.contains(i))
      nonSlicedDims.push_back(i);
  }

  // Map unit dims from the sliced space to the parent space.
  SetVector<int64_t> adjustUnitDims;
  for (auto dim : unitDims) {
    if (dim < static_cast<int64_t>(nonSlicedDims.size())) {
      adjustUnitDims.insert(nonSlicedDims[dim]);
    }
  }

  return adjustUnitDims;
}

// Set the data fields (sg_data, inst_data and lane_data) to 1 for the
// specified unit dims.
DistributeLayoutAttr SliceAttr::setUnitDimData(SetVector<int64_t> unitDims) {
  SliceAttr attr = flatten();
  ArrayRef<int64_t> sliceDims = attr.getDims().asArrayRef();
  auto parent = dyn_cast<LayoutAttr>(attr.getParent());

  SetVector<int64_t> adjustUnitDims =
      adjustUnitDimsWithSliceDims(unitDims, sliceDims);

  return SliceAttr::get(getContext(), parent.setUnitDimData(adjustUnitDims),
                        attr.getDims());
}

// Set the layout fields (sg_layout and lane_layout) to 1 for the specified
// unit dims.
DistributeLayoutAttr SliceAttr::setUnitDimLayout(SetVector<int64_t> unitDims) {
  SliceAttr attr = flatten();
  ArrayRef<int64_t> sliceDims = attr.getDims().asArrayRef();
  auto parent = dyn_cast<LayoutAttr>(attr.getParent());

  SetVector<int64_t> adjustUnitDims =
      adjustUnitDimsWithSliceDims(unitDims, sliceDims);

  return SliceAttr::get(getContext(), parent.setUnitDimLayout(adjustUnitDims),
                        attr.getDims());
}

//===----------------------------------------------------------------------===//
// XeGPU_RangeAttr
//===----------------------------------------------------------------------===//

LogicalResult
RangeAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
                  IntegerAttr startOfRange, IntegerAttr endOfRange) {
  if (startOfRange.getInt() >= endOfRange.getInt())
    return emitError() << "'end' : " << endOfRange.getInt()
                       << " must be greater than 'start' : "
                       << startOfRange.getInt();

  return success();
}

//===----------------------------------------------------------------------===//
// XeGPU_TensorDescType
//===----------------------------------------------------------------------===//

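// A tensor_desc appears in the IR as, e.g. (illustrative; the encoding and
// layout attributes are optional):
//   !xegpu.tensor_desc<8x16xf16>
//   !xegpu.tensor_desc<16x8xf32, #xegpu.scatter_tdesc_attr<chunk_size = 8>>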
mlir::Type TensorDescType::parse(AsmParser &parser) {
  llvm::SmallVector<int64_t> shape;
  mlir::Type elementType;
  mlir::FailureOr<mlir::Attribute> encoding;
  mlir::FailureOr<mlir::Attribute> layout;

  // Parse literal '<'
  if (parser.parseLess())
    return {};

  auto shapeLoc = parser.getCurrentLocation();
  if (mlir::failed(parser.parseDimensionList(shape))) {
    parser.emitError(shapeLoc, "failed to parse parameter 'shape'");
    return {};
  }

  auto elemTypeLoc = parser.getCurrentLocation();
  if (mlir::failed(parser.parseType(elementType))) {
    parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'");
    return {};
  }

  // parse optional attributes
  while (mlir::succeeded(parser.parseOptionalComma())) {
    mlir::Attribute attr;
    ParseResult res = parser.parseAttribute(attr);
    if (mlir::succeeded(res)) {
      if (mlir::isa<LayoutAttr>(attr)) {
        layout = attr;
        continue;
      }
      if (mlir::isa<BlockTensorDescAttr, ScatterTensorDescAttr>(attr)) {
        encoding = attr;
        continue;
      }
    }
    return {};
  }

  // Parse literal '>'
  if (parser.parseGreater())
    return {};

  MLIRContext *ctxt = parser.getContext();
  return TensorDescType::getChecked(
      [&]() { return parser.emitError(parser.getNameLoc()); }, ctxt, shape,
      elementType, encoding.value_or(BlockTensorDescAttr::get(ctxt)),
      layout.value_or(mlir::Attribute()));
}

void TensorDescType::print(AsmPrinter &printer) const {
  printer << "<";

  auto shape = getShape();
  for (int64_t dim : shape) {
    if (mlir::ShapedType::isDynamic(dim))
      printer << '?';
    else
      printer << dim;
    printer << 'x';
  }

  printer << getElementType();

  auto encoding = getEncoding();
  auto blockAttr = llvm::dyn_cast_if_present<BlockTensorDescAttr>(encoding);
  if (encoding && (!blockAttr || !blockAttr.hasDefaultsOnly()))
    printer << ", " << encoding;

  if (auto layout = getLayout())
    printer << ", " << layout;

  printer << ">";
}

TensorDescType TensorDescType::get(llvm::ArrayRef<int64_t> shape,
                                   mlir::Type elementType, int array_length,
                                   bool boundary_check,
                                   MemorySpace memory_space,
                                   mlir::Attribute layout) {
  auto context = elementType.getContext();
  auto attr = BlockTensorDescAttr::get(context, memory_space, array_length,
                                       boundary_check);
  return Base::get(context, shape, elementType, attr, layout);
}

TensorDescType TensorDescType::get(llvm::ArrayRef<int64_t> shape,
                                   mlir::Type elementType, int chunk_size,
                                   MemorySpace memory_space,
                                   mlir::Attribute layout) {
  auto context = elementType.getContext();
  auto attr = ScatterTensorDescAttr::get(context, memory_space, chunk_size);
  return Base::get(context, shape, elementType, attr, layout);
}

LogicalResult
TensorDescType::verify(llvm::function_ref<InFlightDiagnostic()> emitError,
                       llvm::ArrayRef<int64_t> shape, mlir::Type elementType,
                       mlir::Attribute encoding, mlir::Attribute layout) {
  size_t rank = shape.size();

  if (rank == 0)
    return emitError() << "expected non-zero rank tensor";

  auto blockAttr = mlir::dyn_cast_if_present<BlockTensorDescAttr>(encoding);
  if (blockAttr) {
    MemorySpaceAttr memorySpaceAttr = blockAttr.getMemorySpace();
    if (rank > 1 && memorySpaceAttr &&
        memorySpaceAttr.getValue() == MemorySpace::SLM)
      return emitError() << "SLM is only supported for 1D block tensor";
  }

  // For gather and scatter ops, low-precision types are packed in 32-bit
  // units.
  unsigned bitWidth = elementType.getIntOrFloatBitWidth();
  int chunkAlignmentFactor =
      bitWidth < uArch::generalPackedFormatBitSize
          ? uArch::generalPackedFormatBitSize / bitWidth
          : 1;
  auto scatterAttr = mlir::dyn_cast_if_present<ScatterTensorDescAttr>(encoding);
  if (scatterAttr) {
    int64_t chunkSize = scatterAttr.getChunkSizeAsInt();
    if (rank == 1 && chunkSize != 1)
      return emitError() << "expected non-contiguous elements for 1D tensor";

    // If chunk size > 1, the last dimension of the tensor shape must be
    // equal to the chunk size, and it must be a multiple of the
    // chunkAlignmentFactor.
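    // For instance, assuming a 32-bit packed unit: f16 has bitWidth 16, so
    // chunkAlignmentFactor = 32 / 16 = 2, and chunk_size = 8 is valid since
    // 8 % 2 == 0.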
    if (chunkSize > 1) {
      if (shape.back() != chunkSize)
        return emitError() << "expected last dim of tensor to match chunk size";
      if (shape.back() % chunkAlignmentFactor != 0)
        return emitError() << "expected last dim of tensor to be a multiple of "
                           << chunkAlignmentFactor;
    }
  }

  auto layoutAttr = llvm::dyn_cast_if_present<LayoutAttr>(layout);
  if (layoutAttr) {
    if (rank != (size_t)layoutAttr.getRank())
      return emitError() << "expected layout rank to match tensor rank";

    auto laneData = layoutAttr.getLaneData();
    if (scatterAttr && laneData) {
      // Validate subgroup mapping rules for scattered tensors.
      // If chunkSize > 1, the last dimension of the tensor should
      // be distributed in units divisible by chunkAlignmentFactor.
      int64_t chunkSize = scatterAttr.getChunkSizeAsInt();
      if (chunkSize > 1 && laneData[rank - 1] % chunkAlignmentFactor)
        return emitError()
               << "expected last dim of lane_data to be a multiple of: "
               << chunkAlignmentFactor;
    }

    if (!XeGPUDialect::isEvenlyDistributable(shape, layoutAttr)) {
      std::string shapeStr;
      llvm::raw_string_ostream stream(shapeStr);
      llvm::interleaveComma(shape, stream);
      return emitError() << "cannot distribute [" << shapeStr << "] using "
                         << layoutAttr;
    }
  }
  return success();
}

//===----------------------------------------------------------------------===//
// XeGPU_MemDescType
//===----------------------------------------------------------------------===//
mlir::Type MemDescType::parse(AsmParser &parser) {
  llvm::SmallVector<int64_t> shape;
  mlir::Type elementType;
  mlir::FailureOr<MemLayoutAttr> layout;

  // Parse literal '<'
  if (parser.parseLess())
    return {};

  auto shapeLoc = parser.getCurrentLocation();
  if (mlir::failed(parser.parseDimensionList(shape, false, true))) {
    parser.emitError(shapeLoc, "failed to parse parameter 'shape'");
    return {};
  }

  auto elemTypeLoc = parser.getCurrentLocation();
  if (mlir::failed(parser.parseType(elementType))) {
    parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'");
    return {};
  }

  // parse optional attributes
  if (mlir::succeeded(parser.parseOptionalComma())) {
    MemLayoutAttr attr;
    ParseResult res = parser.parseAttribute(attr);
    if (mlir::failed(res))
      return {};
    layout = attr;
  }

  // Parse literal '>'
  if (parser.parseGreater())
    return {};

  MLIRContext *ctxt = parser.getContext();
  return MemDescType::getChecked(
      [&]() { return parser.emitError(parser.getNameLoc()); }, ctxt, shape,
      elementType, layout.value_or(MemLayoutAttr()));
}

void MemDescType::print(AsmPrinter &printer) const {
  printer << "<";

  printer.printDimensionList(getShape());
  printer << 'x';
  printer << getElementType();

  if (auto layout = getMemLayout())
    printer << ", " << layout;

  printer << ">";
}

//===----------------------------------------------------------------------===//
// XeGPU_MemLayoutAttr
//===----------------------------------------------------------------------===//

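// A mem layout attribute is a dictionary-like list of named attributes,
// e.g. (the key names here are illustrative; the parser accepts any
// keyword):
//   #xegpu.mem_layout<stride = [1, 16], block = [16, 16]>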
Attribute MemLayoutAttr::parse(AsmParser &parser, Type type) {

  auto context = parser.getContext();
  llvm::SMLoc loc = parser.getCurrentLocation();

  llvm::SmallDenseSet<StringRef> seenKeys;
  SmallVector<NamedAttribute> attributes;

  auto parseElt = [&]() -> ParseResult {
    StringRef nameId;
    if (failed(parser.parseKeyword(&nameId)))
      return parser.emitError(loc, "expected valid attribute name");

    if (!seenKeys.insert(nameId).second)
      return parser.emitError(loc, "duplicate key '")
             << nameId << "' in mem layout attribute";

    if (failed(parser.parseEqual()))
      return failure();

    Attribute attr;
    if (failed(parser.parseAttribute(attr)))
      return failure();
    attributes.emplace_back(nameId, attr);
    return success();
  };

  // Parse literal '<'
  if (parser.parseLess())
    return {};

  if (failed(parser.parseCommaSeparatedList(parseElt)))
    return {};

  // Parse literal '>'
  if (parser.parseGreater())
    return {};

  return parser.getChecked<MemLayoutAttr>(
      loc, context, DictionaryAttr::get(context, attributes));
}

void MemLayoutAttr::print(AsmPrinter &printer) const {
  printer << "<";
  ArrayRef<NamedAttribute> attrs = getAttrs().getValue();
  for (size_t i = 0; i < attrs.size(); i++) {
    printer << attrs[i].getName().str() << " = " << attrs[i].getValue();
    if (i < attrs.size() - 1)
      printer << ", ";
  }
  printer << ">";
}

// A helper utility to perform a binary operation on OpFoldResults: both
// operands are materialized as index values (creating a constant op when an
// operand is an attribute), and the corresponding arith op is generated.
template <typename ArithOp>
OpFoldResult genBinOp(OpFoldResult a, OpFoldResult b, Location loc,
                      OpBuilder &builder) {
  auto aVal = getValueOrCreateConstantIndexOp(builder, loc, a);
  auto bVal = getValueOrCreateConstantIndexOp(builder, loc, b);
  return ArithOp::create(builder, loc, aVal, bVal).getResult();
}

// A helper utility to perform a division operation on an OpFoldResult and an
// int64_t.
#define div(a, b)                                                              \
  genBinOp<arith::DivSIOp>(a, builder.getIndexAttr(b), loc, builder)

// A helper utility to perform a remainder operation on an OpFoldResult and
// an int64_t.
#define rem(a, b)                                                              \
  genBinOp<arith::RemSIOp>(a, builder.getIndexAttr(b), loc, builder)

// A helper utility to perform a multiply operation on an OpFoldResult and an
// int64_t.
#define mul(a, b)                                                              \
  genBinOp<arith::MulIOp>(a, builder.getIndexAttr(b), loc, builder)

// A helper utility to perform an addition operation on two OpFoldResults.
#define add(a, b) genBinOp<arith::AddIOp>(a, b, loc, builder)
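
// For example, with `builder` and `loc` in scope, `mul(ofr, 16)` materializes
// `ofr` (creating an arith.constant if it is an attribute) along with an
// index constant 16, emits an arith.muli, and returns the result as an
// OpFoldResult.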

// Block the given offsets according to the block shape:
// say the original offset is [y, x] and the block shape is [By, Bx];
// then the blocked offset is [y/By, x/Bx, y%By, x%Bx].
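// For example, offsets [10, 70] with block shape [8, 64] become
// [10/8, 70/64, 10%8, 70%64] = [1, 1, 2, 6].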
SmallVector<OpFoldResult>
getBlockedOffsets(OpBuilder &builder, Location loc,
                  ArrayRef<OpFoldResult> offsets,
                  ArrayRef<int64_t> blockShape) {

  assert(offsets.size() == blockShape.size() &&
         "offsets and blockShape must have the same size");
  SmallVector<OpFoldResult> blockedOffsets;
  SmallVector<OpFoldResult> divs, rems;

  for (auto [offset, block] : llvm::zip(offsets, blockShape)) {
    divs.push_back(div(offset, block));
    rems.push_back(rem(offset, block));
  }
  blockedOffsets.append(divs.begin(), divs.end());
  blockedOffsets.append(rems.begin(), rems.end());

  return blockedOffsets;
}

// Get the strides of the MemDesc as a vector of integers.
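// For example (an illustrative trace of the computation below): a 32x64
// MemDesc with row-major strides [64, 1] and an 8x16 inner block gives inner
// block strides [16, 1], 4 blocks per block-row, and blocked strides
// [512, 128, 16, 1].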
SmallVector<int64_t> MemDescType::getStrideShape() {

  SmallVector<int64_t> matrixShape(getShape().begin(), getShape().end());

  ArrayAttr strideAttr = getStrideAttr();
  SmallVector<int64_t> strides;
  for (Attribute attr : strideAttr.getValue()) {
    strides.push_back(cast<IntegerAttr>(attr).getInt());
  }

  SmallVector<int64_t> innerBlkShape = getBlockShape();

  // Get the permutation from the fastest-changing to the slowest-changing
  // dim: perm[i] is the dim with the i-th smallest stride.
  SmallVector<int, 4> perm =
      llvm::to_vector<4>(llvm::seq<int>(0, strides.size()));
  llvm::sort(perm, [&](int a, int b) { return strides[a] < strides[b]; });

  assert(strides[perm[0]] == 1 && "inner most dim must have stride 1");

  SmallVector<int64_t> innerBlkStride(innerBlkShape.size());
  innerBlkStride[perm[0]] = 1;
  for (size_t i = 1; i < perm.size(); ++i)
    innerBlkStride[perm[i]] =
        innerBlkStride[perm[i - 1]] * innerBlkShape[perm[i - 1]];

  // Compute the original matrix shape using the stride info,
  // and compute the number of blocks in each dimension.
  // The shape of the highest dim can't be derived from the stride info,
  // but it doesn't impact the stride computation for a blocked layout.
  SmallVector<int64_t> matrixShapeOrig(matrixShape.size());
  SmallVector<int64_t> BlkShapeOrig(matrixShape.size());
  for (size_t i = 0; i < perm.size() - 1; ++i) {
    matrixShapeOrig[perm[i]] = strides[perm[i + 1]] / strides[perm[i]];
    BlkShapeOrig[perm[i]] = matrixShapeOrig[perm[i]] / innerBlkShape[perm[i]];
  }

  int64_t innerBlkSize = 1;
  for (auto s : innerBlkShape)
    innerBlkSize *= s;

  SmallVector<int64_t> outerBlkStride(matrixShape.size());
  outerBlkStride[perm[0]] = innerBlkSize;
  for (size_t i = 0; i < perm.size() - 1; ++i) {
    outerBlkStride[perm[i + 1]] =
        outerBlkStride[perm[i]] * BlkShapeOrig[perm[i]];
  }

  // Combine the inner and outer strides.
  SmallVector<int64_t> blockedStrides;
  blockedStrides.append(outerBlkStride.begin(), outerBlkStride.end());
  blockedStrides.append(innerBlkStride.begin(), innerBlkStride.end());

  return blockedStrides;
}

// Calculate the linear offset using the blocked offsets and strides.
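// Continuing the example above: offsets [10, 70] block to [1, 1, 2, 6], and
// with blocked strides [512, 128, 16, 1] the linear offset is
// 1*512 + 1*128 + 2*16 + 6*1 = 678.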
Value MemDescType::getLinearOffsets(OpBuilder &builder, Location loc,
                                    ArrayRef<OpFoldResult> offsets) {

  SmallVector<int64_t> matrixShape(getShape().begin(), getShape().end());
  SmallVector<int64_t> blockShape = getBlockShape();
  SmallVector<int64_t> strides = getStrideShape();
  SmallVector<OpFoldResult> blockedOffsets;

  // blockShape equal to matrixShape means no blocking.
  if (llvm::equal(blockShape, matrixShape)) {
    // Remove the outer dims from the strides.
    strides.erase(strides.begin(), strides.begin() + matrixShape.size());
  } else {
    assert(offsets.size() == blockShape.size() &&
           "offsets and blockShape must have the same size");
    // Say the original offset is [y, x] and the block shape is [By, Bx];
    // then the blocked offset is [y/By, x/Bx, y%By, x%Bx].
    SmallVector<OpFoldResult> divs, rems;

    for (auto [offset, block] : llvm::zip(offsets, blockShape)) {
      divs.push_back(div(offset, block));
      rems.push_back(rem(offset, block));
    }
    blockedOffsets.append(divs.begin(), divs.end());
    blockedOffsets.append(rems.begin(), rems.end());
    offsets = blockedOffsets;
  }

  // Start with the initial value as the matrix descriptor's base offset.
  Value linearOffset = arith::ConstantIndexOp::create(builder, loc, 0);
  for (size_t i = 0; i < offsets.size(); ++i) {
    OpFoldResult mulResult = mul(offsets[i], strides[i]);
    Value mulVal = getValueOrCreateConstantIndexOp(builder, loc, mulResult);
    linearOffset = arith::AddIOp::create(builder, loc, mulVal, linearOffset);
  }

  return linearOffset;
}

} // namespace xegpu
} // namespace mlir

#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.cpp.inc>
#define GET_ATTRDEF_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.cpp.inc>
#define GET_TYPEDEF_CLASSES
#include <mlir/Dialect/XeGPU/IR/XeGPUTypes.cpp.inc>