MLIR 22.0.0git
XeGPUDialect.cpp
1//===- XeGPUDialect.cpp - MLIR XeGPU dialect implementation -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
14#include "mlir/IR/Builders.h"
16#include "llvm/ADT/TypeSwitch.h"
17#include "llvm/Support/Debug.h"
18
19using std::optional;
20
21namespace mlir {
22namespace xegpu {
23
24void XeGPUDialect::initialize() {
25 addTypes<
26#define GET_TYPEDEF_LIST
27#include <mlir/Dialect/XeGPU/IR/XeGPUTypes.cpp.inc>
28 >();
29 addOperations<
30#define GET_OP_LIST
31#include <mlir/Dialect/XeGPU/IR/XeGPU.cpp.inc>
32 >();
33 addAttributes<
34#define GET_ATTRDEF_LIST
35#include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.cpp.inc>
36 >();
37}
38#define GET_OP_INTERFACE_CLASSES
39#include "mlir/Dialect/XeGPU/IR/XeGPUOpInterface.cpp.inc"
40
41// A `srcShape` consists of N distribution units, each being `subShapesLayout` x
42// `subShape`. A `delinearizedId` is used to identify a particular `subShape`
43// within each distribution unit.
44// Example:
45// WG data is 128x256. SG data is 16x32 in a 4x2 layout; this gives a
46// distribution unit of shape 64x64, and there are 2x4 such distribution
47// units. `delinearizedId` identifies the 16x32 sub-shape of a subgroup in
48// each distribution unit.
49static SmallVector<SmallVector<Value>>
50genCoordinates(OpBuilder &builder, Location loc,
51 SmallVector<Value> delinearizedId,
52 ArrayRef<int64_t> subShapesLayout, ArrayRef<int64_t> subShape,
53 ArrayRef<int64_t> srcShape) {
54 SmallVector<SmallVector<Value>> coordinates;
55
56 // A distribution unit must be less than or equal to `srcShape`
57 SmallVector<int64_t> distUnitShape = llvm::map_to_vector(
58 llvm::zip_equal(srcShape,
59 computeElementwiseMul(subShapesLayout, subShape)),
60 [](const auto &t) { return std::min(std::get<0>(t), std::get<1>(t)); });
61
62 // Get the offset of `subShape` within a distribution unit.
63 SmallVector<Value> distUnitLocalOffset = llvm::map_to_vector(
64 llvm::zip(delinearizedId, subShape), [&](const auto &t) -> Value {
65 return builder.createOrFold<arith::MulIOp>(
66 loc, std::get<0>(t),
67 builder.createOrFold<arith::ConstantIndexOp>(loc, std::get<1>(t)));
68 });
69
70 // For each dist unit
71 for (SmallVector<int64_t> unitOffs :
72 StaticTileOffsetRange(srcShape, distUnitShape)) {
73 // Get dist unit offset within `srcShape`.
74 SmallVector<Value> base =
75 llvm::map_to_vector(unitOffs, [&](int64_t d) -> Value {
76 return arith::ConstantIndexOp::create(builder, loc, d);
77 });
78 // Calculate `subShape` offset within `srcShape`.
79 SmallVector<Value> adds =
80 llvm::map_to_vector(llvm::zip_equal(base, distUnitLocalOffset),
81 [&](const auto &t) -> Value {
82 return builder.createOrFold<arith::AddIOp>(
83 loc, std::get<0>(t), std::get<1>(t));
84 });
85 // Do not go beyond `srcShape` bounds.
86 SmallVector<Value> mods = llvm::map_to_vector(
87 llvm::zip_equal(adds, srcShape), [&](const auto &t) -> Value {
88 return builder.createOrFold<arith::RemUIOp>(
89 loc, std::get<0>(t),
90 arith::ConstantIndexOp::create(builder, loc, std::get<1>(t)));
91 });
92
93 coordinates.push_back(mods);
94 }
95 return coordinates;
96}
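// Illustrative walkthrough (not from the original source), continuing the
// example above: srcShape = 128x256, subShape = 16x32, subShapesLayout = 4x2,
// so distUnitShape = 64x64 and there are 2x4 distribution units.
// For delinearizedId = (1, 1):
//   distUnitLocalOffset = (1*16, 1*32) = (16, 32)
//   distribution-unit bases = (0,0), (0,64), (0,128), (0,192),
//                             (64,0), (64,64), (64,128), (64,192)
//   generated coordinates   = (16,32), (16,96), (16,160), (16,224),
//                             (80,32), (80,96), (80,160), (80,224)
// i.e. one 16x32 sub-shape per distribution unit, all within srcShape bounds.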
97
98// Checks if the given shape can be evenly distributed based on the layout
99// and data factors provided by the LayoutAttr.
100bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
101 xegpu::DistributeLayoutAttr attr) {
102 assert(attr && "Layout attribute is missing.");
103
104 // Checks whether the given shape can be evenly distributed using the
105 // specified layout and data attributes. If successful, it returns the work
106 // size for each compute unit; otherwise, it returns `std::nullopt`. The work
107 // size per compute unit is calculated as follows:
108 // - If `data` is null: newShape[i] = shape[i] / layout[i]
109 // - If `data` is not null: newShape[i] = data[i]
110 // When round-robin distribution (`rr`) is enabled, `shape[i]` can be
111 // smaller than `layout[i] * data[i]`, allowing multiple compute units to
112 // share the data.
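  // Illustrative example (not from the original source): for shape = [128, 64]
  // with sg_layout = [4, 4], sg_data = [32, 16], inst_data = [8, 16],
  // lane_layout = [1, 16] and lane_data = [1, 1]:
  //   sg phase:   [128, 64] / [4, 4] = [32, 16], which matches sg_data
  //   inst phase: [32, 16] is divisible by inst_data           -> [8, 16]
  //   lane phase: [8, 16] / [1, 16] = [8, 1], divisible by lane_data
  // so the shape is evenly distributable under this layout.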
113 auto tryDistribute = [&](llvm::ArrayRef<int64_t> shape,
114 llvm::ArrayRef<int64_t> layout,
115 llvm::ArrayRef<int64_t> data,
116 bool rr = true) -> optional<SmallVector<int64_t>> {
117 SmallVector<int64_t> newShape(shape);
118 if (layout.size()) {
119 if (layout.size() != shape.size())
120 return std::nullopt;
121 auto ratio = computeShapeRatio(shape, layout);
122 if (ratio.has_value()) {
123 newShape = ratio.value();
124 } else if (!rr || !computeShapeRatio(layout, shape).has_value()) {
125 return std::nullopt;
126 }
127 // Round-robin case: continue with original newShape
128 }
129
130 if (data.size()) {
131 if (data.size() != shape.size())
132 return std::nullopt;
133 auto ratio = computeShapeRatio(newShape, data);
134 if (!ratio.has_value() && rr)
135 ratio = computeShapeRatio(data, newShape);
136 if (!ratio.has_value())
137 return std::nullopt;
138
139 // If data is not null, we always return it for the next phase.
140 newShape = data;
141 }
142 return newShape;
143 };
144
145 // check the sgLayout and sgData
146 auto maybeSgShape = tryDistribute(shape, attr.getEffectiveSgLayoutAsInt(),
147 attr.getEffectiveSgDataAsInt());
148 if (!maybeSgShape)
149 return false;
150 auto sgShape = maybeSgShape.value();
151
152 // Check inst_data; it has no layout factor and does not need round-robin.
153 auto maybeInstShape =
154 tryDistribute(sgShape, {}, attr.getEffectiveInstDataAsInt(), false);
155 if (!maybeInstShape)
156 return false;
157 auto instShape = maybeInstShape.value();
158
159 // check LaneLayout and LaneData
160 auto maybeLaneShape =
161 tryDistribute(instShape, attr.getEffectiveLaneLayoutAsInt(),
162 attr.getEffectiveLaneDataAsInt(), false);
163 return maybeLaneShape.has_value();
164}
165
166//===----------------------------------------------------------------------===//
167// XeGPU_BlockTensorDescAttr
168//===----------------------------------------------------------------------===//
169BlockTensorDescAttr BlockTensorDescAttr::get(mlir::MLIRContext *context,
170 xegpu::MemorySpace memory_space,
171 int array_length,
172 bool boundary_check) {
173 auto scopeAttr = MemorySpaceAttr::get(context, memory_space);
174 auto lengthAttr =
175 IntegerAttr::get(IntegerType::get(context, 64), array_length);
176 auto boundaryAttr = BoolAttr::get(context, boundary_check);
177 return Base::get(context, scopeAttr, lengthAttr, boundaryAttr);
178}
179
180bool BlockTensorDescAttr::hasDefaultsOnly() {
181 return getMemorySpace().getValue() == xegpu::MemorySpace::Global &&
182 getArrayLength().getInt() == 1 && getBoundaryCheck().getValue();
183}
184
185//===----------------------------------------------------------------------===//
186// XeGPU_ScatterTensorDescAttr
187//===----------------------------------------------------------------------===//
188ScatterTensorDescAttr
189ScatterTensorDescAttr::get(mlir::MLIRContext *context,
190 xegpu::MemorySpace memory_space, int chunk_size) {
191 auto scopeAttr = MemorySpaceAttr::get(context, memory_space);
192 auto chunkSizeAttr =
193 IntegerAttr::get(IntegerType::get(context, 64), chunk_size);
194 return Base::get(context, scopeAttr, chunkSizeAttr);
195}
196
197LogicalResult ScatterTensorDescAttr::verify(
198 llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
199 MemorySpaceAttr memory_space, IntegerAttr chunk_size) {
200 int64_t chunkSize = chunk_size.getInt();
201 if (chunkSize <= 0)
202 return emitError() << "invalid chunk size";
203
204 return success();
205}
206
207//===----------------------------------------------------------------------===//
208// XeGPU_LayoutAttr
209//===----------------------------------------------------------------------===//
210LogicalResult
211LayoutAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
212 DenseI32ArrayAttr sg_layout, DenseI32ArrayAttr sg_data,
213 DenseI32ArrayAttr inst_data, DenseI32ArrayAttr lane_layout,
214 DenseI32ArrayAttr lane_data, DenseI32ArrayAttr order) {
215
216 // A valid layout must include at least one of sg_layout, inst_data or
217 // lane_layout. sg_layout is essential for workgroup-level layouts, while
218 // lane_layout is required for subgroup-level layouts.
219 if (!sg_layout && !inst_data && !lane_layout) {
220 return emitError()
221 << "expected at least one of sg_layout, inst_data or lane_layout";
222 }
223
224 // Check that sg_layout, inst_data and lane_layout have the same rank
225 // when they are present.
226
227 if (sg_layout && inst_data && sg_layout.size() != inst_data.size()) {
228 return emitError()
229 << "expected sg_layout and inst_data to have the same rank";
230 }
231
232 if (sg_layout && lane_layout && sg_layout.size() != lane_layout.size()) {
233 return emitError()
234 << "expected sg_layout and lane_layout to have the same rank";
235 }
236
237 if (inst_data && lane_layout && inst_data.size() != lane_layout.size()) {
238 return emitError() << "expected inst_data and lane_layout to have the same "
239 "rank, got inst_data "
240 << inst_data.size() << ", lane_layout "
241 << lane_layout.size();
242 }
243
244 // sg_data is optional for Workgroup layout, but its presence requires
245 // sg_layout.
246 if (sg_data) {
247 if (!sg_layout)
248 return emitError() << "expected sg_layout being used with sg_data";
249 if (sg_data.size() != sg_layout.size())
250 return emitError()
251 << "expected sg_data and sg_layout to have the same rank";
252 }
253
254 // lane_data is optional for Subgroup layout, but its presence requires
255 // lane_layout.
256 if (lane_data) {
257 if (!lane_layout)
258 return emitError() << "expected lane_layout being used with lane_data";
259 if (lane_data.size() != lane_layout.size())
260 return emitError()
261 << "expected lane_data and lane_layout to have the same rank";
262 }
263
264 if (order) {
265 if (!sg_layout && !lane_layout)
266 return emitError()
267 << "expected sg_layout/lane_layout being used with order";
268
269 if (sg_layout && order.size() != sg_layout.size())
270 return emitError()
271 << "expected order and sg_layout to have the same rank";
272
273 if (lane_layout && order.size() != lane_layout.size())
274 return emitError()
275 << "expected order and lane_layout to have the same rank";
276 }
277
278 return success();
279}
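// As an illustration (assuming the usual XeGPU attribute assembly format), a
// layout that passes this verifier might look like:
//   #xegpu.layout<sg_layout = [4, 8], sg_data = [32, 32],
//                 inst_data = [8, 16], lane_layout = [1, 16],
//                 lane_data = [1, 1], order = [1, 0]>
// whereas, e.g., an sg_data of rank 3 (rank mismatch with sg_layout) or an
// order of rank 3 would be rejected with the diagnostics above.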
280
281FailureOr<SmallVector<Value>>
282LayoutAttr::delinearizeId(OpBuilder &builder, Location loc, Value linearId) {
283
284 SmallVector<int64_t> sgLayoutInt;
285 if (isForWorkgroup()) {
286 sgLayoutInt = getEffectiveSgLayoutAsInt();
287 } else if (isForSubgroup()) {
288 sgLayoutInt = getEffectiveLaneLayoutAsInt();
289 } else {
290 return failure();
291 }
292
293 DenseI32ArrayAttr orderAttr = getOrder();
294
295 // Handle order attribute
296 SmallVector<int64_t> order;
297 if (orderAttr && !orderAttr.empty()) {
298 order = llvm::to_vector(
299 llvm::map_range(orderAttr.asArrayRef(),
300 [](int32_t idx) { return static_cast<int64_t>(idx); }));
301 } else {
302 // Default order: [1, 0] for 2D (row-major), [2, 1, 0] for 3D, etc.
303 order = llvm::to_vector(
304 llvm::reverse(llvm::seq<int64_t>(0, sgLayoutInt.size())));
305 }
306
307 if (order.size() != sgLayoutInt.size()) {
308 return failure();
309 }
310
311 SmallVector<Value> result(sgLayoutInt.size());
312 Value remaining = linearId;
313
314 /// Process dimensions in the order they appear in the order array
315 /// The first dimension in order is the fastest-changing
316 ///
317 /// Example walkthrough for linearId=22, sgLayout=[2,4,4], order=[2,1,0]:
318 ///
319 /// Initial: remaining=22, dimIdx = order[i], dimSize = sgLayout[dimIdx],
320 /// result=[?,?,?]
321 ///
322 /// i=0 (process columns, dimIdx=2, dimSize=4):
323 /// result[2] = 22 % 4 = 2 (column coordinate)
324 /// remaining = 22 / 4 = 5 (5 complete groups of 4 columns processed)
325 ///
326 /// i=1 (process rows, dimIdx=1, dimSize=4):
327 /// result[1] = 5 % 4 = 1 (row coordinate)
328 /// remaining = 5 / 4 = 1 (1 complete group of 4 rows processed)
329 ///
330 /// i=2 (process layers, dimIdx=0, dimSize=2):
331 /// result[0] = 1 % 2 = 1 (layer coordinate)
332 /// (no remaining update - last iteration)
333 ///
334 /// Final result: [1,1,2] = Layer 1, Row 1, Column 2
335 for (size_t i = 0; i < order.size(); ++i) {
336 int64_t dimIdx = order[i];
337 int64_t dimSize = sgLayoutInt[dimIdx];
338
339 Value dimSizeVal =
340 builder.createOrFold<arith::ConstantIndexOp>(loc, dimSize);
341
342 /// Extract the coordinate for this dimension using modulo operation
343 /// This gives us "how far within this dimension" we are
344 /// e.g., linearId=22, dimSize=4: 22 % 4 = 2 (we're at position 2 within
345 /// this dimension)
346 result[dimIdx] =
347 builder.createOrFold<arith::RemUIOp>(loc, remaining, dimSizeVal);
348
349 /// Update remaining for the next dimension by removing what we've already
350 /// processed. Division tells us "how many complete groups of this dimension
351 /// we've gone through" e.g., linearId=22, dimSize=4: 22 / 4 = 5 (we've
352 /// completed 5 groups of 4) Skip this for the last iteration since there's
353 /// no next dimension to process
354 if (i < order.size() - 1) {
355 remaining =
356 builder.createOrFold<arith::DivUIOp>(loc, remaining, dimSizeVal);
357 }
358 }
359 return result;
360}
361
362/// Implements DistributeLayoutAttr::computeDistributedCoords to generate
363/// instructions for computing multi-dimensional offsets when distributed by
364/// LayoutAttr.
365FailureOr<SmallVector<SmallVector<Value>>>
366LayoutAttr::computeDistributedCoords(OpBuilder &builder, Location loc,
367 Value linearId, ArrayRef<int64_t> shape) {
368 SmallVector<int64_t> layout;
369 SmallVector<int64_t> subShape;
370 if (isForWorkgroup()) {
371 layout = getEffectiveSgLayoutAsInt();
372 subShape = getEffectiveSgDataAsInt();
373 } else if (isForSubgroup()) {
374 layout = getEffectiveLaneLayoutAsInt();
375 subShape = getEffectiveLaneDataAsInt();
376 } else {
377 return failure();
378 }
379 if (subShape.empty()) {
380 if (auto derivedShape = computeShapeRatio(shape, layout))
381 subShape = derivedShape.value();
382 else
383 return failure();
384 }
385
386 // delinearize Ids
387 auto maybeIds = delinearizeId(builder, loc, linearId);
388 if (failed(maybeIds))
389 return failure();
390 SmallVector<Value> ids = *maybeIds;
391
392 return genCoordinates(builder, loc, ids, layout, subShape, shape);
393}
394
395bool LayoutAttr::isEqualTo(const xegpu::DistributeLayoutAttr &other) {
396 if (dyn_cast<xegpu::SliceAttr>(other))
397 return false;
398
399 return *this == dyn_cast<xegpu::LayoutAttr>(other);
400}
401
402// Set sg_data, inst_data and lane_data to 1 for the given unit dims.
403DistributeLayoutAttr LayoutAttr::setUnitDimData(SetVector<int64_t> unitDims) {
404 auto sgDataOpt = getSgData();
405 auto instDataOpt = getInstData();
406 auto laneDataOpt = getLaneData();
407
408 SmallVector<int32_t> sgData;
409 SmallVector<int32_t> instData;
410 SmallVector<int32_t> laneData;
411
412 if (sgDataOpt) {
413 sgData = llvm::to_vector(sgDataOpt.asArrayRef());
414 }
415 if (instDataOpt) {
416 instData = llvm::to_vector(instDataOpt.asArrayRef());
417 }
418 if (laneDataOpt) {
419 laneData = llvm::to_vector(laneDataOpt.asArrayRef());
420 }
421
422 for (auto dim : unitDims) {
423 if (dim < static_cast<int64_t>(sgData.size()))
424 sgData[dim] = 1;
425 if (dim < static_cast<int64_t>(instData.size()))
426 instData[dim] = 1;
427 if (dim < static_cast<int64_t>(laneData.size()))
428 laneData[dim] = 1;
429 }
430
431 return LayoutAttr::get(
432 getContext(), getSgLayout(),
433 sgData.empty() ? DenseI32ArrayAttr()
434 : DenseI32ArrayAttr::get(getContext(), sgData),
435 instData.empty() ? DenseI32ArrayAttr()
436 : DenseI32ArrayAttr::get(getContext(), instData),
437 getLaneLayout(),
438 laneData.empty() ? DenseI32ArrayAttr()
439 : DenseI32ArrayAttr::get(getContext(), laneData),
440 getOrder());
441}
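// Illustrative example (not from the original source): for a layout with
// sg_data = [16, 32], inst_data = [8, 16], lane_data = [1, 2] and
// unitDims = {1}, the result keeps sg_layout, lane_layout and order unchanged
// and has sg_data = [16, 1], inst_data = [8, 1], lane_data = [1, 1].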
442
443// Set sg_layout and lane_layout to 1 for the specified unit dims.
444DistributeLayoutAttr LayoutAttr::setUnitDimLayout(SetVector<int64_t> unitDims) {
445 auto sgLayoutOpt = getSgLayout();
446 auto laneLayoutOpt = getLaneLayout();
447
448 SmallVector<int32_t> sgLayout;
449 SmallVector<int32_t> laneLayout;
450
451 if (sgLayoutOpt) {
452 sgLayout = llvm::to_vector(sgLayoutOpt.asArrayRef());
453 }
454 if (laneLayoutOpt) {
455 laneLayout = llvm::to_vector(laneLayoutOpt.asArrayRef());
456 }
457
458 for (auto dim : unitDims) {
459 if (dim < static_cast<int64_t>(sgLayout.size()))
460 sgLayout[dim] = 1;
461 if (dim < static_cast<int64_t>(laneLayout.size()))
462 laneLayout[dim] = 1;
463 }
464
465 return LayoutAttr::get(
466 getContext(),
467 sgLayout.empty() ? DenseI32ArrayAttr()
468 : DenseI32ArrayAttr::get(getContext(), sgLayout),
469 getSgData(), getInstData(),
470 laneLayout.empty() ? DenseI32ArrayAttr()
471 : DenseI32ArrayAttr::get(getContext(), laneLayout),
472 getLaneData(), getOrder());
473}
474
475//===----------------------------------------------------------------------===//
476// XeGPU_SliceAttr
477//===----------------------------------------------------------------------===//
478LogicalResult
479SliceAttr::verify(llvm::function_ref<InFlightDiagnostic()> emitError,
480 xegpu::DistributeLayoutAttr parent, DenseI64ArrayAttr dims) {
481 if (!parent || !dims)
482 return emitError() << "expected parent layout and dims attribute";
483
484 int64_t rank = parent.getRank();
485
486 // check every element in dims is unique and smaller than rank
487 llvm::SmallDenseSet<int64_t> seen;
488 for (int64_t dim : dims.asArrayRef()) {
489 if (dim < 0 || dim >= rank)
490 return emitError() << "invalid dim (" << dim << ") in slice attribute.";
491 if (!seen.insert(dim).second)
492 return emitError() << "repeated dim (" << dim << ") in slice attribute.";
493 }
494 return success();
495}
496
497SliceAttr SliceAttr::flatten() const {
498 xegpu::DistributeLayoutAttr parent = getParent();
499 SmallVector<DenseI64ArrayAttr> slicedDims({getDims()});
500
501 while (auto sliceAttr = dyn_cast<xegpu::SliceAttr>(parent)) {
502 parent = sliceAttr.getParent();
503 slicedDims.push_back(sliceAttr.getDims());
504 }
505
506 auto layoutAttr = dyn_cast<xegpu::LayoutAttr>(parent);
507 SmallVector<int64_t> indices =
508 llvm::to_vector(llvm::seq<int64_t>(0, layoutAttr.getRank()));
509
510 // Get the remaining (flattened) dims by applying slice ops with all slicedDims.
511 SmallVector<int64_t> remainingDims(indices);
512 for (auto dim : llvm::reverse(slicedDims))
513 remainingDims = XeGPUDialect::slice(llvm::ArrayRef<int64_t>(remainingDims),
514 dim.asArrayRef());
515
516 // Get the flattened sliced dims by applying slice ops with the remaining dims.
517 SmallVector<int64_t> flattendDims = XeGPUDialect::slice(
518 llvm::ArrayRef<int64_t>(indices), llvm::ArrayRef<int64_t>(remainingDims));
519
520 return xegpu::SliceAttr::get(
521 getContext(), layoutAttr,
522 DenseI64ArrayAttr::get(getContext(), flattendDims));
523}
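// Illustrative example (not from the original source): let the parent be a
// rank-3 LayoutAttr (e.g. sg_layout = [2, 4, 4]), let A = slice(parent,
// dims = [1]) and B = slice(A, dims = [1]). B's dims are relative to A's
// rank-2 space, so they refer to the parent's dim 2. Flattening B yields
// slice(parent, dims = [1, 2]), i.e. all sliced dims expressed in the
// parent's dimension space.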
524
525FailureOr<SmallVector<Value>>
526SliceAttr::delinearizeId(OpBuilder &builder, Location loc, Value linearId) {
527 SliceAttr attr = flatten();
528 auto parent = dyn_cast<LayoutAttr>(attr.getParent());
529 return parent.delinearizeId(builder, loc, linearId);
530}
531
532// Implements DistributeLayoutAttr::computeDistributedCoords to generate
533// instructions for computing multi-dimensional offsets when distributed by
534// LayoutAttr.
535FailureOr<SmallVector<SmallVector<Value>>>
536SliceAttr::computeDistributedCoords(OpBuilder &builder, Location loc,
537 Value linearId, ArrayRef<int64_t> shape) {
538 assert(getRank() == static_cast<int64_t>(shape.size()) && "invalid shape.");
539 if (!isForWorkgroup())
540 return failure();
541
542 SmallVector<int64_t> layout;
543 SmallVector<int64_t> subShape;
544 if (isForWorkgroup()) {
545 layout = getEffectiveSgLayoutAsInt();
546 subShape = getEffectiveSgDataAsInt();
547 } else if (isForSubgroup()) {
548 layout = getEffectiveLaneLayoutAsInt();
549 subShape = getEffectiveLaneDataAsInt();
550 } else {
551 return failure();
552 }
553
554 if (subShape.empty()) {
555 if (auto derivedShape = computeShapeRatio(shape, layout))
556 subShape = derivedShape.value();
557 else
558 return failure();
559 }
560
561 // delinearize Ids
562 auto maybeIds = delinearizeId(builder, loc, linearId);
563 if (failed(maybeIds))
564 return failure();
565
566 // The effective sgIds used for computing offsets correspond
567 // to the dims that are not sliced.
568 ArrayRef<int64_t> dims = flatten().getDims().asArrayRef();
569 SmallVector<Value> sgIds =
570 XeGPUDialect::slice(ArrayRef<Value>(*maybeIds), dims);
571
572 return genCoordinates(builder, loc, sgIds, layout, subShape, shape);
573}
574
575bool SliceAttr::isSliceOf(const xegpu::DistributeLayoutAttr &other) {
576 auto flattenedThis = flatten();
577 // If other is a LayoutAttr, just compare directly with parent of
578 // flattenedThis.
579 if (auto otherLayout = dyn_cast<xegpu::LayoutAttr>(other))
580 return flattenedThis.getParent() == otherLayout;
581 // If other is a SliceAttr, flatten it first before comparing.
582 auto flattenedOther = dyn_cast<xegpu::SliceAttr>(other).flatten();
583 // Both must have common parent LayoutAttr.
584 if (flattenedThis.getParent() != flattenedOther.getParent())
585 return false;
586 // flattenedOther's sliced dims must be a subset of flattenedThis's sliced
587 // dims.
588 llvm::SmallDenseSet<int64_t> thisDims(
589 flattenedThis.getDims().asArrayRef().begin(),
590 flattenedThis.getDims().asArrayRef().end());
591 return llvm::all_of(flattenedOther.getDims().asArrayRef(),
592 [&](int64_t dim) { return thisDims.contains(dim); });
593}
594
595bool SliceAttr::isEqualTo(const xegpu::DistributeLayoutAttr &other) {
596 if (dyn_cast<xegpu::LayoutAttr>(other))
597 return false;
598
599 auto flattenedThis = flatten();
600 auto flattenedOther = dyn_cast<xegpu::SliceAttr>(other).flatten();
601
602 return ((flattenedThis.getParent() == flattenedOther.getParent()) &&
603 (flattenedThis.getDims() == flattenedOther.getDims()));
604}
605
606// Helper function to adjust unit dimensions from sliced space to parent space
607static SetVector<int64_t>
608adjustUnitDimsWithSliceDims(const SetVector<int64_t> &unitDims,
609 ArrayRef<int64_t> sliceDims) {
610 // Reconstruct parent's non-sliced dimensions
611
612 int64_t parentRank = sliceDims.size() + unitDims.size();
613 llvm::SmallDenseSet<int64_t> slicedDimsSet(sliceDims.begin(),
614 sliceDims.end());
615 SmallVector<int64_t> nonSlicedDims;
616 for (int64_t i = 0; i < parentRank; ++i) {
617 if (!slicedDimsSet.contains(i))
618 nonSlicedDims.push_back(i);
619 }
620
621 // Map unit dims from sliced space to parent space
622 SetVector<int64_t> adjustUnitDims;
623 for (auto dim : unitDims) {
624 if (dim < static_cast<int64_t>(nonSlicedDims.size())) {
625 adjustUnitDims.insert(nonSlicedDims[dim]);
626 }
627 }
628
629 return adjustUnitDims;
630}
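// Illustrative example (not from the original source): with unitDims = {0, 1}
// (in the sliced space) and sliceDims = [1], the parent rank is 3 and the
// non-sliced parent dims are [0, 2]; the unit dims therefore map to {0, 2} in
// the parent's dimension space.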
631
632// Set sg_data, inst_data and lane_data to 1 for the given unit dims.
633DistributeLayoutAttr SliceAttr::setUnitDimData(SetVector<int64_t> unitDims) {
634 SliceAttr attr = flatten();
635 ArrayRef<int64_t> sliceDims = attr.getDims().asArrayRef();
636 auto parent = dyn_cast<LayoutAttr>(attr.getParent());
637
638 SetVector<int64_t> adjustUnitDims =
639 adjustUnitDimsWithSliceDims(unitDims, sliceDims);
640
641 return SliceAttr::get(getContext(), parent.setUnitDimData(adjustUnitDims),
642 attr.getDims());
643}
644
645// Set sg_layout and lane_layout to 1 for the specified unit dims.
646DistributeLayoutAttr SliceAttr::setUnitDimLayout(SetVector<int64_t> unitDims) {
647 SliceAttr attr = flatten();
648 ArrayRef<int64_t> sliceDims = attr.getDims().asArrayRef();
649 auto parent = dyn_cast<LayoutAttr>(attr.getParent());
650
651 SetVector<int64_t> adjustUnitDims =
652 adjustUnitDimsWithSliceDims(unitDims, sliceDims);
653
654 return SliceAttr::get(getContext(), parent.setUnitDimLayout(adjustUnitDims),
655 attr.getDims());
656}
657
658//===----------------------------------------------------------------------===//
659// XeGPU_RangeAttr
660//===----------------------------------------------------------------------===//
661
662LogicalResult
663RangeAttr::verify(llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
664 IntegerAttr startOfRange, IntegerAttr endOfRange) {
665 if (startOfRange.getInt() >= endOfRange.getInt())
666 return emitError() << "'end' : " << endOfRange.getInt()
667 << " must be greater than 'start' : "
668 << startOfRange.getInt();
669
670 return success();
671}
672
673//===----------------------------------------------------------------------===//
674// XeGPU_TensorDescType
675//===----------------------------------------------------------------------===//
676
677mlir::Type TensorDescType::parse(AsmParser &parser) {
678 llvm::SmallVector<int64_t> shape;
679 mlir::Type elementType;
680 mlir::FailureOr<mlir::Attribute> encoding;
681 mlir::FailureOr<mlir::Attribute> layout;
682
683 // Parse literal '<'
684 if (parser.parseLess())
685 return {};
686
687 auto shapeLoc = parser.getCurrentLocation();
688 if (mlir::failed(parser.parseDimensionList(shape))) {
689 parser.emitError(shapeLoc, "failed to parse parameter 'shape'");
690 return {};
691 }
692
693 auto elemTypeLoc = parser.getCurrentLocation();
694 if (mlir::failed(parser.parseType(elementType))) {
695 parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'");
696 return {};
697 }
698
699 // parse optional attributes
700 while (mlir::succeeded(parser.parseOptionalComma())) {
701 mlir::Attribute attr;
702 ParseResult res = parser.parseAttribute(attr);
703 if (mlir::succeeded(res)) {
704 if (mlir::isa<LayoutAttr>(attr)) {
705 layout = attr;
706 continue;
707 }
708 if (mlir::isa<BlockTensorDescAttr, ScatterTensorDescAttr>(attr)) {
709 encoding = attr;
710 continue;
711 }
712 }
713 return {};
714 }
715
716 // Parse literal '>'
717 if (parser.parseGreater())
718 return {};
719
720 MLIRContext *ctxt = parser.getContext();
721 return TensorDescType::getChecked(
722 [&]() { return parser.emitError(parser.getNameLoc()); }, ctxt, shape,
723 elementType, encoding.value_or(BlockTensorDescAttr::get(ctxt)),
724 layout.value_or(mlir::Attribute()));
725}
726
727void TensorDescType::print(AsmPrinter &printer) const {
728 printer << "<";
729
730 auto shape = getShape();
731 for (int64_t dim : shape) {
732 if (mlir::ShapedType::isDynamic(dim))
733 printer << '?';
734 else
735 printer << dim;
736 printer << 'x';
737 }
738
739 printer << getElementType();
740
741 auto encoding = getEncoding();
742 auto blockAttr = llvm::dyn_cast_if_present<BlockTensorDescAttr>(encoding);
743 if (encoding && (!blockAttr || !blockAttr.hasDefaultsOnly()))
744 printer << ", " << encoding;
745
746 if (auto layout = getLayout())
747 printer << ", " << layout;
748
749 printer << ">";
750}
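// As an illustration (assuming the usual XeGPU assembly format), tensor
// descriptors printed by this method might look like:
//   !xegpu.tensor_desc<8x16xf16>
//   !xegpu.tensor_desc<64xf16, #xegpu.block_tdesc_attr<memory_space = slm>>
//   !xegpu.tensor_desc<128x64xf16,
//       #xegpu.layout<sg_layout = [4, 4], sg_data = [32, 16]>>
// A block encoding carrying only default values (global memory,
// array_length = 1, boundary_check = true) is elided, per hasDefaultsOnly().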
751
752TensorDescType TensorDescType::get(llvm::ArrayRef<int64_t> shape,
753 mlir::Type elementType, int array_length,
754 bool boundary_check,
755 MemorySpace memory_space,
756 mlir::Attribute layout) {
757 auto context = elementType.getContext();
758 auto attr = BlockTensorDescAttr::get(context, memory_space, array_length,
759 boundary_check);
760 return Base::get(context, shape, elementType, attr, layout);
761}
762
763TensorDescType TensorDescType::get(llvm::ArrayRef<int64_t> shape,
764 mlir::Type elementType, int chunk_size,
765 MemorySpace memory_space,
766 mlir::Attribute layout) {
767 auto context = elementType.getContext();
768 auto attr = ScatterTensorDescAttr::get(context, memory_space, chunk_size);
769 return Base::get(context, shape, elementType, attr, layout);
770}
771
772LogicalResult
773TensorDescType::verify(llvm::function_ref<InFlightDiagnostic()> emitError,
774 llvm::ArrayRef<int64_t> shape, mlir::Type elementType,
775 mlir::Attribute encoding, mlir::Attribute layout) {
776 size_t rank = shape.size();
777
778 if (rank == 0)
779 return emitError() << "expected non-zero rank tensor";
780
781 auto blockAttr = mlir::dyn_cast_if_present<BlockTensorDescAttr>(encoding);
782 if (blockAttr) {
783 MemorySpaceAttr memorySpaceAttr = blockAttr.getMemorySpace();
784 if (rank > 1 && memorySpaceAttr &&
785 memorySpaceAttr.getValue() == MemorySpace::SLM)
786 return emitError() << "SLM is only supported for 1D block tensor";
787 }
788
789 // For gather and scatter ops, low-precision types are packed in 32-bit units.
790 unsigned bitWidth = elementType.getIntOrFloatBitWidth();
791 int chunkAlignmentFactor =
792 bitWidth < uArch::generalPackedFormatBitSize
793 ? uArch::generalPackedFormatBitSize / bitWidth
794 : 1;
795 auto scatterAttr = mlir::dyn_cast_if_present<ScatterTensorDescAttr>(encoding);
796 if (scatterAttr) {
797 int64_t chunkSize = scatterAttr.getChunkSizeAsInt();
798 if (rank == 1 && chunkSize != 1)
799 return emitError() << "expected non-contiguous elements for 1D tensor";
800
801 // If chunk size > 1, the second dimension of the tensor shape must be
802 // equal to chunk size and it must be a multiple of the
803 // chunkAlignmentFactor.
804 if (chunkSize > 1) {
805 if (shape.back() != chunkSize)
806 return emitError() << "expected last dim of tensor to match chunk size";
807 if (shape.back() % chunkAlignmentFactor != 0)
808 return emitError() << "expected last dim of tensor to be a multiple of "
809 << chunkAlignmentFactor;
810 }
811 }
812
813 auto layoutAttr = llvm::dyn_cast_if_present<LayoutAttr>(layout);
814 if (layoutAttr) {
815 if (rank != (size_t)layoutAttr.getRank())
816 return emitError() << "expected layout rank to match tensor rank";
817
818 auto laneData = layoutAttr.getLaneData();
819 if (scatterAttr && laneData) {
820 // Validate subgroup mapping rules for scattered tensors.
821 // if chunkSize > 1, the last dimension of the tensor should
822 // be distributed in the units divisible by chunkAlignmentFactor.
823 int64_t chunkSize = scatterAttr.getChunkSizeAsInt();
824 if (chunkSize > 1 && laneData[rank - 1] % chunkAlignmentFactor)
825 return emitError()
826 << "expected last dim of lane_data to be a multiple of: "
827 << chunkAlignmentFactor;
828 }
829
830 if (!XeGPUDialect::isEvenlyDistributable(shape, layoutAttr)) {
831 std::string shapeStr;
832 llvm::raw_string_ostream stream(shapeStr);
833 llvm::interleaveComma(shape, stream);
834 return emitError() << "cannot distribute [" << shapeStr << "] using "
835 << layoutAttr;
836 }
837 }
838 return success();
839}
840
841//===----------------------------------------------------------------------===//
842// XeGPU_MemDescType
843//===----------------------------------------------------------------------===//
844mlir::Type MemDescType::parse(AsmParser &parser) {
845 llvm::SmallVector<int64_t> shape;
846 mlir::Type elementType;
847 mlir::FailureOr<MemLayoutAttr> layout;
848
849 // Parse literal '<'
850 if (parser.parseLess())
851 return {};
852
853 auto shapeLoc = parser.getCurrentLocation();
854 if (mlir::failed(parser.parseDimensionList(shape, false, true))) {
855 parser.emitError(shapeLoc, "failed to parse parameter 'shape'");
856 return {};
857 }
858
859 auto elemTypeLoc = parser.getCurrentLocation();
860 if (mlir::failed(parser.parseType(elementType))) {
861 parser.emitError(elemTypeLoc, "failed to parse parameter 'elementType'");
862 return {};
863 }
864
865 // parse optional attributes
866 if (mlir::succeeded(parser.parseOptionalComma())) {
867 MemLayoutAttr attr;
868 ParseResult res = parser.parseAttribute(attr);
869 if (mlir::failed(res))
870 return {};
871 layout = attr;
872 }
873
874 // Parse literal '>'
875 if (parser.parseGreater())
876 return {};
877
878 MLIRContext *ctxt = parser.getContext();
879 return MemDescType::getChecked(
880 [&]() { return parser.emitError(parser.getNameLoc()); }, ctxt, shape,
881 elementType, layout.value_or(MemLayoutAttr()));
882}
883
884void MemDescType::print(AsmPrinter &printer) const {
885 printer << "<";
886
887 printer.printDimensionList(getShape());
888 printer << 'x';
889 printer << getElementType();
890
891 if (auto layout = getMemLayout())
892 printer << ", " << layout;
893
894 printer << ">";
895}
896
897//===----------------------------------------------------------------------===//
898// XeGPU_MemLayoutAttr
899//===----------------------------------------------------------------------===//
900
901Attribute MemLayoutAttr::parse(AsmParser &parser, Type type) {
902
903 auto context = parser.getContext();
904 llvm::SMLoc loc = parser.getCurrentLocation();
905
906 llvm::SmallDenseSet<StringRef> seenKeys;
907 SmallVector<NamedAttribute> attributes;
908
909 auto parseElt = [&]() -> ParseResult {
910 StringRef nameId;
911 if (failed(parser.parseKeyword(&nameId)))
912 return parser.emitError(loc, "expected valid attribute name");
913
914 if (!seenKeys.insert(nameId).second)
915 return parser.emitError(loc, "duplicate key '")
916 << nameId << " in mem layout attribute";
917
918 if (failed(parser.parseEqual()))
919 return failure();
920
921 Attribute attr;
922 if (failed(parser.parseAttribute(attr)))
923 return failure();
924 attributes.emplace_back(nameId, attr);
925 return success();
926 };
927
928 // Parse literal '<'
929 if (parser.parseLess())
930 return {};
931
932 if (failed(parser.parseCommaSeparatedList(parseElt)))
933 return {};
934
935 // Parse literal '>'
936 if (parser.parseGreater())
937 return {};
938
939 return parser.getChecked<MemLayoutAttr>(
940 loc, context, DictionaryAttr::get(context, attributes));
941}
942
943void MemLayoutAttr::print(AsmPrinter &printer) const {
944 printer << "<";
945 ArrayRef<NamedAttribute> attrs = getAttrs().getValue();
946 for (size_t i = 0; i < attrs.size(); i++) {
947 printer << attrs[i].getName().str() << " = " << attrs[i].getValue();
948 if (i < attrs.size() - 1)
949 printer << ", ";
950 }
951 printer << ">";
952}
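// As an illustration (the attribute mnemonic and key names below are
// assumptions for this example; the parser accepts arbitrary key = value
// pairs), a memory descriptor with a layout might print as:
//   !xegpu.mem_desc<32x64xf16, #xegpu.mem_layout<stride = [64, 1], block = [8, 16]>>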
953// A helper utility to perform a binary operation on OpFoldResults.
954// Attribute operands are materialized as index constants, and the
955// corresponding arith op is created on the resulting values.
957template <typename ArithOp>
958OpFoldResult genBinOp(OpFoldResult a, OpFoldResult b, Location loc,
959 OpBuilder &builder) {
960 auto aVal = getValueOrCreateConstantIndexOp(builder, loc, a);
961 auto bVal = getValueOrCreateConstantIndexOp(builder, loc, b);
962 return ArithOp::create(builder, loc, aVal, bVal).getResult();
963}
964
965// A helper utility to perform division on an OpFoldResult and an int64_t.
966#define div(a, b) \
967 genBinOp<arith::DivSIOp>(a, builder.getIndexAttr(b), loc, builder)
968
969// A helper utility to perform remainder on an OpFoldResult and an int64_t.
970#define rem(a, b) \
971 genBinOp<arith::RemSIOp>(a, builder.getIndexAttr(b), loc, builder)
972
973// A helper utility to perform multiplication on an OpFoldResult and an int64_t.
974#define mul(a, b) \
975 genBinOp<arith::MulIOp>(a, builder.getIndexAttr(b), loc, builder)
976
977// A helper utility to perform addition on two OpFoldResults.
978#define add(a, b) genBinOp<arith::AddIOp>(a, b, loc, builder)
979
980// block the given offsets according to the block shape
981// say the original offset is [y, x], and the block shape is [By, Bx],
982// then the blocked offset is [y/By, x/Bx, y%By, x%Bx]
983SmallVector<OpFoldResult> getBlockedOffsets(OpBuilder &builder, Location loc,
984 ArrayRef<OpFoldResult> offsets,
985 ArrayRef<int64_t> blockShape) {
986
987 assert(offsets.size() == blockShape.size() &&
988 "offsets and blockShape must have the same size");
989 SmallVector<OpFoldResult> blockedOffsets;
990 SmallVector<OpFoldResult> divs, rems;
991
992 for (auto [offset, block] : llvm::zip(offsets, blockShape)) {
993 divs.push_back(div(offset, block));
994 rems.push_back(rem(offset, block));
995 }
996 blockedOffsets.append(divs.begin(), divs.end());
997 blockedOffsets.append(rems.begin(), rems.end());
998
999 return blockedOffsets;
1000}
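// Illustrative example (not from the original source): for offsets = [10, 37]
// and blockShape = [8, 16], the blocked offsets are
//   [10/8, 37/16, 10%8, 37%16] = [1, 2, 2, 5].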
1001
1002// Get strides as vector of integer for MemDesc.
1003SmallVector<int64_t> MemDescType::getStrideShape() {
1004
1005 SmallVector<int64_t> matrixShape(getShape().begin(), getShape().end());
1006
1007 ArrayAttr strideAttr = getStrideAttr();
1008 SmallVector<int64_t> strides;
1009 for (Attribute attr : strideAttr.getValue()) {
1010 strides.push_back(cast<IntegerAttr>(attr).getInt());
1011 }
1012
1013 SmallVector<int64_t> innerBlkShape = getBlockShape();
1014
1016 // Get the permutation from the fastest-changing dim (FCD) to the
1017 // slowest-changing dim: perm[i] = the dim with the i-th smallest stride.
1017 SmallVector<int, 4> perm =
1018 llvm::to_vector<4>(llvm::seq<int>(0, strides.size()));
1019 llvm::sort(perm, [&](int a, int b) { return strides[a] < strides[b]; });
1020
1021 assert(strides[perm[0]] == 1 && "inner most dim must have stride 1");
1022
1023 SmallVector<int64_t> innerBlkStride(innerBlkShape.size());
1024 innerBlkStride[perm[0]] = 1;
1025 for (size_t i = 1; i < perm.size(); ++i)
1026 innerBlkStride[perm[i]] =
1027 innerBlkStride[perm[i - 1]] * innerBlkShape[perm[i - 1]];
1028
1029 // Compute the original matrix shape using the stride info, and the
1030 // number of blocks in each dimension. The shape of the highest dim
1031 // can't be derived from the stride info, but it doesn't impact the
1032 // stride computation for the blocked layout.
1033 SmallVector<int64_t> matrixShapeOrig(matrixShape.size());
1034 SmallVector<int64_t> BlkShapeOrig(matrixShape.size());
1035 for (size_t i = 0; i < perm.size() - 1; ++i) {
1036 matrixShapeOrig[perm[i]] = strides[perm[i + 1]] / strides[perm[i]];
1037 BlkShapeOrig[perm[i]] = matrixShapeOrig[perm[i]] / innerBlkShape[perm[i]];
1038 }
1039
1040 int64_t innerBlkSize = 1;
1041 for (auto s : innerBlkShape)
1042 innerBlkSize *= s;
1043
1044 SmallVector<int64_t> outerBlkStride(matrixShape.size());
1045 outerBlkStride[perm[0]] = innerBlkSize;
1046 for (size_t i = 0; i < perm.size() - 1; ++i) {
1047 outerBlkStride[perm[i + 1]] =
1048 outerBlkStride[perm[i]] * BlkShapeOrig[perm[i]];
1049 }
1050
1051 // combine the inner and outer strides
1052 SmallVector<int64_t> blockedStrides;
1053 blockedStrides.append(outerBlkStride.begin(), outerBlkStride.end());
1054 blockedStrides.append(innerBlkStride.begin(), innerBlkStride.end());
1055
1056 return blockedStrides;
1057}
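// Illustrative example (not from the original source): for a 32x64 matrix with
// row-major strides [64, 1] and an 8x16 block shape:
//   perm           = [1, 0]        (dim 1 has the smallest stride)
//   innerBlkStride = [16, 1]       (strides within one 8x16 block)
//   BlkShapeOrig   = [-, 4]        (64 / 16 blocks along dim 1)
//   innerBlkSize   = 8 * 16 = 128
//   outerBlkStride = [512, 128]    (block-level strides)
// giving blockedStrides = [512, 128, 16, 1].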
1058
1059// Calculate the linear offset using the blocked offsets and stride
1060Value MemDescType::getLinearOffsets(OpBuilder &builder, Location loc,
1061 ArrayRef<OpFoldResult> offsets) {
1062
1063 SmallVector<int64_t> matrixShape(getShape().begin(), getShape().end());
1064 SmallVector<int64_t> blockShape = getBlockShape();
1065 SmallVector<int64_t> strides = getStrideShape();
1066 SmallVector<OpFoldResult> blockedOffsets;
1067
1068 // A blockShape equal to matrixShape means no blocking.
1069 if (llvm::equal(blockShape, matrixShape)) {
1070 // remove the outer dims from strides
1071 strides.erase(strides.begin(), strides.begin() + matrixShape.size());
1072 } else {
1073 assert(offsets.size() == blockShape.size() &&
1074 "offsets and blockShape must have the same size");
1075 // say the original offset is [y, x], and the block shape is [By, Bx],
1076 // then the blocked offset is [y/By, x/Bx, y%By, x%Bx]
1077
1078 SmallVector<OpFoldResult> divs, rems;
1079
1080 for (auto [offset, block] : llvm::zip(offsets, blockShape)) {
1081 divs.push_back(div(offset, block));
1082 rems.push_back(rem(offset, block));
1083 }
1084 blockedOffsets.append(divs.begin(), divs.end());
1085 blockedOffsets.append(rems.begin(), rems.end());
1086 offsets = blockedOffsets;
1087 }
1088
1089 // Start with initial value as matrix descriptor's base offset.
1090 Value linearOffset = arith::ConstantIndexOp::create(builder, loc, 0);
1091 for (size_t i = 0; i < offsets.size(); ++i) {
1092 OpFoldResult mulResult = mul(offsets[i], strides[i]);
1093 Value mulVal = getValueOrCreateConstantIndexOp(builder, loc, mulResult);
1094 linearOffset = arith::AddIOp::create(builder, loc, mulVal, linearOffset);
1095 }
1096
1097 return linearOffset;
1098}
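// Illustrative example (not from the original source), continuing the 32x64,
// 8x16-block case above: for offsets = [10, 37] the blocked offsets are
// [1, 2, 2, 5] and the blocked strides are [512, 128, 16, 1], so
//   linearOffset = 1*512 + 2*128 + 2*16 + 5 = 805.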
1099
1100} // namespace xegpu
1101} // namespace mlir
1102
1103#include <mlir/Dialect/XeGPU/IR/XeGPUDialect.cpp.inc>
1104#define GET_ATTRDEF_CLASSES
1105#include <mlir/Dialect/XeGPU/IR/XeGPUAttrs.cpp.inc>
1106#define GET_TYPEDEF_CLASSES
1107#include <mlir/Dialect/XeGPU/IR/XeGPUTypes.cpp.inc>