MLIR 23.0.0git
XeGPUSgToWiDistributeExperimental.cpp
Go to the documentation of this file.
1//===- XeGPUSgToWiDistributeExperimental.cpp - XeGPU SG to WI Pass --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/SCF/Transforms/Patterns.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
#include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"
#include "mlir/IR/ValueRange.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LogicalResult.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
31
namespace mlir {
namespace xegpu {
// Pull in the TableGen-generated pass base class
// (XeGPUSgToWiDistributeExperimentalBase) and registration boilerplate.
#define GEN_PASS_DEF_XEGPUSGTOWIDISTRIBUTEEXPERIMENTAL
#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
} // namespace xegpu
} // namespace mlir
38
39using namespace mlir;
40
41#define DEBUG_TYPE "xegpu-sg-to-wi-distribute-experimental"
42#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
43
44namespace {
45
46/// Casts the given vector value `v` to the expected vector type `expectedTy`.
47static Value castValueTo(ConversionPatternRewriter &rewriter,
48 TypedValue<VectorType> v, VectorType expectedTy) {
49 // If the type matches, simply return the value itself.
50 if (v.getType() == expectedTy)
51 return v;
52 // If only shape differs, use shape cast.
53 if (isa<VectorType>(v.getType()) &&
54 v.getType().getNumElements() == expectedTy.getNumElements())
55 return vector::ShapeCastOp::create(rewriter, v.getLoc(), expectedTy, v);
56
57 // Else create an unrealized cast.
58 auto newOp = UnrealizedConversionCastOp::create(rewriter, v.getLoc(),
59 expectedTy, ValueRange{v});
60 return newOp.getResult(0);
61}
62
63/// Checks if all XeGPU anchor ops and vector results have valid layouts.
64static LogicalResult verifyLayouts(Operation *root) {
65 auto walkResult = root->walk([&](Operation *nestedOp) -> WalkResult {
66 if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(nestedOp)) {
67 auto layout = anchorOp.getAnchorLayout();
68 if (!layout) {
69 nestedOp->emitError("expected anchor layout attribute on operation");
70 return WalkResult::interrupt();
71 }
72 return WalkResult::advance();
73 }
74 // For each vector result, check if the op contains a result layout
75 // attribute.
76 for (OpResult result : nestedOp->getResults()) {
77 if (isa<VectorType>(result.getType())) {
79 if (!layout) {
80 nestedOp->emitError(
81 "expected result layout attribute on vector result");
82 return WalkResult::interrupt();
83 }
84 }
85 }
86 return WalkResult::advance();
87 });
88 return walkResult.wasInterrupted() ? failure() : success();
89}
90
91/// Distributes a subgroup-level CreateNdDesc op to workitem-level CreateNdDesc
92/// op. This simply drops the layout attribute from the tensor descriptor type.
93struct SgToWiCreateNdDesc : public OpConversionPattern<xegpu::CreateNdDescOp> {
94 using OpConversionPattern<xegpu::CreateNdDescOp>::OpConversionPattern;
95
96 LogicalResult
97 matchAndRewrite(xegpu::CreateNdDescOp op, OpAdaptor adaptor,
98 ConversionPatternRewriter &rewriter) const override {
99 xegpu::TensorDescType resultType = op.getType();
100 // If no layout, nothing to do.
101 if (!resultType.getLayout())
102 return failure();
103
104 auto newOp = xegpu::CreateNdDescOp::create(
105 rewriter, op.getLoc(), resultType.dropLayouts(), op.getOperands(),
106 op->getAttrs());
107 rewriter.replaceOp(op, newOp.getResult());
108 return success();
109 }
110};
111
/// Distributes a subgroup-level LoadNd op to workitem-level LoadNd op. Output
/// of workitem-level LoadNd op is 1D. ShapeCast is added to restore the
/// original rank.
struct SgToWiLoadNd : public OpConversionPattern<xegpu::LoadNdOp> {
  using OpConversionPattern<xegpu::LoadNdOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(xegpu::LoadNdOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    xegpu::DistributeLayoutAttr layout = op.getAnchorLayout();
    // If no layout, nothing to do.
    if (!layout)
      return failure();
    // Check if the layout attached to the tensor descriptor is same as the
    // anchor layout. Otherwise, this is a conflict.
    if (op.getTensorDescType().getLayout() != layout)
      return rewriter.notifyMatchFailure(
          op, "conflicting layout attributes on tensor descriptor and anchor");
    // The target micro-architecture decides whether this load needs a
    // transpose effect (see requireTranspose below).
    auto uArch = getUArch(xegpu::getChipStr(op).value_or(""));
    if (!uArch)
      return rewriter.notifyMatchFailure(
          op, "xegpu::LoadNdOp require target attribute attached to "
              "determine transpose "
              "requirement");
    // Vector type the hardware load actually produces (from the descriptor)...
    auto supportedWiResultTyOrFailure =
        xegpu::getDistributedVectorType(op.getTensorDescType());
    // ...and the vector type the surrounding IR expects (from the lane
    // layout). These may differ in shape; castValueTo reconciles them.
    auto expectedWiResultTyOrFailure =
        xegpu::getDistVecTypeBasedOnLaneLayout(layout, op.getType());
    if (failed(supportedWiResultTyOrFailure))
      return rewriter.notifyMatchFailure(
          op, "unable to compute the workitem vector type for LoadNdOp");
    if (failed(expectedWiResultTyOrFailure))
      return rewriter.notifyMatchFailure(
          op,
          "unable to compute expected workitem vector type from lane layout");
    // Create the workitem-level load; the anchor layout is dropped.
    auto newOp = xegpu::LoadNdOp::create(
        rewriter, op.getLoc(), supportedWiResultTyOrFailure.value(),
        adaptor.getTensorDesc(), op.getMixedOffsets(), op.getPackedAttr(),
        op.getTransposeAttr(), op.getL1HintAttr(), op.getL2HintAttr(),
        op.getL3HintAttr(), /**layout**/ nullptr);
    // Set the packed attribute if the layout requires it.
    newOp.setPacked(xegpu::requirePacked(cast<xegpu::LayoutAttr>(layout)));
    // Set the transpose attribute if the layout requires it.
    if (xegpu::requireTranspose(cast<xegpu::LayoutAttr>(layout), uArch))
      newOp.setTranspose(DenseI64ArrayAttr::get(rewriter.getContext(), {1, 0}));
    // Cast the hardware result back to the expected workitem type.
    rewriter.replaceOp(op, castValueTo(rewriter, newOp.getResult(),
                                       expectedWiResultTyOrFailure.value()));
    return success();
  }
};
162
/// Distributes a subgroup-level StoreNd op to workitem-level StoreNd op. Stored
/// value in workitem-level StoreNd op is 1D. ShapeCast is added to cast the
/// incoming value to 1D.
struct SgToWiStoreNd : public OpConversionPattern<xegpu::StoreNdOp> {
  using OpConversionPattern<xegpu::StoreNdOp>::OpConversionPattern;

  LogicalResult
  matchAndRewrite(xegpu::StoreNdOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    xegpu::DistributeLayoutAttr layout = op.getAnchorLayout();
    // If no layout, nothing to do.
    if (!layout)
      return failure();
    // Check if the layout attached to the tensor descriptor and value layout is
    // same as the anchor layout. Otherwise, this is a conflict.
    if (op.getTensorDescType().getLayout() != layout)
      return rewriter.notifyMatchFailure(
          op, "conflicting layout attributes on tensor descriptor and anchor");
    // Operand 0 is the stored value; its layout must agree with the anchor.
    auto valueLayout = xegpu::getDistributeLayoutAttr(op->getOpOperand(0));
    if (valueLayout != layout)
      return rewriter.notifyMatchFailure(
          op, "conflicting layout attributes on value and anchor");
    // Vector type the hardware store expects for the value operand (1D,
    // derived from the tensor descriptor).
    auto supportedWiValueTyOrFailure =
        xegpu::getDistributedVectorType(op.getTensorDescType());
    if (failed(supportedWiValueTyOrFailure))
      return rewriter.notifyMatchFailure(
          op,
          "unable to compute wi vector type for StoreNdOp value from tensor "
          "descriptor");

    // Create the workitem-level store with the value cast to the supported
    // type; the anchor layout is dropped. The original op is erased since a
    // store has no results to replace.
    xegpu::StoreNdOp::create(
        rewriter, op.getLoc(),
        castValueTo(rewriter, cast<TypedValue<VectorType>>(adaptor.getValue()),
                    supportedWiValueTyOrFailure.value()),
        adaptor.getTensorDesc(), op.getMixedOffsets(), op.getL1HintAttr(),
        op.getL2HintAttr(), op.getL3HintAttr(), /**layout**/ nullptr);
    rewriter.eraseOp(op);
    return success();
  }
};
203
204/// Distributes a subgroup-level Dpas op to workitem-level Dpas op. All inpputs
205/// and output of workitem-level Dpas op are 1D. Necessary casts are added to
206/// convert the inputs and output to/from 1D.
207struct SgToWiDpas : public OpConversionPattern<xegpu::DpasOp> {
208 using OpConversionPattern<xegpu::DpasOp>::OpConversionPattern;
209
210 LogicalResult
211 matchAndRewrite(xegpu::DpasOp op, OpAdaptor adaptor,
212 ConversionPatternRewriter &rewriter) const override {
213 // llvm::errs() << "DpasOpPattern matchAndRewrite called\n";
214 // Check if the op has A, B and CD layouts attached.
215 auto layoutA = cast<xegpu::LayoutAttr>(op.getLayoutAAttr());
216 auto layoutB = cast<xegpu::LayoutAttr>(op.getLayoutBAttr());
217 auto layoutCd = cast<xegpu::LayoutAttr>(op.getLayoutCdAttr());
218 if (!layoutA || !layoutB || !layoutCd)
219 return failure();
220 // llvm::errs() << "tryning to calculate wi types for dpas op\n";
221 auto wiResultTyOrFailure =
222 xegpu::getDistributedVectorType(op.getType(), layoutCd);
223 auto wiATypeOrFailure =
224 xegpu::getDistributedVectorType(op.getLhs().getType(), layoutA);
225 auto wiBTypeOrFailure =
226 xegpu::getDistributedVectorType(op.getRhs().getType(), layoutB);
227 auto expectedWiResultTyOrFailure =
228 xegpu::getDistVecTypeBasedOnLaneLayout(layoutCd, op.getType());
229 if (failed(wiResultTyOrFailure) || failed(wiATypeOrFailure) ||
230 failed(wiBTypeOrFailure))
231 return rewriter.notifyMatchFailure(
232 op, "failed to calculate supported workitem vector types for DpasOp "
233 "from layouts");
234 if (failed(expectedWiResultTyOrFailure))
235 return rewriter.notifyMatchFailure(
236 op, "unable to compute expected workitem vector type for DpasOp from "
237 "lane layout");
238 auto newOp = xegpu::DpasOp::create(
239 rewriter, op->getLoc(), wiResultTyOrFailure.value(),
240 castValueTo(rewriter, cast<TypedValue<VectorType>>(adaptor.getLhs()),
241 wiATypeOrFailure.value()),
242 castValueTo(rewriter, cast<TypedValue<VectorType>>(adaptor.getRhs()),
243 wiBTypeOrFailure.value()),
244 castValueTo(rewriter, cast<TypedValue<VectorType>>(adaptor.getAcc()),
245 wiResultTyOrFailure.value()),
246 /** layoutA**/ nullptr,
247 /** layoutB**/ nullptr, /** layoutCd**/ nullptr);
248 // Explicitly set the new types to enable correct type materializations.
249 rewriter.replaceOp(op, castValueTo(rewriter, newOp.getResult(),
250 expectedWiResultTyOrFailure.value()));
251 return success();
252 }
253};
254
255/// Distributes elementwise ops to workitem-level elementwise ops. This
256/// currently handles elementwise ops with single result only.
257struct SgToWiElementWise : public ConversionPattern {
258 SgToWiElementWise(TypeConverter &typeConverter, MLIRContext *ctx)
259 : ConversionPattern(MatchAnyOpTypeTag(), /*benefit=*/1, ctx) {}
260
261 LogicalResult
262 matchAndRewrite(Operation *op, ArrayRef<Value> operands,
263 ConversionPatternRewriter &rewriter) const override {
264 // Only match ops with elementwise trait and single result.
266 return failure();
267
268 auto resultType = dyn_cast<VectorType>(op->getResult(0).getType());
269 if (!resultType)
270 return rewriter.notifyMatchFailure(
271 op, "operation result is not a vector type");
272
273 xegpu::DistributeLayoutAttr layout =
274 xegpu::getTemporaryLayout(llvm::cast<OpResult>(op->getResult(0)));
275 if (!layout || !layout.isForSubgroup())
276 return rewriter.notifyMatchFailure(
277 op, "operation result does not have subgroup distribute layout");
278
279 auto wiShapeOrFailure =
280 xegpu::getDistVecTypeBasedOnLaneLayout(layout, resultType);
281
282 if (failed(wiShapeOrFailure))
283 return rewriter.notifyMatchFailure(
284 op, "unable to compute workitem vector type from the layout");
285
286 VectorType newResultType = wiShapeOrFailure.value();
287 OperationState state(op->getLoc(), op->getName());
288 state.addOperands(operands);
289 state.addTypes(newResultType);
290 // Copy all attributes except for DistributeLayoutAttr.
291 for (auto attr : op->getAttrs()) {
292 if (!isa<xegpu::DistributeLayoutAttr>(attr.getValue()))
293 state.addAttribute(attr.getName(), attr.getValue());
294 }
295 Operation *newOp = rewriter.create(state);
296
297 rewriter.replaceOp(op, newOp->getResult(0));
298 return success();
299 }
300};
301
302/// Distributes a subgroup-level arith ConstantOp to workitem-level arith
303/// ConstantOp.
304struct SgToWiArithConstant : public OpConversionPattern<arith::ConstantOp> {
305 using OpConversionPattern<arith::ConstantOp>::OpConversionPattern;
306
307 LogicalResult
308 matchAndRewrite(arith::ConstantOp op, OpAdaptor adaptor,
309 ConversionPatternRewriter &rewriter) const override {
310 auto resultType = dyn_cast<VectorType>(op.getType());
311 if (!resultType)
312 return failure();
313
314 // Only handle dense vector constants
315 auto dense = dyn_cast<SplatElementsAttr>(op.getValue());
316 if (!dense)
317 return rewriter.notifyMatchFailure(
318 op, "only dense splat vector constants are supported");
319
320 xegpu::DistributeLayoutAttr layout =
321 xegpu::getTemporaryLayout(llvm::cast<OpResult>(op.getResult()));
322 if (!layout || !layout.isForSubgroup())
323 return rewriter.notifyMatchFailure(
324 op, "operation result does not have subgroup distribute layout");
325
326 auto wiShapeOrFailure =
327 xegpu::getDistVecTypeBasedOnLaneLayout(layout, resultType);
328
329 if (failed(wiShapeOrFailure))
330 return rewriter.notifyMatchFailure(
331 op, "unable to compute workitem vector type from the layout");
332
333 VectorType newResultType = wiShapeOrFailure.value();
334 auto sclarValue = dense.getSplatValue<Attribute>();
335 auto newDenseAttr = DenseElementsAttr::get(newResultType, sclarValue);
336
337 auto newOp = arith::ConstantOp::create(rewriter, op.getLoc(), newResultType,
338 newDenseAttr);
339 rewriter.replaceOp(op, newOp.getResult());
340 return success();
341 }
342};
343
344/// Distributes a subgroup-level PrefetchNd op to workitem-level PrefetchNd op.
345struct SgToWiPrefetchNd : public OpConversionPattern<xegpu::PrefetchNdOp> {
346 using OpConversionPattern<xegpu::PrefetchNdOp>::OpConversionPattern;
347
348 LogicalResult
349 matchAndRewrite(xegpu::PrefetchNdOp op, OpAdaptor adaptor,
350 ConversionPatternRewriter &rewriter) const override {
351 xegpu::DistributeLayoutAttr layout = op.getAnchorLayout();
352 // If no layout, nothing to do.
353 if (!layout)
354 return failure();
355
356 xegpu::PrefetchNdOp::create(rewriter, op.getLoc(), adaptor.getTensorDesc(),
357 op.getMixedOffsets(), op.getL1HintAttr(),
358 op.getL2HintAttr(), op.getL3HintAttr(),
359 /**layout**/ nullptr);
360 rewriter.eraseOp(op);
361 return success();
362 }
364
365struct XeGPUSgToWiDistributeExperimentalPass
367 XeGPUSgToWiDistributeExperimentalPass> {
368 void runOnOperation() override;
371} // namespace
373void XeGPUSgToWiDistributeExperimentalPass::runOnOperation() {
374
375 // Verify if all XeGPU anchor ops and vector ops have result layouts.
376 // TODO: This can be removed once the full layout refactoring is done.
377 Operation *root = getOperation();
378 if (failed(verifyLayouts(root))) {
379 LLVM_DEBUG(DBGS() << "XeGPUSgToWiDistributeExperimentalPass: layout "
380 "verification failed\n");
382 return;
384 // Collect existing UnrealizedConversionCastOps. These must be preserved.
385 llvm::SmallSetVector<UnrealizedConversionCastOp, 8> existingCasts;
386 root->walk(
387 [&](UnrealizedConversionCastOp castOp) { existingCasts.insert(castOp); });
388 // Perform a structural type conversion to convert structural ops to have WI
389 // types. This will insert UnrealizedConversionCastOps to make the IR
390 // valid.
391 auto materializeCast = [&](mlir::OpBuilder &builder, mlir::Type type,
392 mlir::ValueRange inputs,
394 UnrealizedConversionCastOp castOp =
395 UnrealizedConversionCastOp::create(builder, loc, type, inputs);
396 return castOp.getResult(0);
397 };
398 {
400 TypeConverter typeConverter;
402 typeConverter.addSourceMaterialization(materializeCast);
403 typeConverter.addTargetMaterialization(materializeCast);
408 typeConverter, patterns, target);
409 target.addLegalOp<UnrealizedConversionCastOp>();
410 (void)applyPartialConversion(root, target, std::move(patterns));
411 }
412 // Structural type conversion can generate some redundant
413 // UnrealizedConversionCastOps to materialize the SG type from type converted
414 // WI type. These are redundant at this point and can be eliminated by
415 // inserting shape casts instead.
416 // Example:
417 // %1 = UnrealizedConversionCastOp %0 : vector<16x1xf32> to vector<16x16xf32>
418 // %2 = UnrealizedConversionCastOp %1 : vector<16x16xf32> to vector<16xf32>
419 // This can be replaced with:
420 // %2 = vector.shape_cast %0 : vector<16x1xf32> to vector<16xf32>
421 OpBuilder builder(root);
422 root->walk([&](UnrealizedConversionCastOp op) {
423 // If this op existed before, nothing to do.
424 if (existingCasts.contains(op))
425 return;
426 // number of inputs and outputs must be 1.
427 if (op.getNumOperands() != 1 || op.getNumResults() != 1)
428 return;
429 // Both input and output types must be vector types.
430 auto singleInput = op.getInputs()[0];
431 auto inputTy = dyn_cast<VectorType>(singleInput.getType());
432 auto outputTy = dyn_cast<VectorType>(op.getResult(0).getType());
433 if (!inputTy || !outputTy)
434 return;
435
436 // Check if the defining op of the input is also an
437 // UnrealizedConversionCastOp and it has a single user (which is this
438 // op).
439 auto definingOp = singleInput.getDefiningOp<UnrealizedConversionCastOp>();
440 if (!definingOp || !definingOp->hasOneUse())
441 return;
442 auto inputOfDefiningOp = definingOp.getInputs()[0];
443 // If the input of the defining op and output type are both vector types
444 // have same number of elements, insert a shape cast.
445 auto inputOfDefiningOpTy =
446 dyn_cast<VectorType>(inputOfDefiningOp.getType());
447 if (inputOfDefiningOpTy &&
448 inputOfDefiningOpTy.getNumElements() == outputTy.getNumElements()) {
449 builder.setInsertionPoint(op);
450 auto shapeCast = vector::ShapeCastOp::create(builder, op.getLoc(),
451 outputTy, inputOfDefiningOp);
452 op.replaceAllUsesWith(ValueRange{shapeCast.getResult()});
453 return;
454 }
455 });
456 // At this point, we will have some dead UnrealizedConversionCastOps. Just
457 // erase them.
458 bool changed = true;
459 while (changed) {
460 changed = false;
461 root->walk([&](UnrealizedConversionCastOp op) {
462 // Skip existing casts.
463 if (existingCasts.contains(op))
464 return;
465 if (op.use_empty()) {
466 op.erase();
467 changed = true;
468 }
469 });
470 }
471}
472
474 TypeConverter &typeConverter) {
475 // Any type other than TensorDescType and VectorType are legal as is.
476 typeConverter.addConversion([](Type type) -> std::optional<Type> {
477 if (!isa<TensorDescType, VectorType>(type))
478 return type;
479 return std::nullopt;
480 });
481 // For TensorDescType, drop the layout attribute if any.
482 typeConverter.addConversion([](TensorDescType type) -> Type {
483 if (type.getLayoutAttr()) {
484 return type.dropLayouts();
485 }
486 return type;
487 });
488 // For VectorType, check if there is a distribute layout attribute on the
489 // value. If so, convert to the distributed vector type based on the layout.
490 typeConverter.addConversion([](Value v) -> std::optional<Type> {
491 auto type = v.getType();
492 // If value is not vector type, nothing to do.
493 if (!isa<VectorType>(type))
494 return std::nullopt;
495 auto layout = xegpu::getDistributeLayoutAttr(v);
496 if (!layout || !layout.isForSubgroup())
497 return type;
498 // Vector type is distributed based on lane layout.
499 auto newTyOrFailure =
500 getDistVecTypeBasedOnLaneLayout(layout, cast<VectorType>(type));
501 if (failed(newTyOrFailure))
502 return type;
503 return *newTyOrFailure;
504 });
505}
506
511 // CreateNdDescOp is legal only if its result type has no layout attribute.
512 target.addDynamicallyLegalOp<xegpu::CreateNdDescOp>(
513 [&](xegpu::CreateNdDescOp op) { return !op.getType().getLayoutAttr(); });
514 // Any anchor XeGPU op is legal only if it has no anchor layout.
515 target.addDynamicallyLegalDialect<xegpu::XeGPUDialect>([](Operation *op) {
516 auto anchorOp = dyn_cast<AnchorLayoutInterface>(op);
517 if (!anchorOp)
518 return true;
519 return !anchorOp.getAnchorLayout();
520 });
521 // Arith constants are legal only if they have no temporary layout attribute.
522 target.addDynamicallyLegalOp<arith::ConstantOp>(
523 [=](arith::ConstantOp op) -> bool {
524 // If the result type is not a vector, it's legal.
525 if (!isa<VectorType>(op.getResult().getType()))
526 return true;
527 return !xegpu::getTemporaryLayout(dyn_cast<OpResult>(op.getResult()));
528 });
529 // In math and arith dialects, only handle elementwise ops with a single
530 // result and with a result layout attribute.
531 target.addDynamicallyLegalDialect<math::MathDialect, arith::ArithDialect>(
532 [=](Operation *op) -> std::optional<bool> {
533 // Only handle elementwise mappable ops
535 return true;
536 // Only handle ops with single vector result
537 if (op->getNumResults() != 1)
538 return true;
539
540 VectorType resultType =
541 dyn_cast<VectorType>(op->getResult(0).getType());
542 if (!resultType)
543 return true;
544
545 // Check if all operands are vectors of the same shape
546 for (Value operand : op->getOperands()) {
547 VectorType operandType = dyn_cast<VectorType>(operand.getType());
548 if (!operandType || operandType.getShape() != resultType.getShape()) {
549 return true;
550 }
551 }
552 return !xegpu::getTemporaryLayout(dyn_cast<OpResult>(op->getResult(0)));
553 });
554 target.markUnknownOpDynamicallyLegal([](Operation *op) { return true; });
555 patterns.add<SgToWiCreateNdDesc, SgToWiLoadNd, SgToWiStoreNd, SgToWiDpas,
556 SgToWiElementWise, SgToWiArithConstant, SgToWiPrefetchNd>(
557 typeConverter, patterns.getContext());
558}
return success()
#define DBGS()
Definition Hoisting.cpp:32
b getContext())
Attributes are known-constant values of operations.
Definition Attributes.h:25
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
This class helps build Operations.
Definition Builders.h:207
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition Builders.h:398
This is a value defined by a result of an operation.
Definition Value.h:457
OpT getOperation()
Return the current operation being transformed.
Definition Pass.h:389
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
Definition Operation.h:512
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Definition Operation.h:407
Location getLoc()
The source location the operation was defined or derived from.
Definition Operation.h:223
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
OperationName getName()
The name of an operation is the key identifier for it.
Definition Operation.h:119
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition Operation.h:797
result_range getResults()
Definition Operation.h:415
unsigned getNumResults()
Return the number of results held by this operation.
Definition Operation.h:404
virtual void runOnOperation()=0
The polymorphic API that runs the pass over the currently held operation.
void signalPassFailure()
Signal that some invariant was broken when running.
Definition Pass.h:226
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
Type getType() const
Return the type of this value.
Definition Value.h:105
A utility result that is used to signal how to proceed with an ongoing walk:
Definition WalkResult.h:29
static WalkResult advance()
Definition WalkResult.h:47
static WalkResult interrupt()
Definition WalkResult.h:46
static DenseArrayAttrImpl get(MLIRContext *context, ArrayRef< int64_t > content)
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
const uArch * getUArch(llvm::StringRef archName)
bool requireTranspose(const LayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
void populateXeGPUSgToWiDistributeTypeConversions(TypeConverter &typeConverter)
Define only the type conversions needed for XeGPU subgroup to workitem distribution.
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
bool requirePacked(const LayoutAttr layout)
Helper function to check if the layout is packed.
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
void populateXeGPUSgToWiDistributeTypeConversionAndLegality(TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target)
Defines type conversions and legality for XeGPU subgroup to workitem distribution and appends the req...
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
const FrozenRewritePatternSet GreedyRewriteConfig bool * changed
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Definition Value.h:497
const FrozenRewritePatternSet & patterns
This represents an operation in an abstracted form, suitable for use with the builder APIs.
void addOperands(ValueRange newOperands)
void addAttribute(StringRef name, Attribute attr)
Add an attribute with the specified name.
void addTypes(ArrayRef< Type > newTypes)