#define GEN_PASS_DEF_XEGPUWGTOSGDISTRIBUTE
#include "mlir/Dialect/XeGPU/Transforms/Passes.h.inc"
// Returns the "sg_id_range" attribute from the nearest enclosing op that
// carries one (typically an scf.if guarding a subgroup-id range), or a null
// attribute when none is present.
static xegpu::RangeAttr getRangeSpecAttr(Operation *op) {
  Operation *parent = op->getParentOfType<scf::IfOp>();
  while (parent) {
    if (auto attr = llvm::dyn_cast_if_present<xegpu::RangeAttr>(
            parent->getAttr("sg_id_range")))
      return attr;
    parent = parent->getParentOfType<scf::IfOp>();
  }
  return xegpu::RangeAttr();
}
// Computes the per-subgroup shape and the number of tiles each subgroup owns
// when distributing `shape` according to `layout`.
static std::pair<SmallVector<int64_t>, int>
getSgShapeAndCount(ArrayRef<int64_t> shape,
                   xegpu::DistributeLayoutAttr layout) {
  int count = 1;
  SmallVector<int64_t> sgShape(shape);
  if (layout && layout.isForWorkgroup()) {
    SmallVector<int64_t> sgLayout = layout.getSgLayoutAsInt();
    if (!layout.getSgDataAsInt().empty())
      sgShape = layout.getSgDataAsInt();
    else if (auto maybeDerivedSgData = computeShapeRatio(shape, sgLayout))
      sgShape = *maybeDerivedSgData;
    SmallVector<int64_t> distUnit = computeElementwiseMul(sgLayout, sgShape);
    // Clamp each distribution-unit dim so it never exceeds the original shape.
    for (size_t i = 0; i < distUnit.size(); ++i)
      distUnit[i] = std::min(shape[i], distUnit[i]);
    count = computeProduct(shape) / computeProduct(distUnit);
  }
  return std::make_pair(sgShape, count);
}
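// For illustration (hypothetical values): with shape = [256, 128],
// sg_layout = [4, 4] and sg_data = [32, 32], the distribution unit is
// [128, 128], so sgShape = [32, 32] and count = (256*128)/(128*128) = 2,
// i.e. each subgroup owns two 32x32 tiles.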
// Shared helper for the *WithOffset patterns below: for each subgroup it
// computes the offsets at which that subgroup accesses its tile(s), combining
// the op's original offsets with the subgroup's offsets within the layout.
template <typename OpType,
          typename = std::enable_if_t<llvm::is_one_of<
              OpType, xegpu::CreateNdDescOp, xegpu::LoadNdOp, xegpu::StoreNdOp,
              xegpu::PrefetchNdOp, xegpu::LoadMatrixOp, xegpu::StoreMatrixOp>::value>>
static LogicalResult
genOffsetsList(ConversionPatternRewriter &rewriter, OpType op,
               SmallVector<SmallVector<OpFoldResult>> &offsetsList) {
  Location loc = op.getLoc();
  SmallVector<OpFoldResult> origOffsets = op.getMixedOffsets();
  // Only ops carrying explicit offsets and a workgroup-level layout qualify.
  if (origOffsets.empty())
    return failure();
  xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
  if (!layout || !layout.isForWorkgroup())
    return failure();

  Value sgId = rewriter.create<gpu::SubgroupIdOp>(loc, nullptr);

  // If an enclosing region restricts the subgroup ids via "sg_id_range",
  // rebase the subgroup id to the start of that range.
  xegpu::RangeAttr sgIdRange = getRangeSpecAttr(op);
  if (sgIdRange) {
    int64_t startOfRange = sgIdRange.getStart().getInt();
    int64_t endOfRange = sgIdRange.getEnd().getInt();
    if (layout.getNumSubgroups() != endOfRange - startOfRange)
      return rewriter.notifyMatchFailure(
          op, "sg_layout size must match the sg_id_range");
    if (startOfRange > 0) {
      Value startOfRangeVal =
          rewriter.create<arith::ConstantIndexOp>(loc, startOfRange);
      sgId = rewriter.create<index::SubOp>(loc, sgId, startOfRangeVal);
    }
  }

  // Ask the layout for this subgroup's offsets into the workgroup shape
  // (wgShape is the workgroup-level shape of the data accessed by the op).
  auto maybeDescOffsets = layout.getOffsets(rewriter, loc, sgId, wgShape);
  if (failed(maybeDescOffsets))
    return failure();

  // Add the original offsets to each set of subgroup offsets (element-wise,
  // right-aligned) to obtain the final per-subgroup offset lists.
  for (const auto &sgOffsets : *maybeDescOffsets) {
    SmallVector<OpFoldResult> newOffsets = xegpu::addWithRightAligned(
        rewriter, loc, getAsOpFoldResult(sgOffsets), origOffsets);
    offsetsList.push_back(std::move(newOffsets));
  }
  return success();
}
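// For illustration (hypothetical values): with sg_layout = [2, 4] and
// sg_data = [32, 32], subgroup id 5 maps to coordinate (1, 1) of sg_layout,
// so layout.getOffsets() yields [32, 32] for it; adding the op's original
// offsets then gives that subgroup's final access offsets.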
// Distributes xegpu.create_nd_tdesc ops that carry explicit offsets.
struct WgToSgCreateNdOp : public OpConversionPattern<xegpu::CreateNdDescOp> {
  LogicalResult
  matchAndRewrite(xegpu::CreateNdDescOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    SmallVector<SmallVector<OpFoldResult>> offsetsList;
    if (failed(genOffsetsList(rewriter, op, offsetsList)))
      return failure();

    xegpu::TensorDescType tdescTy = op.getType();
    ArrayRef<int64_t> wgShape = tdescTy.getShape();
    Type elemTy = tdescTy.getElementType();
    xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
    SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
    xegpu::TensorDescType newTdescTy = xegpu::TensorDescType::get(
        tdescTy.getContext(), sgShape, elemTy, tdescTy.getEncoding(),
        layout.dropSgLayoutAndData());

    SmallVector<Value> newOps;
    for (auto offsets : offsetsList) {
      auto newOp = xegpu::CreateNdDescOp::create(
          rewriter, op.getLoc(), newTdescTy, op.getSource(), offsets,
          op.getMixedSizes(), op.getMixedStrides());
      newOps.push_back(newOp);
    }
    rewriter.replaceOpWithMultiple(op, {newOps});
    return success();
  }
};
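// Illustrative effect of WgToSgCreateNdOp (hypothetical IR): with
//   #l = #xegpu.layout<sg_layout = [2, 2], sg_data = [16, 16]>
//   %t = xegpu.create_nd_tdesc %src[%off0, %off1] : memref<32x32xf32>
//          -> !xegpu.tensor_desc<32x32xf32, #l>
// each subgroup gets a 16x16 descriptor at its own offsets, and the result
// type keeps the layout minus the sg_layout/sg_data fields.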
// Variant for xegpu.create_nd_tdesc without explicit offsets: the descriptor
// is replicated once per distribution unit, using the per-subgroup shape.
struct WgToSgCreateNdOpNoOffset
    : public OpConversionPattern<xegpu::CreateNdDescOp> {
  LogicalResult
  matchAndRewrite(xegpu::CreateNdDescOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (!op.getMixedOffsets().empty())
      return failure();
    Location loc = op.getLoc();
    xegpu::TensorDescType tdescTy = op.getType();
    auto layout = dyn_cast<xegpu::LayoutAttr>(tdescTy.getLayout());
    if (!layout || !layout.isForWorkgroup())
      return failure();
    Type elemTy = tdescTy.getElementType();
    ArrayRef<int64_t> wgShape = tdescTy.getShape();
    SmallVector<int64_t> sgShape;
    int count;
    std::tie(sgShape, count) = getSgShapeAndCount(wgShape, layout);
    xegpu::TensorDescType newTdescTy =
        xegpu::TensorDescType::get(tdescTy.getContext(), sgShape, elemTy,
                                   tdescTy.getEncoding(),
                                   layout.dropSgLayoutAndData());
    SmallVector<Value> newCreateNdOps(count);
    std::generate(newCreateNdOps.begin(), newCreateNdOps.end(), [&]() {
      return xegpu::CreateNdDescOp::create(rewriter, loc, newTdescTy,
                                           op.getSource(), op.getMixedSizes(),
                                           op.getMixedStrides());
    });
    rewriter.replaceOpWithMultiple(op, {newCreateNdOps});
    return success();
  }
};
// Distributes xegpu.load_nd whose offsets live on the tensor descriptor.
struct WgToSgLoadNdOp : public OpConversionPattern<xegpu::LoadNdOp> {
  LogicalResult
  matchAndRewrite(xegpu::LoadNdOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (!op.getMixedOffsets().empty())
      return failure();
    SmallVector<Value> newLoadOps;
    for (auto src : adaptor.getTensorDesc()) {
      xegpu::TensorDescType tdescTy =
          dyn_cast<xegpu::TensorDescType>(src.getType());
      ArrayRef<int64_t> srcShape = tdescTy.getShape();
      VectorType newResTy = VectorType::get(srcShape, tdescTy.getElementType());
      auto newLoadOp = xegpu::LoadNdOp::create(rewriter, op.getLoc(), newResTy,
                                               src, op->getAttrs());
      newLoadOps.push_back(newLoadOp);
    }
    rewriter.replaceOpWithMultiple(op, {newLoadOps});
    return mlir::success();
  }
};
// Distributes xegpu.store_nd whose offsets live on the tensor descriptor.
struct WgToSgStoreNdOp : public OpConversionPattern<xegpu::StoreNdOp> {
  LogicalResult
  matchAndRewrite(xegpu::StoreNdOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (!op.getMixedOffsets().empty())
      return failure();
    for (auto [v, t] : llvm::zip(adaptor.getValue(), adaptor.getTensorDesc()))
      xegpu::StoreNdOp::create(rewriter, op.getLoc(), v, t, op.getL1HintAttr(),
                               op.getL2HintAttr(), op.getL3HintAttr());
    rewriter.eraseOp(op);
    return success();
  }
};
// Distributes xegpu.load_nd ops that carry explicit offsets.
struct WgToSgLoadNdOpWithOffset : public OpConversionPattern<xegpu::LoadNdOp> {
  LogicalResult
  matchAndRewrite(xegpu::LoadNdOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    SmallVector<SmallVector<OpFoldResult>> offsetsList;
    if (failed(genOffsetsList(rewriter, op, offsetsList)))
      return failure();

    SmallVector<Value> newOps;
    for (auto [tdesc, offsets] :
         llvm::zip(adaptor.getTensorDesc(), offsetsList)) {
      auto tdescTy = dyn_cast<xegpu::TensorDescType>(tdesc.getType());
      VectorType newResTy =
          VectorType::get(tdescTy.getShape(), tdescTy.getElementType());
      auto newOp = xegpu::LoadNdOp::create(
          rewriter, op.getLoc(), newResTy, tdesc, offsets,
          nullptr, nullptr, op.getL1HintAttr(),
          op.getL2HintAttr(), op.getL3HintAttr());
      newOps.push_back(newOp);
    }
    rewriter.replaceOpWithMultiple(op, {newOps});
    return success();
  }
};
// Distributes xegpu.store_nd ops that carry explicit offsets.
struct WgToSgStoreNdOpWithOffset
    : public OpConversionPattern<xegpu::StoreNdOp> {
  LogicalResult
  matchAndRewrite(xegpu::StoreNdOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    SmallVector<SmallVector<OpFoldResult>> offsetsList;
    if (failed(genOffsetsList(rewriter, op, offsetsList)))
      return failure();

    for (auto [v, tdesc, offsets] :
         llvm::zip(adaptor.getValue(), adaptor.getTensorDesc(), offsetsList)) {
      rewriter.create<xegpu::StoreNdOp>(op.getLoc(), v, tdesc, offsets,
                                        op.getL1HintAttr(), op.getL2HintAttr(),
                                        op.getL3HintAttr());
    }
    rewriter.eraseOp(op);
    return success();
  }
};
// Distributes xegpu.prefetch_nd ops that carry explicit offsets.
struct WgToSgPrefetchNdOpWithOffset
    : public OpConversionPattern<xegpu::PrefetchNdOp> {
  LogicalResult
  matchAndRewrite(xegpu::PrefetchNdOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    SmallVector<SmallVector<OpFoldResult>> offsetsList;
    if (failed(genOffsetsList(rewriter, op, offsetsList)))
      return failure();

    for (auto [tdesc, offsets] :
         llvm::zip(adaptor.getTensorDesc(), offsetsList)) {
      rewriter.create<xegpu::PrefetchNdOp>(
          op.getLoc(), tdesc, offsets, op.getL1HintAttr(), op.getL2HintAttr(),
          op.getL3HintAttr());
    }
    rewriter.eraseOp(op);
    return success();
  }
};
// Distributes xegpu.update_nd_offset: the same offset update is applied to
// every per-subgroup tensor descriptor.
struct WgToSgUpdateNdOffsetOp
    : public OpConversionPattern<xegpu::UpdateNdOffsetOp> {
  LogicalResult
  matchAndRewrite(xegpu::UpdateNdOffsetOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    SmallVector<Value> newUpdateTileOffsetOps;
    for (auto tDesc : adaptor.getTensorDesc()) {
      auto newUpdateTileOffsetOp = xegpu::UpdateNdOffsetOp::create(
          rewriter, op.getLoc(), tDesc.getType(), tDesc, op.getOffsets(),
          op.getConstOffsets());
      newUpdateTileOffsetOps.push_back(newUpdateTileOffsetOp);
    }
    rewriter.replaceOpWithMultiple(op, {newUpdateTileOffsetOps});
    return success();
  }
};
// Distributes xegpu.dpas: each per-subgroup A fragment is paired with each
// per-subgroup B fragment, threading the accumulator through when present.
struct WgToSgDpasOp : public OpConversionPattern<xegpu::DpasOp> {
  LogicalResult
  matchAndRewrite(xegpu::DpasOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    Location loc = op.getLoc();
    VectorType resultTy = op.getResult().getType();
    if (resultTy.getRank() != 2)
      return failure();
    auto originalLayout = xegpu::getDistributeLayoutAttr(op.getResult());
    SmallVector<Value> newDpasOps;
    size_t i = 0;
    for (auto aVec : adaptor.getLhs()) {
      for (auto bVec : adaptor.getRhs()) {
        llvm::SmallVector<Value> operands({aVec, bVec});
        Value tmpC;
        if (op.getAcc()) {
          tmpC = adaptor.getAcc()[i++];
          operands.push_back(tmpC);
        }
        ArrayRef<int64_t> aVecShape =
            llvm::cast<VectorType>(aVec.getType()).getShape();
        ArrayRef<int64_t> bVecShape =
            llvm::cast<VectorType>(bVec.getType()).getShape();
        VectorType resTy = VectorType::get({aVecShape[0], bVecShape[1]},
                                           resultTy.getElementType());
        tmpC = xegpu::DpasOp::create(rewriter, loc, resTy, operands);
        xegpu::setDistributeLayoutAttr(llvm::cast<OpResult>(tmpC),
                                       originalLayout.dropSgLayoutAndData());
        newDpasOps.push_back(tmpC);
      }
    }
    rewriter.replaceOpWithMultiple(op, {newDpasOps});
    return success();
  }
};
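// Illustrative effect of WgToSgDpasOp (hypothetical shapes): per subgroup, an
// A fragment of shape 32x128 and a B fragment of shape 128x32 produce a 32x32
// partial result; the nested loops above pair every A fragment with every B
// fragment owned by the subgroup.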
// Distributes xegpu.prefetch_nd whose offsets live on the tensor descriptor.
struct WgToSgPrefetchNdOp : public OpConversionPattern<xegpu::PrefetchNdOp> {
  LogicalResult
  matchAndRewrite(xegpu::PrefetchNdOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    int64_t offsetSize = static_cast<int64_t>(op.getOffsets().size());
    if ((offsetSize != 0) || op.getConstOffsetsAttr())
      return failure();
    for (auto src : adaptor.getTensorDesc())
      xegpu::PrefetchNdOp::create(rewriter, op.getLoc(), TypeRange(), src,
                                  op->getAttrs());
    rewriter.eraseOp(op);
    return success();
  }
};
// Distributes vector.broadcast: each source dim must either stay 1 or already
// match the corresponding per-subgroup dim.
struct WgToSgVectorBroadcastOp
    : public OpConversionPattern<vector::BroadcastOp> {
  LogicalResult
  matchAndRewrite(vector::BroadcastOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    VectorType resultType = op.getResult().getType();
    ArrayRef<int64_t> wgShape = resultType.getShape();
    xegpu::DistributeLayoutAttr layout =
        xegpu::getDistributeLayoutAttr(op.getResult());
    if (!layout || !layout.isForWorkgroup())
      return failure();
    // Only rank-preserving broadcasts are supported.
    auto srcType =
        dyn_cast<VectorType>(adaptor.getOperands().front()[0].getType());
    if (!srcType || srcType.getRank() != resultType.getRank())
      return failure();
    SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
    VectorType newResultType =
        VectorType::get(sgShape, resultType.getElementType());
    SmallVector<int64_t> sgLayout = layout.getSgLayoutAsInt();
    if (sgLayout.empty())
      return failure();
    if (!xegpu::XeGPUDialect::isEvenlyDistributable(wgShape, layout))
      return failure();
    // Each source dim must be 1 (broadcast) or already per-subgroup sized.
    auto srcShape = srcType.getShape();
    for (size_t i = 0; i < srcShape.size(); ++i) {
      if (srcShape[i] != 1 && srcShape[i] != sgShape[i])
        return failure();
    }
    SmallVector<Value> newBroadcastOps;
    for (auto operand : adaptor.getOperands().front()) {
      auto newBroadcast = vector::BroadcastOp::create(rewriter, op.getLoc(),
                                                      newResultType, operand);
      xegpu::setDistributeLayoutAttr(newBroadcast->getResult(0),
                                     layout.dropSgLayoutAndData());
      newBroadcastOps.push_back(newBroadcast.getResult());
    }
    rewriter.replaceOpWithMultiple(op, {newBroadcastOps});
    return success();
  }
};
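// For illustration (hypothetical shapes): a workgroup-level broadcast of
// vector<1x128xf32> to vector<256x128xf32> with sg_data = [32, 32] sees
// already-distributed sources of vector<1x32xf32> and emits per-subgroup
// broadcasts to vector<32x32xf32>; a source dim that is neither 1 nor equal
// to the corresponding subgroup dim makes the pattern bail out.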
// Distributes elementwise math/arith ops over workgroup-level vectors by
// re-creating the op once per distributed operand set.
struct WgToSgElementwiseOp : public ConversionPattern {
  LogicalResult
  matchAndRewrite(Operation *op, ArrayRef<ValueRange> operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto resultType = dyn_cast<VectorType>(op->getResult(0).getType());
    assert(resultType && "Expected result to be a VectorType");
    ArrayRef<int64_t> wgShape = resultType.getShape();
    xegpu::DistributeLayoutAttr layout =
        xegpu::getDistributeLayoutAttr(op->getResult(0));
    if (!layout || !layout.isForWorkgroup())
      return failure();
    SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
    // Every distributed operand must provide the same number of per-subgroup
    // values.
    size_t numVariants = operands.empty() ? 0 : operands.front().size();
    if (llvm::any_of(operands, [&](const ValueRange &operandVec) {
          return operandVec.size() != numVariants;
        }))
      return failure();
    VectorType newResultType =
        VectorType::get(sgShape, resultType.getElementType());
    SmallVector<Value> newResults;
    for (size_t i = 0; i < numVariants; ++i) {
      SmallVector<Value> opOperands;
      for (auto &operandVec : operands)
        opOperands.push_back(operandVec[i]);
      OperationState state(op->getLoc(), op->getName());
      state.addOperands(opOperands);
      state.addTypes(newResultType);
      // Copy attributes, dropping sg_layout/sg_data from layout attributes.
      for (auto attr : op->getAttrs()) {
        if (auto layout = dyn_cast<xegpu::LayoutAttr>(attr.getValue())) {
          if (auto newLayout = layout.dropSgLayoutAndData())
            state.addAttribute(attr.getName(), newLayout);
        } else {
          state.addAttribute(attr.getName(), attr.getValue());
        }
      }
      Operation *newOp = rewriter.create(state);
      newResults.push_back(newOp->getResult(0));
    }
    rewriter.replaceOpWithMultiple(op, {newResults});
    return success();
  }
};
struct WgToSgConvertLayoutOp
    : public OpConversionPattern<xegpu::ConvertLayoutOp> {
  LogicalResult
  matchAndRewrite(xegpu::ConvertLayoutOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto input = dyn_cast<xegpu::LayoutAttr>(op.getInputLayout());
    auto target = dyn_cast<xegpu::LayoutAttr>(op.getTargetLayout());

    if (!input || !target || !input.isForWorkgroup() ||
        !target.isForWorkgroup())
      return rewriter.notifyMatchFailure(
          op, "Input and target layouts must have subgroup layout");

    // The sg-level fields (sg_layout, sg_data, order) of the two layouts must
    // agree; only the remaining lane-level fields are converted per subgroup.
    if (inputSgLayout != targetSgLayout || inputSgData != targetSgData ||
        inputOrder != targetOrder)
      return failure();

    input = input.dropSgLayoutAndData();
    target = target.dropSgLayoutAndData();

    // By default forward the already-distributed sources; if lane-level fields
    // remain after dropping the sg fields, emit per-subgroup convert_layout
    // ops instead.
    SmallVector<Value> newOps = llvm::to_vector(adaptor.getSource());
    if (input && target) {
      for (auto [i, src] : llvm::enumerate(adaptor.getSource())) {
        auto newOp = xegpu::ConvertLayoutOp::create(
            rewriter, op.getLoc(), src.getType(), src, input, target);
        newOps[i] = newOp;
      }
    }
    rewriter.replaceOpWithMultiple(op, {newOps});
    return success();
  }
};
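// For illustration (hypothetical layouts): converting between two layouts
// that share sg_layout = [4, 4] and sg_data = [32, 32] but differ in their
// lane-level fields keeps one per-subgroup convert_layout on each 32x32 tile;
// if the sg-level fields differed, the pattern would reject the op instead.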
struct UnrealizedConversionCastOpPattern
    : public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
  mlir::LogicalResult
  matchAndRewrite(mlir::UnrealizedConversionCastOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    SmallVector<Value> inputs = xegpu::flattenValues(adaptor.getInputs());
    auto inputTy = dyn_cast<VectorType>(inputs[0].getType());
    auto outputTy = dyn_cast<VectorType>(op->getOpResult(0).getType());

    if (!inputTy || !outputTy || !llvm::all_equal(op->getResultTypes()) ||
        !llvm::all_equal(ValueRange(inputs).getTypes()))
      return failure();

    // 1:N cast introduced by materialization: the distributed inputs already
    // match the result types, so forward them directly.
    if (op.getNumOperands() == 1 &&
        llvm::equal(ValueRange(inputs).getTypes(), op->getResultTypes())) {
      rewriter.replaceOp(op, inputs);
      return success();
    }

    // N:1 cast back to a workgroup-level vector: replace the single result
    // with the distributed input values.
    if (op.getNumResults() == 1 &&
        computeShapeRatio(outputTy.getShape(), inputTy.getShape())) {
      rewriter.replaceOpWithMultiple(op, {inputs});
      return success();
    }

    return mlir::failure();
  }
};
// Distributes splat vector constants.
struct WgToSgArithConstantOp : public OpConversionPattern<arith::ConstantOp> {
  LogicalResult
  matchAndRewrite(arith::ConstantOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto vecAttr = dyn_cast<DenseElementsAttr>(op.getValue());
    auto vecType = dyn_cast<VectorType>(op.getType());
    if (!vecAttr || !vecAttr.isSplat() || !vecType)
      return failure();
    xegpu::DistributeLayoutAttr layout =
        xegpu::getDistributeLayoutAttr(op.getResult());
    if (!layout || !layout.isForWorkgroup())
      return failure();
    ArrayRef<int64_t> wgShape = vecType.getShape();
    SmallVector<int64_t> sgShape;
    int count;
    std::tie(sgShape, count) = getSgShapeAndCount(wgShape, layout);
    // Rebuild the splat with the per-subgroup shape and replicate it once per
    // tile owned by the subgroup.
    VectorType newType = VectorType::get(sgShape, vecType.getElementType());
    auto sgAttr =
        DenseElementsAttr::get(newType, vecAttr.getSplatValue<Attribute>());
    auto newConstOp =
        arith::ConstantOp::create(rewriter, op.getLoc(), newType, sgAttr);
    if (auto newLayout = layout.dropSgLayoutAndData())
      xegpu::setDistributeLayoutAttr(newConstOp->getResult(0), newLayout);
    rewriter.replaceOpWithMultiple(op, {SmallVector<Value>(count, newConstOp)});
    return success();
  }
};
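// For illustration (hypothetical shapes): arith.constant dense<1.0> :
// vector<256x128xf32> with sg_data = [32, 32] is rebuilt as a single
// arith.constant dense<1.0> : vector<32x32xf32>, replicated once for every
// tile the subgroup owns.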
// Distributes xegpu.load (gather form) ops that carry explicit offsets.
struct WgToSgLoadGatherOpWithOffset
    : public OpConversionPattern<xegpu::LoadGatherOp> {
  LogicalResult
  matchAndRewrite(xegpu::LoadGatherOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (!op.getOffsets())
      return failure();
    Location loc = op.getLoc();
    VectorType resultType = dyn_cast<VectorType>(op.getResult().getType());
    if (!resultType)
      return failure();
    ArrayRef<int64_t> wgShape = resultType.getShape();
    xegpu::DistributeLayoutAttr layout =
        xegpu::getDistributeLayoutAttr(op.getResult());
    if (!layout || !layout.isForWorkgroup())
      return failure();
    SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;

    // The offsets and mask operands must already be distributed to the same
    // per-subgroup shape.
    auto offsetsVecType =
        dyn_cast<VectorType>(adaptor.getOffsets().front().getType());
    auto maskVecType =
        dyn_cast<VectorType>(adaptor.getMask().front().getType());
    if (!offsetsVecType || !maskVecType ||
        offsetsVecType.getShape() != maskVecType.getShape()) {
      return rewriter.notifyMatchFailure(op,
                                         "offsets have not been distributed");
    }

    SmallVector<Value> newLoadOps;
    auto chunkSizeOpt = op.getChunkSize();
    int64_t chunkSize = chunkSizeOpt ? static_cast<int64_t>(*chunkSizeOpt) : 1;
    auto chunkSizeAttr = rewriter.getI64IntegerAttr(chunkSize);
    VectorType newTy = VectorType::get(sgShape, resultType.getElementType());
    for (auto [offsets, mask] :
         llvm::zip(adaptor.getOffsets(), adaptor.getMask())) {
      auto newLoadOp = rewriter.create<xegpu::LoadGatherOp>(
          loc, newTy, op.getSource(), offsets, mask, chunkSizeAttr,
          op.getL1HintAttr(), op.getL2HintAttr(), op.getL3HintAttr());
      xegpu::setDistributeLayoutAttr(newLoadOp->getResult(0),
                                     layout.dropSgLayoutAndData());
      newLoadOps.push_back(newLoadOp);
    }
    rewriter.replaceOpWithMultiple(op, {newLoadOps});
    return success();
  }
};
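// For illustration (hypothetical shapes): a gathering load of vector<256xf32>
// with offsets vector<256xindex> and mask vector<256xi1>, distributed with
// sg_data = [32], becomes per-subgroup loads of vector<32xf32> that reuse the
// already-distributed vector<32xindex> offsets and vector<32xi1> mask.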
// Distributes xegpu.store (scatter form) ops that carry explicit offsets.
struct WgToSgStoreScatterOpWithOffset
    : public OpConversionPattern<xegpu::StoreScatterOp> {
  LogicalResult
  matchAndRewrite(xegpu::StoreScatterOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    if (!op.getOffsets())
      return failure();
    Location loc = op.getLoc();
    VectorType valueType = dyn_cast<VectorType>(op.getValue().getType());
    if (!valueType)
      return failure();
    xegpu::DistributeLayoutAttr layout =
        xegpu::getDistributeLayoutAttr(op.getValue());
    if (!layout || !layout.isForWorkgroup())
      return failure();

    // The offsets and mask operands must already be distributed to the same
    // per-subgroup shape.
    auto offsetsVecType =
        dyn_cast<VectorType>(adaptor.getOffsets().front().getType());
    auto maskVecType =
        dyn_cast<VectorType>(adaptor.getMask().front().getType());
    if (!offsetsVecType || !maskVecType ||
        offsetsVecType.getShape() != maskVecType.getShape()) {
      return rewriter.notifyMatchFailure(op,
                                         "offsets have not been distributed");
    }

    auto chunkSizeOpt = op.getChunkSize();
    int64_t chunkSize = chunkSizeOpt ? static_cast<int64_t>(*chunkSizeOpt) : 1;
    auto chunkSizeAttr = rewriter.getI64IntegerAttr(chunkSize);
    for (auto [val, offs, mask] : llvm::zip(
             adaptor.getValue(), adaptor.getOffsets(), adaptor.getMask())) {
      rewriter.create<xegpu::StoreScatterOp>(
          loc, val, op.getDest(), offs, mask, chunkSizeAttr, op.getL1HintAttr(),
          op.getL2HintAttr(), op.getL3HintAttr());
    }
    // Keep only the lane-level part of the layout attribute, if any remains.
    if (auto newLayout = layout.dropSgLayoutAndData())
      op->setAttr("layout", newLayout);
    rewriter.eraseOp(op);
    return success();
  }
};
// Distributes xegpu.load_matrix ops.
struct WgToSgLoadMatrixOp : public OpConversionPattern<xegpu::LoadMatrixOp> {
  LogicalResult
  matchAndRewrite(xegpu::LoadMatrixOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    SmallVector<SmallVector<OpFoldResult>> offsetsList;
    if (failed(genOffsetsList(rewriter, op, offsetsList)))
      return failure();

    VectorType valueTy = op.getRes().getType();
    ArrayRef<int64_t> wgShape = valueTy.getShape();
    Type elemTy = valueTy.getElementType();
    xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
    SmallVector<int64_t> sgShape = getSgShapeAndCount(wgShape, layout).first;
    VectorType newResTy = VectorType::get(sgShape, elemTy);

    SmallVector<Value> newOps;
    for (auto offsets : offsetsList) {
      auto newOp = rewriter.create<xegpu::LoadMatrixOp>(
          op.getLoc(), newResTy, op.getMemDesc(), offsets,
          layout.dropSgLayoutAndData());
      newOps.push_back(newOp);
    }
    rewriter.replaceOpWithMultiple(op, {newOps});
    return success();
  }
};
// Distributes xegpu.store_matrix ops.
struct WgToSgStoreMatrixOp : public OpConversionPattern<xegpu::StoreMatrixOp> {
  LogicalResult
  matchAndRewrite(xegpu::StoreMatrixOp op, OneToNOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    SmallVector<SmallVector<OpFoldResult>> offsetsList;
    if (failed(genOffsetsList(rewriter, op, offsetsList)))
      return failure();

    xegpu::DistributeLayoutAttr layout = op.getLayoutAttr();
    for (auto [v, offsets] : llvm::zip(adaptor.getData(), offsetsList))
      rewriter.create<xegpu::StoreMatrixOp>(op.getLoc(), v, op.getMemDesc(),
                                            offsets,
                                            layout.dropSgLayoutAndData());
    rewriter.eraseOp(op);
    return success();
  }
};
void xegpu::populateXeGPUWgToSgDistributePatterns(RewritePatternSet &patterns) {
  patterns
      .add<WgToSgCreateNdOp, WgToSgCreateNdOpNoOffset, WgToSgLoadNdOp,
           WgToSgLoadNdOpWithOffset, WgToSgStoreNdOp, WgToSgStoreNdOpWithOffset,
           WgToSgUpdateNdOffsetOp, WgToSgDpasOp, WgToSgPrefetchNdOp,
           WgToSgPrefetchNdOpWithOffset, UnrealizedConversionCastOpPattern,
           WgToSgElementwiseOp, WgToSgVectorBroadcastOp, WgToSgConvertLayoutOp,
           WgToSgArithConstantOp, WgToSgLoadGatherOpWithOffset,
           WgToSgStoreScatterOpWithOffset, WgToSgLoadMatrixOp,
           WgToSgStoreMatrixOp>(patterns.getContext());
}
struct XeGPUWgToSgDistributePass
    : public xegpu::impl::XeGPUWgToSgDistributeBase<XeGPUWgToSgDistributePass> {
  void runOnOperation() override;
};
void XeGPUWgToSgDistributePass::runOnOperation() {
  // Remember the unrealized_conversion_cast ops that already exist so newly
  // materialized casts can be distinguished in the legality checks below.
  SmallVector<Operation *> existingCastOps;
  getOperation()->walk([&](UnrealizedConversionCastOp castOp) {
    existingCastOps.push_back(castOp.getOperation());
  });

  // Step 1: structural type conversion for SCF ops. A workgroup-level ranked
  // tensor type becomes `count` copies of the per-subgroup vector type.
  {
    TypeConverter converter;
    converter.addConversion([](Type type) -> Type { return type; });
    converter.addConversion(
        [&](RankedTensorType type,
            SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
          Type elemTy = type.getElementType();
          ArrayRef<int64_t> shape = type.getShape();
          SmallVector<int64_t> subShape;
          int count;
          std::tie(subShape, count) = getSgShapeAndCount(
              shape,
              dyn_cast_if_present<xegpu::LayoutAttr>(type.getEncoding()));
          result.append(count, VectorType::get(subShape, elemTy));
          return success();
        });
    xegpu::doSCFStructuralTypeConversionWithTensorType(getOperation(),
                                                       converter);
  }
  // Step 2: the workgroup-to-subgroup distribution itself. Tensor descriptor
  // types are rewritten to the per-subgroup shape with sg fields dropped.
  MLIRContext *ctx = &getContext();
  RewritePatternSet patterns(ctx);
  ConversionTarget target(*ctx);
  TypeConverter converter;
  converter.addConversion([](Type type) -> Type { return type; });
  converter.addConversion(
      [&](xegpu::TensorDescType type,
          SmallVectorImpl<Type> &result) -> std::optional<LogicalResult> {
        Type elemTy = type.getElementType();
        ArrayRef<int64_t> shape = type.getShape();
        SmallVector<int64_t> subShape;
        int count;
        xegpu::LayoutAttr layout = type.getLayoutAttr();
        std::tie(subShape, count) = getSgShapeAndCount(shape, layout);
        if (layout)
          layout = layout.dropSgLayoutAndData();
        auto newTy = xegpu::TensorDescType::get(
            type.getContext(), subShape, elemTy, type.getEncoding(), layout);
        result.append(count, newTy);
        return success();
      });
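// For illustration (hypothetical types): !xegpu.tensor_desc<256x128xf32,
// #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32]>> converts to one
// !xegpu.tensor_desc<32x32xf32> (count = 1); with sg_layout = [4, 4] it would
// instead convert to two such descriptors, one per tile owned by a subgroup.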
  // Returns the tensor descriptor type used by the nd ops handled above.
  auto getTensorDescType = [](Operation *op) -> xegpu::TensorDescType {
    if (auto createOp = dyn_cast<xegpu::CreateNdDescOp>(op))
      return createOp.getType();
    if (auto loadOp = dyn_cast<xegpu::LoadNdOp>(op))
      return loadOp.getTensorDescType();
    if (auto storeOp = dyn_cast<xegpu::StoreNdOp>(op))
      return storeOp.getTensorDescType();
    if (auto updateOp = dyn_cast<xegpu::UpdateNdOffsetOp>(op))
      return updateOp.getTensorDescType();
    if (auto prefetchOp = dyn_cast<xegpu::PrefetchNdOp>(op))
      return prefetchOp.getTensorDescType();
    return xegpu::TensorDescType();
  };
  // An op is legal (i.e. already distributed) when it has no workgroup-level
  // layout left.
  auto isLegal = [&](xegpu::DistributeLayoutAttr layout) -> bool {
    return !layout || !layout.isForWorkgroup();
  };

  target.addDynamicallyLegalOp<xegpu::CreateNdDescOp, xegpu::LoadNdOp,
                               xegpu::StoreNdOp, xegpu::UpdateNdOffsetOp,
                               xegpu::PrefetchNdOp>([=](Operation *op) -> bool {
    auto tdescTy = getTensorDescType(op);
    auto layout = dyn_cast_if_present<xegpu::LayoutAttr>(tdescTy.getLayout());
    return isLegal(layout);
  });

  target.addDynamicallyLegalOp<xegpu::DpasOp>([=](xegpu::DpasOp op) -> bool {
    auto layout = xegpu::getDistributeLayoutAttr(op.getResult());
    return isLegal(layout);
  });

  target.addDynamicallyLegalOp<xegpu::LoadMatrixOp>(
      [=](xegpu::LoadMatrixOp op) -> bool {
        return isLegal(op.getLayoutAttr());
      });

  target.addDynamicallyLegalOp<xegpu::StoreMatrixOp>(
      [=](xegpu::StoreMatrixOp op) -> bool {
        return isLegal(op.getLayoutAttr());
      });

  target.addDynamicallyLegalOp<arith::ConstantOp>(
      [=](arith::ConstantOp op) -> bool {
        auto vecType = dyn_cast<VectorType>(op.getType());
        if (!vecType)
          return true;
        return isLegal(xegpu::getDistributeLayoutAttr(op.getResult()));
      });

  target.addDynamicallyLegalOp<xegpu::LoadGatherOp>(
      [=](xegpu::LoadGatherOp op) -> bool {
        auto layout = xegpu::getDistributeLayoutAttr(op.getResult());
        return isLegal(layout);
      });

  target.addDynamicallyLegalOp<xegpu::StoreScatterOp>(
      [=](xegpu::StoreScatterOp op) -> bool {
        // Check if the layout attribute is present on the operation.
        auto layout = op->getAttrOfType<xegpu::LayoutAttr>("layout");
        if (!layout)
          return true;
        return isLegal(layout);
      });

  target.addDynamicallyLegalOp<vector::BroadcastOp>(
      [=](vector::BroadcastOp op) -> bool {
        return isLegal(xegpu::getDistributeLayoutAttr(op.getResult()));
      });

  target.addDynamicallyLegalOp<xegpu::ConvertLayoutOp>(
      [=](xegpu::ConvertLayoutOp op) -> bool {
        return isLegal(op.getInputLayout()) && isLegal(op.getTargetLayout());
      });

  // Elementwise math/arith ops are illegal only when they produce a single
  // workgroup-distributed vector result whose operands all share its shape.
  target.addDynamicallyLegalDialect<math::MathDialect, arith::ArithDialect>(
      [=](Operation *op) -> std::optional<bool> {
        if (!OpTrait::hasElementwiseMappableTraits(op) ||
            op->getNumResults() != 1)
          return true;
        VectorType resultType =
            dyn_cast<VectorType>(op->getResult(0).getType());
        if (!resultType)
          return true;
        // All operands must be vectors of the same shape as the result.
        for (Value operand : op->getOperands()) {
          VectorType operandType = dyn_cast<VectorType>(operand.getType());
          if (!operandType || operandType.getShape() != resultType.getShape()) {
            return true;
          }
        }
        xegpu::DistributeLayoutAttr layout =
            xegpu::getDistributeLayoutAttr(op->getResult(0));
        return isLegal(layout);
      });

  target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
      [=](UnrealizedConversionCastOp op) {
        return llvm::is_contained(existingCastOps, op.getOperation());
      });

  target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
  xegpu::populateXeGPUWgToSgDistributePatterns(patterns);
  scf::populateSCFStructuralTypeConversionsAndLegality(converter, patterns,
                                                       target);
  if (failed(
          applyPartialConversion(getOperation(), target, std::move(patterns))))
    return signalPassFailure();

  // Finally, strip sg_layout/sg_data from any layout attributes left on op
  // results; SCF control-flow ops keep no result layout at all.
  getOperation()->walk([](Operation *op) {
    for (OpResult result : op->getOpResults()) {
      std::string name = xegpu::getLayoutName(result);
      if (auto layout = op->getAttrOfType<xegpu::LayoutAttr>(name)) {
        op->removeAttr(name);
        if (!isa<scf::IfOp, scf::ForOp, scf::WhileOp, scf::ConditionOp>(op)) {
          if (auto newLayout = layout.dropSgLayoutAndData())
            op->setAttr(name, newLayout);
        }
      }
    }
  });
}
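// Note: xegpu::getLayoutName derives the attribute name from the result index
// (e.g. something like "layout_result_0" for result 0), so each result's
// layout is stored and updated under its own name.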