28#include "llvm/Support/Casting.h"
29#include "llvm/Support/FormatVariadic.h"
38 for (
const auto &vals : values)
39 llvm::append_range(
result, vals);
45 auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
48 if (!layout || !layout.isForSubgroup())
53 auto tdescShape = tdescTy.getShape();
54 auto elementType = tdescTy.getElementType();
59 int64_t sgSize = llvm::product_of(laneLayout);
63 for (
auto [tdescDim, laneDim, laneDataDim] :
64 llvm::zip_equal(tdescShape, laneLayout, laneData)) {
65 assert((tdescDim % (laneDim * laneDataDim) == 0) &&
66 "tensor descriptor shape is not distributable");
67 tensorSize *= tdescDim;
70 tensorSize *= tdescTy.getArrayLength();
72 return VectorType::get({tensorSize / sgSize}, elementType);
77 xegpu::LayoutAttr layout) {
78 int64_t rank = originalType.getRank();
85 while (
shape.size() > 2) {
86 arrayLength *=
shape[0];
91 auto laneLayout = layout.getEffectiveLaneLayoutAsInt();
92 auto laneData = layout.getEffectiveLaneDataAsInt();
93 while (!laneLayout.empty() && laneLayout.size() >
shape.size()) {
94 laneLayout.erase(laneLayout.begin());
95 laneData.erase(laneData.begin());
97 auto trimmedLayout = xegpu::LayoutAttr::get(
101 auto helperTdescTy = xegpu::TensorDescType::get(
102 shape, originalType.getElementType(), arrayLength,
104 xegpu::MemorySpace::Global, trimmedLayout);
110 VectorType originalType) {
113 assert((isa<xegpu::LayoutAttr>(layout) || isa<xegpu::SliceAttr>(layout)) &&
114 "Expecting a valid layout.");
116 int64_t vectorRank = originalType.getRank();
117 int64_t layoutRank = layout.getRank();
118 assert(vectorRank >= layoutRank &&
"Vector rank must be >= layout rank.");
122 int64_t offset = vectorRank - layoutRank;
126 auto distributedShapeOrFailure =
127 layout.computeDistributedShape(trailingShape);
128 if (
failed(distributedShapeOrFailure))
132 fullShape.begin() + offset);
133 resultShape.append(distributedShapeOrFailure->begin(),
134 distributedShapeOrFailure->end());
135 return VectorType::get(resultShape, originalType.getElementType());
139 const StringRef prefix(
"layout_operand_");
140 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
141 return llvm::formatv(
"{0}{1}", prefix, idx).str();
145 const StringRef prefix =
"layout_result_";
146 return llvm::formatv(
"{0}{1}", prefix,
result.getResultNumber()).str();
153 if (
auto result = dyn_cast<OpResult>(value)) {
155 assert(defOp &&
"result must have a defining op");
157 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
158 auto layout = anchorOp.getAnchorLayout();
163 if (defOp->
hasAttr(layoutName)) {
165 defOp->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
170 if (
auto arg = dyn_cast<BlockArgument>(value)) {
171 auto *parentOp = arg.getOwner()->getParentOp();
172 if (
auto loop = dyn_cast_if_present<LoopLikeOpInterface>(parentOp)) {
173 OpOperand *tiedInit = loop.getTiedLoopInit(arg);
180 dyn_cast_if_present<xegpu::TensorDescType>(value.
getType()))
181 return tdescTy.getLayoutAttr();
185xegpu::DistributeLayoutAttr
188 unsigned idx =
const_cast<OpOperand &
>(opr).getOperandNumber();
190 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(op)) {
191 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(op)) {
193 return dpasOp.getLayoutAAttr();
194 }
else if (idx == 1) {
195 return dpasOp.getLayoutBAttr();
196 }
else if (idx == 2) {
197 return dpasOp.getLayoutCdAttr();
200 if (
auto dpasMxOp = dyn_cast<xegpu::DpasMxOp>(op)) {
203 unsigned currentIdx = 0;
205 if (idx == currentIdx++)
206 return dpasMxOp.getLayoutAAttr();
208 if (idx == currentIdx++)
209 return dpasMxOp.getLayoutBAttr();
211 if (dpasMxOp.getAcc())
212 if (idx == currentIdx++)
213 return dpasMxOp.getLayoutCdAttr();
215 if (dpasMxOp.getScaleA())
216 if (idx == currentIdx++)
217 return dpasMxOp.getLayoutAScaleAttr();
219 if (dpasMxOp.getScaleB())
220 if (idx == currentIdx++)
221 return dpasMxOp.getLayoutBScaleAttr();
225 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
226 return convertOp.getInputLayoutAttr();
228 auto layout = anchorOp.getAnchorLayout();
235 if (isa<xegpu::StoreNdOp, xegpu::StoreMatrixOp>(op) && (idx < 2))
238 if (isa<xegpu::StoreScatterOp>(op)) {
239 xegpu::StoreScatterOp store(op);
240 int chunkSize = store.getChunkSize().value_or(1);
241 if (layout && idx >= 2 && chunkSize > 1)
242 return layout.dropDims(llvm::to_vector(
243 llvm::seq<int64_t>(layout.getRank() - 1, layout.getRank())));
246 if (isa<xegpu::LoadGatherOp>(op)) {
247 xegpu::LoadGatherOp
load(op);
248 int chunkSize =
load.getChunkSize().value_or(1);
249 if (layout && idx >= 1 && chunkSize > 1)
250 return layout.dropDims(llvm::to_vector(
251 llvm::seq<int64_t>(layout.getRank() - 1, layout.getRank())));
258 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
267xegpu::DistributeLayoutAttr
270 const std::string &name) {
271 xegpu::DistributeLayoutAttr candidate = layout;
273 if (
auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
274 if (
auto perm = loadOp.getLayoutAttr())
283xegpu::DistributeLayoutAttr
286 const std::string &name) {
287 xegpu::DistributeLayoutAttr candidate = layout;
288 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
290 if (
auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
292 if (
auto perm = storeOp.getLayoutAttr())
304 const mlir::xegpu::DistributeLayoutAttr layout) {
307 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
308 if (anchorOp.getAnchorLayout() == layout)
310 anchorOp.setAnchorLayout(layout);
326 const DistributeLayoutAttr layout) {
328 unsigned idx =
const_cast<OpOperand &
>(operand).getOperandNumber();
333 if (
auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
334 if (
auto dpasOp = dyn_cast<xegpu::DpasOp>(owner)) {
336 return dpasOp.setLayoutAAttr(layout);
337 }
else if (idx == 1) {
338 return dpasOp.setLayoutBAttr(layout);
339 }
else if (idx == 2) {
340 return dpasOp.setLayoutCdAttr(layout);
343 if (
auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(owner)) {
344 return convertOp.setInputLayoutAttr(layout);
350 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
353 anchorOp.setAnchorLayout(layout);
357 anchorOp.setAnchorLayout(layout);
371template <
typename T,
typename>
372xegpu::DistributeLayoutAttr
374 Operation *op = operandOrResult.getOwner();
378 auto layout = op->
getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
385template xegpu::DistributeLayoutAttr
387template xegpu::DistributeLayoutAttr
390template <
typename T,
typename>
392 const xegpu::DistributeLayoutAttr layout) {
393 Operation *owner = operandOrResult.getOwner();
395 if (owner->
hasAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
405 const mlir::xegpu::DistributeLayoutAttr layout);
409 const mlir::xegpu::DistributeLayoutAttr layout);
414 auto vecTy = dyn_cast<VectorType>(value.
getType());
422 int64_t srcShapeRank = srcShape.size();
426 int64_t rankDiff = srcShapeRank - targetShapeRank;
427 std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
429 llvm::copy(
shape, adjustedTargetShape.begin() + rankDiff);
435 Value slice = vector::ExtractStridedSliceOp::create(
436 builder, loc, value, offsets, adjustedTargetShape, staticStrides);
439 if (srcShapeRank > targetShapeRank) {
440 auto targetTy = VectorType::get(
shape, vecTy.getElementType());
441 slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
452 VectorType inputTy = dyn_cast<VectorType>(values[0].
getType());
453 assert(llvm::all_of(values.
getTypes(),
454 [&](
Type type) { return type == inputTy; }) &&
455 "values must be of the same VectorType");
457 Type elemTy = inputTy.getElementType();
460 VectorType resultTy = VectorType::get(
shape, elemTy);
465 for (
auto [src, offsets] :
468 result = vector::InsertStridedSliceOp::create(builder, loc, src,
result,
469 offsets, staticStrides);
480 auto targetAttrs = gpuModuleOp.getTargets();
482 for (
auto &attr : *targetAttrs) {
483 auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
485 return xevmAttr.getChip().str();
497 assert(
lhs.size() ==
rhs.size() &&
"lhs and rhs must have the same size");
499 for (
auto [l, r] : llvm::zip_equal(
lhs,
rhs)) {
502 results.push_back(builder.
createOrFold<arith::AddIOp>(loc, lval, rval));
525 a = a.slice(a.size() -
b.size());
533 static_assert(std::is_integral<T>::value,
"T must be an integer type");
536 if (!candidateMultiples.empty())
538 SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
539 for (T candidate : candidates) {
540 for (T multiple : multiples) {
541 int value =
static_cast<int>(candidate * multiple);
542 if (value != 0 && dim % value == 0 && value > largest)
550 vector::CombiningKind kind, uint32_t size) {
552 Value laneVal = vector::ReductionOp::create(builder, loc, kind, input);
554 for (uint64_t i = 1; i < size; i <<= 1) {
556 gpu::ShuffleOp::create(builder, loc, laneVal, i, size,
557 gpu::ShuffleMode::XOR)
559 laneVal = makeArithReduction(builder, loc, kind, laneVal, shuffled);
566 vector::CombiningKind kind,
569 VectorType sourceType = src.
getType();
570 int64_t sourceRank = sourceType.getRank();
573 assert(sourceRank >= 2 &&
"expected at least a 2D source vector");
574 for (
int64_t i = 0; i < sourceRank - 2; ++i)
575 assert(sourceType.getShape()[i] == 1 &&
576 "expected leading dimensions to be unit");
577 int64_t rowIdx = sourceRank - 2;
578 int64_t columnIdx = sourceRank - 1;
579 int64_t sourceH = sourceType.getShape()[rowIdx];
580 int64_t sourceW = sourceType.getShape()[columnIdx];
581 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
583 TypedAttr zeroAttr = rewriter.
getZeroAttr(sourceType.getElementType());
584 Value reductionResult = arith::ConstantOp::create(
585 rewriter, loc,
acc.getType(),
594 for (
int i = 0; i < nSlices; ++i) {
600 if (reductionDim == columnIdx) {
601 sliceOffsets[rowIdx] = i;
602 sliceSizes[columnIdx] = sourceW;
604 sliceOffsets[columnIdx] = i;
605 sliceSizes[rowIdx] = sourceH;
608 vector::ExtractStridedSliceOp extractOp =
609 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
610 sliceSizes, strides);
614 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
616 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
618 VectorType::get({nSliceElements}, sourceType.getElementType()),
619 extractOp.getResult());
629 accIdx[accRank - 1] = i;
630 Value accExtract = vector::ExtractOp::create(rewriter, loc,
acc, accIdx);
631 Value reduction = vector::ReductionOp::create(
632 rewriter, loc, kind, slice.getResult(), accExtract);
633 reductionResult = vector::InsertOp::create(rewriter, loc, reduction,
634 reductionResult, accIdx);
638 return reductionResult;
643 vector::CombiningKind kind,
int64_t reductionDim,
int64_t reductionSize,
645 VectorType sourceType = src.
getType();
646 int64_t sourceRank = sourceType.getRank();
649 assert(sourceRank >= 2 &&
"expected at least a 2D source vector");
650 for (
int64_t i = 0; i < sourceRank - 2; ++i)
651 assert(sourceType.getShape()[i] == 1 &&
652 "expected leading dimensions to be unit");
653 int64_t rowIdx = sourceRank - 2;
654 int64_t columnIdx = sourceRank - 1;
655 int64_t sourceH = sourceType.getShape()[rowIdx];
656 int64_t sourceW = sourceType.getShape()[columnIdx];
659 TypedAttr zeroAttr = rewriter.
getZeroAttr(sourceType.getElementType());
660 Value reductionResult = arith::ConstantOp::create(
661 rewriter, loc,
acc.getType(),
668 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
673 for (
int i = 0; i < nSlices; ++i) {
679 if (reductionDim == columnIdx) {
680 sliceOffsets[rowIdx] = i;
681 sliceSizes[columnIdx] = sourceW;
683 sliceOffsets[columnIdx] = i;
684 sliceSizes[rowIdx] = sourceH;
687 vector::ExtractStridedSliceOp extractOp =
688 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
689 sliceSizes, strides);
690 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
691 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
693 VectorType::get({nSliceElements}, sourceType.getElementType()),
694 extractOp.getResult());
697 accIdx[accRank - 1] = i;
698 Value accExtract = vector::ExtractOp::create(rewriter, loc,
acc, accIdx);
703 reductionResult = vector::InsertOp::create(rewriter, loc, fullReduce,
704 reductionResult, accIdx);
706 return reductionResult;
711 vector::CombiningKind kind) {
712 auto vecTy = dyn_cast<VectorType>(type);
713 Type elemTy = vecTy ? vecTy.getElementType() : type;
718 return arith::ConstantOp::create(
720 return arith::ConstantOp::create(builder, loc, cast<TypedAttr>(scalarAttr));
724 case vector::CombiningKind::ADD:
725 case vector::CombiningKind::XOR:
726 case vector::CombiningKind::OR:
727 case vector::CombiningKind::MAXUI:
730 case vector::CombiningKind::MUL:
731 case vector::CombiningKind::AND:
734 case vector::CombiningKind::MINSI:
735 if (
auto intTy = dyn_cast<IntegerType>(elemTy))
737 elemTy, APInt::getSignedMaxValue(intTy.getWidth())));
740 case vector::CombiningKind::MINUI:
741 if (
auto intTy = dyn_cast<IntegerType>(elemTy))
743 builder.
getIntegerAttr(elemTy, APInt::getMaxValue(intTy.getWidth())));
746 case vector::CombiningKind::MAXSI:
747 if (
auto intTy = dyn_cast<IntegerType>(elemTy))
749 elemTy, APInt::getSignedMinValue(intTy.getWidth())));
752 case vector::CombiningKind::MINNUMF:
753 case vector::CombiningKind::MINIMUMF:
754 if (
auto floatTy = dyn_cast<FloatType>(elemTy))
756 elemTy, APFloat::getInf(floatTy.getFloatSemantics())));
759 case vector::CombiningKind::MAXNUMF:
760 case vector::CombiningKind::MAXIMUMF:
761 if (
auto floatTy = dyn_cast<FloatType>(elemTy))
763 elemTy, APFloat::getInf(floatTy.getFloatSemantics(),
true)));
779 auto laneData = layout.getEffectiveLaneDataAsInt();
780 if (laneData.size() != 2)
782 return laneData[0] != 1;
795 auto laneLayout = layout.getEffectiveLaneLayoutAsInt();
796 if (laneLayout.size() != 2)
811 for (
size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx)
812 if (srcIdx < src.size() && src[srcIdx] == dst[dstIdx])
814 else if (dst[dstIdx] == 1)
815 expandedUnitDims.push_back(dstIdx);
818 return srcIdx == src.size();
835 splitDimGroups.clear();
836 for (
size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx) {
837 if (srcIdx >= src.size())
839 accumulatedSize *= dst[dstIdx];
840 currentDstDims.push_back(dstIdx);
842 if (accumulatedSize == src[srcIdx]) {
845 if (srcIdx == src.size() - 1) {
846 while (++dstIdx < dst.size() && dst[dstIdx] == 1)
847 currentDstDims.push_back(dstIdx);
850 splitDimGroups.push_back(currentDstDims);
854 currentDstDims.clear();
855 }
else if (accumulatedSize > src[srcIdx]) {
859 return srcIdx == src.size();
887 auto vecTy = dyn_cast<VectorType>(layoutSrc.
getType());
893 auto [subShape, count] = getSubShapeAndCount(vecTy, layout);
896 auto newTy = VectorType::get(subShape, vecTy.getElementType());
897 for (
Value dest : dests)
901 if (
auto whileOp = dyn_cast<scf::WhileOp>(op)) {
905 cast<scf::YieldOp>(whileOp.getAfterBody()->getTerminator());
906 for (
auto [init, beforeArg, yieldVal] :
907 llvm::zip(whileOp.getInits(), whileOp.getBeforeArguments(),
908 yieldOp.getOperands()))
909 recordTypes(init, {beforeArg, yieldVal});
912 scf::ConditionOp condOp = whileOp.getConditionOp();
913 for (
auto [condArg, afterArg, res] :
914 llvm::zip(condOp.getArgs(), whileOp.getAfterArguments(),
915 whileOp.getResults()))
916 recordTypes(condArg, {afterArg, res});
919 if (
auto forOp = dyn_cast<scf::ForOp>(op)) {
922 auto yieldOp = cast<scf::YieldOp>(forOp.getBody()->getTerminator());
923 for (
auto [init, arg, res, yieldVal] :
924 llvm::zip(forOp.getInitArgs(), forOp.getRegionIterArgs(),
925 forOp.getResults(), yieldOp.getOperands()))
926 recordTypes(init, {arg, res, yieldVal});
940 auto loopArgTypeMap = std::make_shared<DenseMap<Value, SmallVector<Type>>>(
941 std::move(loopArgTypes));
942 converter.addConversion(
943 [loopArgTypeMap, getSubShapeAndCount](
946 if (!isa<VectorType>(v.
getType()))
952 auto it = loopArgTypeMap->find(v);
953 if (it != loopArgTypeMap->end()) {
954 result.append(it->second.begin(), it->second.end());
963 auto vecType = cast<VectorType>(v.
getType());
964 auto [subShape, count] = getSubShapeAndCount(vecType, layout);
968 auto newTy = VectorType::get(subShape, vecType.getElementType());
969 result.append(count, newTy);
976 const llvm::SmallSetVector<UnrealizedConversionCastOp, 8> &existingCasts) {
995 auto hasIdenticalVectorTypes = [](
ValueRange values) {
996 auto types = values.getTypes();
997 return !types.empty() && llvm::all_of(types, [&](
Type type) {
998 return isa<VectorType>(type) && type == types.front();
1002 root->
walk([&](UnrealizedConversionCastOp op) {
1003 if (existingCasts.contains(op))
1006 if (op.getNumResults() == 1 && op.getNumOperands() >= 1) {
1008 op.getInputs()[0].getDefiningOp<UnrealizedConversionCastOp>();
1009 if (defOp && !existingCasts.contains(defOp) &&
1010 defOp.getNumOperands() == 1 &&
1011 defOp.getNumResults() == op.getNumOperands() &&
1012 llvm::all_of(op.getInputs(),
1013 [&](
Value v) { return v.getDefiningOp() == defOp; })) {
1014 Value orig = defOp.getInputs()[0];
1015 auto origTy = dyn_cast<VectorType>(orig.
getType());
1016 auto resTy = dyn_cast<VectorType>(op.getResult(0).getType());
1017 if (origTy && resTy &&
1018 origTy.getNumElements() == resTy.getNumElements() &&
1022 vector::ShapeCastOp::create(builder, op.getLoc(), resTy, orig);
1023 op.replaceAllUsesWith(
ValueRange{shapeCast.getResult()});
1031 auto outputTy = dyn_cast<VectorType>(op.getResult(0).getType());
1032 if (op.getNumOperands() > 1 && outputTy &&
1033 hasIdenticalVectorTypes(op.getInputs())) {
1036 builder, op.getLoc(), op.getInputs(), outputTy.getShape());
1042 if (op.getNumOperands() == 1 && op.getNumResults() > 1) {
1044 op.getInputs()[0].getDefiningOp<UnrealizedConversionCastOp>();
1045 if (defOp && !existingCasts.contains(defOp) &&
1046 defOp.getNumResults() == 1 &&
1047 defOp.getNumOperands() == op.getNumResults() &&
1049 op->getResultTypes())) {
1050 op.replaceAllUsesWith(defOp.getInputs());
1055 auto tileTy = dyn_cast<VectorType>(op.getResult(0).getType());
1056 if (tileTy && hasIdenticalVectorTypes(op.getResults())) {
1059 builder, op.getLoc(), op.getInputs()[0], tileTy.getShape());
1060 op->replaceAllUsesWith(results);
1067 bool changed =
true;
1070 root->
walk([&](UnrealizedConversionCastOp op) {
1071 if (existingCasts.contains(op))
1073 if (op.use_empty()) {
1095 collapseDims.clear();
1096 collapseDims.resize(dst.size());
1100 int64_t srcProd = std::accumulate(src.begin(), src.end(),
int64_t{1},
1101 std::multiplies<int64_t>());
1102 int64_t dstProd = std::accumulate(dst.begin(), dst.end(),
int64_t{1},
1103 std::multiplies<int64_t>());
1104 if (srcProd != dstProd)
1113 srcCompact.push_back(s);
1116 dstCompact.push_back(d);
1119 for (
int64_t need : dstCompact) {
1121 while (s < srcCompact.size() &&
acc < need)
1122 acc *= srcCompact[s++];
1126 if (s != srcCompact.size())
1137 while (dstIdx < dst.size() && dst[dstIdx] == 1)
1142 for (
size_t srcIdx = 0; srcIdx < src.size(); ++srcIdx) {
1143 if (dstIdx >= dst.size()) {
1146 if (lastNonEmpty >= 0)
1147 collapseDims[lastNonEmpty].push_back(srcIdx);
1151 collapseDims[dstIdx].push_back(srcIdx);
1152 lastNonEmpty = dstIdx;
1153 if (
acc == dst[dstIdx]) {
1156 while (dstIdx < dst.size() && dst[dstIdx] == 1)
xegpu::DistributeLayoutAttr maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout, const OpResult &result, mlir::Operation *owner, const std::string &name)
Attributes are known-constant values of operations.
IntegerAttr getIntegerAttr(Type type, int64_t value)
FloatAttr getFloatAttr(Type type, double value)
TypedAttr getZeroAttr(Type type)
TypedAttr getOneAttr(Type type)
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
This class helps build Operations.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
AttrClass getAttrOfType(StringAttr name)
bool hasAttrOfType(NameT &&name)
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
void setAttr(StringAttr name, Attribute value)
If an attribute exists with the specified name, change it to the new value.
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
This class provides an abstraction over the different types of ranges over Values.
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getOwner() const
Return the owner of this operand.
Value makeArithReduction(OpBuilder &b, Location loc, CombiningKind kind, Value v1, Value acc, arith::FastMathFlagsAttr fastmath=nullptr, Value mask=nullptr)
Returns the result value of reducing two scalar/vector values with the corresponding arith operation.
bool matchDimCollapse(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &collapseDims)
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of the given shape from a set of values using vector.insert_strided_slice.
bool requirePacked(const DistributeLayoutAttr layout)
Helper function to check if the layout is packed.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
Value createReductionNeutralValue(OpBuilder &builder, Location loc, Type type, vector::CombiningKind kind)
Creates a constant filled with the neutral (identity) value for the given reduction kind.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult; users should use setAnchorLayout...
Value subgroupReduction(Location loc, OpBuilder &builder, Value input, vector::CombiningKind kind, uint32_t size)
Given an input value representing per-lane data, this function returns the result after performing a ...
bool matchUnitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< int64_t > &expandedUnitDims)
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
Value lowerToVectorReductions(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, Location loc, PatternRewriter &rewriter)
Given the src and acc arguments from a vector::MultiDimReductionOp, lower to a set of vector::Reduc...
bool requireTranspose(const DistributeLayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
bool matchSplitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &splitDimGroups)
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
DenseMap< Value, SmallVector< Type > > precomputeLoopBlockArgTypes(Operation *topLevelOp, SubShapeAndCountFn getSubShapeAndCount)
Pre-computes distributed VectorType mappings for every value carried through an SCF loop under topLev...
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
void addVectorTypeConversion(TypeConverter &converter, SubShapeAndCountFn getSubShapeAndCount, DenseMap< Value, SmallVector< Type > > loopArgTypes)
Adds a context-aware VectorType conversion to converter (1:1 shape-changing or 1:N,...
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_strided_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
Get and set the distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
Value lowerCrossLaneReductionToShuffles(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, int64_t reductionSize, Location loc, PatternRewriter &rewriter)
Lowers cross-lane reductions to shuffle operations on a 2D vector.
std::function< std::pair< SmallVector< int64_t >, int >( VectorType, DistributeLayoutAttr)> SubShapeAndCountFn
Callback type for computing sub-shape and count for 1:N (or 1:1 shape-changing) VectorType conversion...
void cleanupUnrealizedConversionCasts(Operation *root, const llvm::SmallSetVector< UnrealizedConversionCastOp, 8 > &existingCasts)
Cleans up UnrealizedConversionCastOps inserted during SCF structural type conversion and/or XeGPU unr...
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRanges into a single SmallVector<Value>.
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with the same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
llvm::DenseMap< KeyT, ValueT, KeyInfoT, BucketT > DenseMap
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
virtual int getSubgroupSize() const =0
StringRef getName() const