MLIR  22.0.0git
XeGPUUtils.cpp
Go to the documentation of this file.
1 //===---- XeGPUUtils.cpp - MLIR Utilities for XeGPUOps ------------------===//
2 //
3 // Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements utility methods for working with the XeGPU dialect.
10 //
11 //===----------------------------------------------------------------------===//
12 
20 #include "mlir/IR/Builders.h"
21 #include "mlir/IR/Operation.h"
22 #include "mlir/IR/ValueRange.h"
25 #include "llvm/Support/FormatVariadic.h"
26 #include <cstdint>
27 #include <numeric>
28 
29 using namespace mlir;
30 
/// Flatten a set of ValueRange into a single SmallVector<Value>.
// NOTE(review): the signature line was lost in this dump; upstream it reads
// "SmallVector<Value> xegpu::flattenValues(ArrayRef<ValueRange> values) {" —
// confirm against the original file.
  SmallVector<Value> result;
  // Concatenate all ranges in order into one flat vector.
  for (const auto &vals : values)
    llvm::append_range(result, vals);
  return result;
}
38 
39 FailureOr<VectorType>
40 mlir::xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) {
41  auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
42  // It only works for subgroup level layout, which only has lane_layout
43  // and lane_data, and is to distribute a SIMD code into SIMT code.
44  if (!layout || !layout.isForSubgroup())
45  return failure();
46 
47  SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
48  SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
49  auto tdescShape = tdescTy.getShape();
50  auto elementType = tdescTy.getElementType();
51 
52  // compute sgSize by multiply elements of laneLayout
53  // e.g. for 2D layout, sgSize = laneLayout[0] * laneLayout[1]
54  // e.g. for 1D layout, sgSize = laneLayout[0]
55  int64_t sgSize = llvm::product_of(laneLayout);
56 
57  // Case 1: regular loads/stores
58  auto scatterAttr = tdescTy.getEncodingOfType<ScatterTensorDescAttr>();
59  if (scatterAttr) {
60  auto chunkSize = scatterAttr.getChunkSize().getInt();
61  // Verify if the first dimension of the tensor descriptor shape is
62  // distributable.
63  assert(tdescShape[0] == laneLayout[0] &&
64  "tensor descriptor shape is not distributable");
65  return VectorType::get({chunkSize}, elementType);
66  }
67 
68  // Case 2: block loads/stores
69  // Check if the tensor descriptor shape is distributable.
70  int64_t tensorSize = 1;
71  for (auto [tdescDim, laneDim, laneDataDim] :
72  llvm::zip_equal(tdescShape, laneLayout, laneData)) {
73  assert((tdescDim % (laneDim * laneDataDim) == 0) &&
74  "tensor descriptor shape is not distributable");
75  tensorSize *= tdescDim;
76  }
77  // tensorSize must be adjusted for array_length.
78  tensorSize *= tdescTy.getArrayLength();
79 
80  return VectorType::get({tensorSize / sgSize}, elementType);
81 }
82 
83 FailureOr<VectorType>
84 mlir::xegpu::getDistributedVectorType(VectorType originalType,
85  xegpu::LayoutAttr layout) {
86  int64_t rank = originalType.getRank();
87  // Distributed vector type is only supported for 1D, 2D and 3D vectors.
88  if (rank < 1 || rank > 3)
89  return failure();
90  ArrayRef<int64_t> shape = originalType.getShape();
91  // arrayLength is 1 for 1D and 2D vectors, and equal to the first dimension
92  // of the 3D vector.
93  int arrayLength = 1;
94  if (rank == 3) {
95  arrayLength = shape[0];
96  shape = shape.drop_front();
97  }
98  auto helperTdescTy = xegpu::TensorDescType::get(
99  shape, originalType.getElementType(), arrayLength,
100  /*boundary_check=*/true,
101  /*memory_space=*/xegpu::MemorySpace::Global, layout);
102  return xegpu::getDistributedVectorType(helperTdescTy);
103 }
104 
105 std::string xegpu::getLayoutName(const OpOperand &operand) {
106  const StringRef prefix("layout_operand_");
107  unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
108  return llvm::formatv("{0}{1}", prefix, idx).str();
109 }
110 
111 std::string xegpu::getLayoutName(const OpResult result) {
112  const StringRef prefix = "layout_result_";
113  return llvm::formatv("{0}{1}", prefix, result.getResultNumber()).str();
114 }
115 
116 xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
117  if (!value)
118  return nullptr;
119 
120  if (auto tdescTy =
121  dyn_cast_if_present<xegpu::TensorDescType>(value.getType()))
122  return tdescTy.getLayoutAttr();
123 
124  if (auto result = dyn_cast<OpResult>(value)) {
125  Operation *defOp = result.getDefiningOp();
126  assert(defOp && "result must have a defining op");
127 
128  // For ConvertLayoutOp, the layout is stored in the targetLayoutAttr
129  if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(defOp))
130  return convertOp.getTargetLayoutAttr();
131 
132  // for LoadNdOp, the layout is stored in the tensor descriptor
133  if (auto loadNd = dyn_cast<xegpu::LoadNdOp>(defOp))
134  return getDistributeLayoutAttr(loadNd.getTensorDesc());
135 
136  // for LoadMatrixOp, the layout is attached to the property of the op
137  if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(defOp))
138  return loadOp.getLayoutAttr();
139 
140  // for StoreMatrixOp, the layout is attached to the property of the op
141  if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(defOp))
142  return storeOp.getLayoutAttr();
143 
144  std::string layoutName = getLayoutName(result);
145  if (defOp->hasAttr(layoutName))
146  return defOp->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
147  }
148 
149  if (auto arg = dyn_cast<BlockArgument>(value)) {
150  auto *parentOp = arg.getOwner()->getParentOp();
151  if (auto loop = dyn_cast<LoopLikeOpInterface>(parentOp)) {
152  OpOperand *tiedInit = loop.getTiedLoopInit(arg);
153  if (tiedInit)
154  return getDistributeLayoutAttr(tiedInit->get());
155  }
156  }
157 
158  return nullptr;
159 }
160 
/// Retrieves the DistributeLayoutAttr associated with an OpOperand.
// NOTE(review): the signature line was lost in this dump; upstream it reads
// "xegpu::getDistributeLayoutAttr(const OpOperand &opr) {" — confirm.
xegpu::DistributeLayoutAttr
  Operation *op = opr.getOwner();

  // LoadMatrixOp / StoreMatrixOp carry the layout as an op property.
  if (auto loadOp = dyn_cast<xegpu::LoadMatrixOp>(op))
    return loadOp.getLayoutAttr();

  if (auto storeOp = dyn_cast<xegpu::StoreMatrixOp>(op))
    return storeOp.getLayoutAttr();

  // Otherwise look for a "layout_operand_<i>" attribute on the owner op,
  // falling back to the layout derived from the operand's value.
  std::string layoutName = xegpu::getLayoutName(opr);
  if (op->hasAttr(layoutName))
    return op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
  return getDistributeLayoutAttr(opr.get());
}
176 
177 template <typename T, typename>
178 void xegpu::setDistributeLayoutAttr(const T &operandOrResult,
179  const DistributeLayoutAttr layout) {
180  Operation *owner = operandOrResult.getOwner();
181  std::string name = xegpu::getLayoutName(operandOrResult);
182  if (layout && !owner->hasAttrOfType<DistributeLayoutAttr>(name))
183  owner->setAttr(name, layout);
184 }
185 
186 // Explicit instantiation for OpResult
187 template void xegpu::setDistributeLayoutAttr<mlir::OpResult>(
188  const mlir::OpResult &result,
189  const mlir::xegpu::DistributeLayoutAttr layout);
190 
191 // Explicit instantiation for OpOperand
192 template void xegpu::setDistributeLayoutAttr<mlir::OpOperand>(
193  const mlir::OpOperand &operand,
194  const mlir::xegpu::DistributeLayoutAttr layout);
195 
/// Sets the DistributeLayoutAttr for each OpOperand and OpResult of every op
/// nested under (and including) `op`, using the supplied callback.
// NOTE(review): the signature line was lost in this dump; upstream it reads
// "void xegpu::setDistributeLayoutAttrs(" — confirm.
    Operation *op, function_ref<DistributeLayoutAttr(Value)> getLayoutImpl) {
  op->walk([&](Operation *nestOp) {
    // LoadMatrixOp / StoreMatrixOp carry the layout as an op property, so
    // they are skipped here.
    if (isa<xegpu::LoadMatrixOp, xegpu::StoreMatrixOp>(nestOp))
      return;

    // Attach the callback-computed layout to each operand and result.
    for (OpOperand &opr : nestOp->getOpOperands()) {
      auto layout = getLayoutImpl(opr.get());
      setDistributeLayoutAttr(opr, layout);
    }
    for (OpResult result : nestOp->getOpResults()) {
      auto layout = getLayoutImpl(result);
      setDistributeLayoutAttr(result, layout);
    }
  });
}
212 
213 template <typename T, typename>
214 void xegpu::removeLayoutAttr(const T &operandOrResult) {
215  Operation *owner = operandOrResult.getOwner();
216  std::string name = xegpu::getLayoutName(operandOrResult);
217  if (owner->hasAttrOfType<DistributeLayoutAttr>(name))
218  owner->removeAttr(name);
219 }
220 
221 // Explicit instantiation for OpResult
222 template void
223 xegpu::removeLayoutAttr<mlir::OpResult>(const mlir::OpResult &result);
224 
225 // Explicit instantiation for OpOperand
226 template void
227 xegpu::removeLayoutAttr<mlir::OpOperand>(const mlir::OpOperand &operand);
228 
/// Removes the layout attribute for every OpOperand and OpResult of each op
/// nested under (and including) `op`, if present.
// NOTE(review): the signature line was lost in this dump; upstream it reads
// "void xegpu::removeLayoutAttrs(Operation *op) {" — confirm.
  op->walk([&](Operation *nestOp) {
    for (OpOperand &opr : nestOp->getOpOperands())
      removeLayoutAttr(opr);
    for (OpResult result : nestOp->getOpResults())
      removeLayoutAttr(result);
  });
}
237 
/// Extracts a set of smaller vectors of `shape` from `value` using
/// vector.extract_strided_slice. Non-vector values, and shapes that do not
/// evenly tile the source, are returned unchanged as a one-element vector.
// NOTE(review): the leading signature lines were lost in this dump; upstream
// this is "SmallVector<Value> xegpu::extractVectorsWithShapeFromValue(
//     OpBuilder &builder, Location loc," — confirm.
    Value value, ArrayRef<int64_t> shape) {
  auto vecTy = dyn_cast<VectorType>(value.getType());
  if (!vecTy)
    return {value};

  ArrayRef<int64_t> srcShape = vecTy.getShape();
  // Bail out when `shape` does not evenly divide the trailing dimensions of
  // the source shape.
  if (!computeShapeRatio(srcShape, shape))
    return {value};

  int64_t srcShapeRank = srcShape.size();
  int64_t targetShapeRank = shape.size();

  // Right-align the target shape with the source shape by padding with
  // leading unit dimensions.
  SmallVector<int64_t> adjustedTargetShape(srcShape.size());
  int64_t rankDiff = srcShapeRank - targetShapeRank;
  std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
            1);
  std::copy(shape.begin(), shape.end(), adjustedTargetShape.begin() + rankDiff);

  // Extract one strided slice per tile offset.
  SmallVector<Value> result;
  for (SmallVector<int64_t> offsets :
       StaticTileOffsetRange(srcShape, adjustedTargetShape)) {
    SmallVector<int64_t> staticStrides(offsets.size(), 1);
    Value slice = vector::ExtractStridedSliceOp::create(
        builder, loc, value, offsets, adjustedTargetShape, staticStrides);

    // Reshape to remove leading unit dims if needed.
    if (srcShapeRank > targetShapeRank) {
      auto targetTy = VectorType::get(shape, vecTy.getElementType());
      slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
    }
    result.push_back(slice);
  }

  return result;
}
275 
/// Creates a single vector of `shape` from equally-typed tile `values` using
/// vector.insert_strided_slice, starting from a zero-filled constant.
// NOTE(review): the leading signature line was lost in this dump; upstream
// this is "Value xegpu::createVectorWithShapeFromValues(OpBuilder &builder,
// Location loc," — confirm.
    ValueRange values,
    ArrayRef<int64_t> shape) {
  VectorType inputTy = dyn_cast<VectorType>(values[0].getType());
  // NOTE(review): if values[0] is not a VectorType, inputTy is null and the
  // uses below would fail; callers presumably guarantee vector inputs and a
  // non-empty `values` — confirm at call sites.
  assert(llvm::all_of(values.getTypes(),
                      [&](Type type) { return type == inputTy; }) &&
         "values must be of the same VectorType");

  Type elemTy = inputTy.getElementType();
  ArrayRef<int64_t> tileShape = inputTy.getShape();

  // Seed the result with zeros, then insert each tile at its static offsets.
  VectorType resultTy = VectorType::get(shape, elemTy);
  auto zeroAttr = builder.getZeroAttr(elemTy);
  Value result = arith::ConstantOp::create(
      builder, loc, resultTy, DenseElementsAttr::get(resultTy, zeroAttr));

  // Tiles are consumed in the same order StaticTileOffsetRange enumerates
  // offsets; zip_equal enforces that the counts match.
  for (auto [src, offsets] :
       llvm::zip_equal(values, StaticTileOffsetRange(shape, tileShape))) {
    SmallVector<int64_t> staticStrides(tileShape.size(), 1);
    result = vector::InsertStridedSliceOp::create(builder, loc, src, result,
                                                  offsets, staticStrides);
  }
  return result;
}
300 
/// Performs SCF structural type conversion in three steps: (1) rewrite
/// VectorType values crossing SCF region boundaries to RankedTensorType,
/// (2) propagate DistributeLayoutAttr into the tensor types as encodings by
/// inspecting the intermediate unrealized casts, and (3) convert the tensors
/// back to vectors using the caller-provided converter.
// NOTE(review): the opening signature line was lost in this dump; upstream it
// reads "void xegpu::doSCFStructuralTypeConversionWithTensorType(" — confirm.
    Operation *op, TypeConverter converter) {
  MLIRContext *context = op->getContext();

  // Shared 1:1 materialization: bridge mismatched types with an
  // UnrealizedConversionCastOp and return its single result.
  auto materializeCast = [](OpBuilder &builder, Type type, ValueRange inputs,
                            Location loc) -> Value {
    return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
        .getResult(0);
  };

  { // Step 1: convert VectorType to RankedTensorType for SCF structural ops.
    // Note this local converter shadows the parameter on purpose; only types
    // are rewritten here, with identity conversion for everything else.
    TypeConverter converter;
    converter.addConversion([](Type type) -> Type { return type; });
    converter.addConversion([](VectorType type) -> Type {
      return RankedTensorType::get(type.getShape(), type.getElementType());
    });
    converter.addSourceMaterialization(materializeCast);
    converter.addTargetMaterialization(materializeCast);

    mlir::ConversionTarget target(*context);
    target.addLegalOp<UnrealizedConversionCastOp>();

    // NOTE(review): the lines creating the RewritePatternSet and calling
    // scf::populateSCFStructuralTypeConversionsAndLegality(...) were lost in
    // this dump; only the trailing "target);" argument survives — confirm.
        target);
    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
  }

  { // Step 2: propagate the layout attribute to RankedTensorType by checking
    // the UnrealizedConversionCastOps created above for VectorType ->
    // RankedTensorType casts.
    op->walk([](UnrealizedConversionCastOp castOp) {
      if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
        return WalkResult::skip();

      Value input = castOp.getInputs()[0];
      Value result = castOp.getResults()[0];
      auto inputTy = dyn_cast<VectorType>(input.getType());
      auto resultTy = dyn_cast<RankedTensorType>(result.getType());

      // Only look at ops casting from VectorType to RankedTensorType.
      if (!inputTy || !resultTy)
        return WalkResult::skip();

      // NOTE(review): the initializer line was lost in this dump; upstream
      // it is "xegpu::getDistributeLayoutAttr(input);" — confirm.
      xegpu::DistributeLayoutAttr layout =
      if (!layout)
        return WalkResult::skip();

      // Re-type the cast result with the layout attached as the encoding.
      RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
      result.setType(newTy);

      // Update the tied region iter_args if a user is a LoopLike op.
      for (OpOperand &use : result.getUses()) {
        if (auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
          BlockArgument arg = loop.getTiedLoopRegionIterArg(&use);
          arg.setType(newTy);
        }
        // scf.while has two regions; the block arguments of the "after"
        // region are not exposed by LoopLikeOpInterface, so handle them here.
        if (auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
          unsigned idx = use.getOperandNumber();
          BlockArgument arg = whileOp.getAfterArguments()[idx];
          arg.setType(newTy);
        }
      }
      return WalkResult::advance();
    });

    // Use scf.yield as an anchor to update the result types of its parent op
    // so they match the (possibly re-encoded) yielded types.
    op->walk([](scf::YieldOp yieldOp) {
      Operation *parentOp = yieldOp->getParentOp();
      for (OpResult r : parentOp->getOpResults()) {
        unsigned idx = r.getResultNumber();
        Type resultTy = r.getType();
        Type yieldTy = yieldOp.getResults()[idx].getType();
        if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
          r.setType(yieldTy);
      }
    });
  }

  { // Step 3: perform the conversion from RankedTensorType to VectorType
    // based on the DistributeLayoutAttr.

    // Handle the UnrealizedConversionCastOps introduced by the first step:
    // for vector -> RankedTensorType, simply forward the inputs; for
    // RankedTensorType -> vector, recreate the cast with the adaptor's
    // (already converted) inputs.
    class UnrealizedConversionCastOpPattern
        : public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
      using OpConversionPattern<
          mlir::UnrealizedConversionCastOp>::OpConversionPattern;

      mlir::LogicalResult
      matchAndRewrite(mlir::UnrealizedConversionCastOp op,
                      OneToNOpAdaptor adaptor,
                      ConversionPatternRewriter &rewriter) const override {
        auto inputs = op.getOperands();
        auto outputs = op.getOutputs();

        // Only 1:1 casts are handled by this pattern.
        if (inputs.size() != 1 || outputs.size() != 1)
          return failure();

        auto inputTy = inputs[0].getType();
        auto outputTy = outputs[0].getType();

        if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
          rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
          return success();
        }

        if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
          SmallVector<Value> values = xegpu::flattenValues(adaptor.getInputs());
          auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
                                                          outputTy, values);
          rewriter.replaceOp(op, newOp);
          return success();
        }
        return failure();
      }
    };

    converter.addSourceMaterialization(materializeCast);
    converter.addTargetMaterialization([&](OpBuilder &builder, TypeRange type,
                                           ValueRange inputs, Location loc) {
      return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
          .getResults();
    });

    mlir::ConversionTarget target(*context);
    // A cast is legal only once no RankedTensorType remains on either side.
    target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
        [](UnrealizedConversionCastOp op) {
          auto isTensorTy = [](Type type) {
            return isa<RankedTensorType>(type);
          };
          return llvm::none_of(op->getOperandTypes(), isTensorTy) &&
                 llvm::none_of(op->getResultTypes(), isTensorTy);
        });
    // NOTE(review): the RewritePatternSet creation line and the
    // scf::populateSCFStructuralTypeConversionsAndLegality(...) call were
    // lost in this dump; only the trailing "target);" argument survives.
    patterns.insert<UnrealizedConversionCastOpPattern>(context);
        target);
    (void)mlir::applyPartialConversion(op, target, std::move(patterns));
  }
}
447 
448 std::optional<std::string> xegpu::getChipStr(Operation *op) {
449  auto gpuModuleOp = op->getParentOfType<gpu::GPUModuleOp>();
450 
451  if (!gpuModuleOp)
452  return std::nullopt;
453 
454  auto targetAttrs = gpuModuleOp.getTargets();
455  if (targetAttrs) {
456  for (auto &attr : *targetAttrs) {
457  auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
458  if (xevmAttr)
459  return xevmAttr.getChip().str();
460  }
461  }
462 
463  return std::nullopt;
464 }
465 
/// Generates element-wise addition ops of two arrays with the same length.
// NOTE(review): the signature and the declaration of `results` were lost in
// this dump; upstream this is
// "SmallVector<OpFoldResult> xegpu::addElementwise(OpBuilder &builder,
//     Location loc, ArrayRef<OpFoldResult> lhs, ArrayRef<OpFoldResult> rhs) {
//   SmallVector<OpFoldResult> results;" — confirm.
    Location loc,
  assert(lhs.size() == rhs.size() && "lhs and rhs must have the same size");
  for (auto [l, r] : llvm::zip_equal(lhs, rhs)) {
    // Materialize each OpFoldResult as a Value (constants become index
    // constants), then emit the addition, folding it where possible.
    auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
    auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
    results.push_back(builder.createOrFold<index::AddOp>(loc, lval, rval));
  }
  return results;
}
480 
/// Generates element-wise addition ops of two arrays with automatic alignment.
/// When the input arrays have different sizes, the shorter array is
/// right-aligned with the longer array, and the unmatched leading elements
/// from the longer array are preserved unchanged. This is commonly used for
/// offset computation where higher-dimensional offsets need to be added to
/// lower-dimensional adjustments.
///
/// Example:
/// lhs = [l1, l2, l3], rhs = [r1, r2]
/// Result: [l1, l2+r1, l3+r2]
// NOTE(review): the signature lines were lost in this dump; upstream this is
// "SmallVector<OpFoldResult> xegpu::addWithRightAligned(OpBuilder &builder,
//     Location loc, ArrayRef<OpFoldResult> lhs,
//     ArrayRef<OpFoldResult> rhs) {" — confirm.
  // Ensure `a` is the longer (or equal-length) array and `b` the shorter.
  ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
  ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
  // Pass the unmatched leading elements of `a` through unchanged, then add
  // the right-aligned tails element-wise.
  SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
  a = a.slice(a.size() - b.size());
  results.append(addElementwise(builder, loc, a, b));
  return results;
}
static void copy(Location loc, Value dst, Value src, Value size, OpBuilder &builder)
Copies the given number of bytes from src to dst pointers.
This class represents an argument of a Block.
Definition: Value.h:309
TypedAttr getZeroAttr(Type type)
Definition: Builders.cpp:324
This class implements a pattern rewriter for use with ConversionPatterns.
void replaceOp(Operation *op, ValueRange newValues) override
Replace the given operation with the new values.
void replaceOpWithMultiple(Operation *op, SmallVector< SmallVector< Value >> &&newValues)
Replace the given operation with the new value ranges.
This class describes a specific conversion target.
void addLegalOp(OperationName op)
Register the given operations as legal.
void addDynamicallyLegalOp(OperationName op, const DynamicLegalityCallbackFn &callback)
Register the given operation as dynamically legal and set the dynamic legalization callback to the on...
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
IRValueT get() const
Return the current value being used by this operand.
Definition: UseDefLists.h:160
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition: Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition: MLIRContext.h:63
This class helps build Operations.
Definition: Builders.h:207
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition: Builders.h:526
OpConversionPattern is a wrapper around ConversionPattern that allows for matching and rewriting agai...
This class represents an operand of an operation.
Definition: Value.h:257
This is a value defined by a result of an operation.
Definition: Value.h:457
unsigned getResultNumber() const
Returns the number of this result.
Definition: Value.h:469
Operation is the basic unit of execution within MLIR.
Definition: Operation.h:88
AttrClass getAttrOfType(StringAttr name)
Definition: Operation.h:550
bool hasAttrOfType(NameT &&name)
Definition: Operation.h:575
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Definition: Operation.h:560
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition: Operation.h:797
MLIRContext * getContext()
Return the context this operation is associated with.
Definition: Operation.h:216
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition: Operation.h:234
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition: Operation.h:238
void setAttr(StringAttr name, Attribute value)
If the an attribute exists with the specified name, change it to the new value.
Definition: Operation.h:582
operand_type_range getOperandTypes()
Definition: Operation.h:397
MutableArrayRef< OpOperand > getOpOperands()
Definition: Operation.h:383
result_type_range getResultTypes()
Definition: Operation.h:428
result_range getOpResults()
Definition: Operation.h:420
Attribute removeAttr(StringAttr name)
Remove the attribute with the specified name if it exists.
Definition: Operation.h:600
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
Type conversion class.
void addConversion(FnT &&callback)
Register a conversion function.
void addSourceMaterialization(FnT &&callback)
All of the following materializations require function objects that are convertible to the following ...
void addTargetMaterialization(FnT &&callback)
This method registers a materialization that will be called when converting a value to a target type ...
This class provides an abstraction over the various different ranges of value types.
Definition: TypeRange.h:37
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition: Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition: ValueRange.h:387
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition: Value.h:96
void setType(Type newType)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:116
Type getType() const
Return the type of this value.
Definition: Value.h:105
use_range getUses() const
Returns a range of all uses, which is useful for iterating over all uses.
Definition: Value.h:188
static WalkResult skip()
Definition: WalkResult.h:48
static WalkResult advance()
Definition: WalkResult.h:47
Operation * getOwner() const
Return the owner of this operand.
Definition: UseDefLists.h:38
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_stride_slice.
Definition: XeGPUUtils.cpp:276
void setDistributeLayoutAttr(const T &operandOrResult, const DistributeLayoutAttr layout)
Sets the DistributeLayoutAttr for a given OpOperand or OpResult by attaching it to the owner's dictio...
Definition: XeGPUUtils.cpp:178
void setDistributeLayoutAttrs(Operation *op, function_ref< DistributeLayoutAttr(Value)> getLayoutImpl)
Set the DistributeLayoutAttr for each OpOperand and OpResult of the given operation.
Definition: XeGPUUtils.cpp:196
std::string getLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
Definition: XeGPUUtils.cpp:105
void removeLayoutAttr(const T &operandOrResult)
Removes the LayoutAttr for a given OpOperand or OpResult if it exists.
Definition: XeGPUUtils.cpp:214
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structure type convertion patterns...
Definition: XeGPUUtils.cpp:301
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
Definition: XeGPUUtils.cpp:116
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
Definition: XeGPUUtils.cpp:448
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_stride_slice.
Definition: XeGPUUtils.cpp:239
void removeLayoutAttrs(Operation *op)
Removes the DistributeLayoutAttr for each OpOperand and OpResult of the given operation if they exist...
Definition: XeGPUUtils.cpp:229
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
Definition: XeGPUUtils.cpp:32
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
Definition: XeGPUUtils.cpp:492
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
Definition: XeGPUUtils.cpp:467
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Definition: XeGPUUtils.cpp:40
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition: Utils.cpp:304
const FrozenRewritePatternSet & patterns
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition: Utils.cpp:111
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
LogicalResult applyPartialConversion(ArrayRef< Operation * > ops, const ConversionTarget &target, const FrozenRewritePatternSet &patterns, ConversionConfig config=ConversionConfig())
Below we define several entry points for operation conversion.