MLIR 23.0.0git
XeGPUUtils.cpp
Go to the documentation of this file.
1//===---- XeGPUUtils.cpp - MLIR Utilities for XeGPUOps ------------------===//
2//
3// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements utility methods for working with the XeGPU dialect.
10//
11//===----------------------------------------------------------------------===//
12
20#include "mlir/IR/Builders.h"
21#include "mlir/IR/Operation.h"
22#include "mlir/IR/ValueRange.h"
25#include "llvm/Support/Casting.h"
26#include "llvm/Support/FormatVariadic.h"
27#include <cstdint>
28#include <numeric>
29
30using namespace mlir;
31
32/// convert ArrayRef<ValueRange> into SmallVector<Value>
35 for (const auto &vals : values)
36 llvm::append_range(result, vals);
37 return result;
38}
39
40FailureOr<VectorType>
41mlir::xegpu::getDistributedVectorType(xegpu::TensorDescType tdescTy) {
42 auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
43 // It only works for subgroup level layout, which only has lane_layout
44 // and lane_data, and is to distribute a SIMD code into SIMT code.
45 if (!layout || !layout.isForSubgroup())
46 return failure();
47
48 SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
49 SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
50 auto tdescShape = tdescTy.getShape();
51 auto elementType = tdescTy.getElementType();
52
53 // compute sgSize by multiply elements of laneLayout
54 // e.g. for 2D layout, sgSize = laneLayout[0] * laneLayout[1]
55 // e.g. for 1D layout, sgSize = laneLayout[0]
56 int64_t sgSize = llvm::product_of(laneLayout);
57
58 // Check if the tensor descriptor shape is distributable.
59 int64_t tensorSize = 1;
60 for (auto [tdescDim, laneDim, laneDataDim] :
61 llvm::zip_equal(tdescShape, laneLayout, laneData)) {
62 assert((tdescDim % (laneDim * laneDataDim) == 0) &&
63 "tensor descriptor shape is not distributable");
64 tensorSize *= tdescDim;
65 }
66 // tensorSize must be adjusted for array_length.
67 tensorSize *= tdescTy.getArrayLength();
68
69 return VectorType::get({tensorSize / sgSize}, elementType);
70}
71
72FailureOr<VectorType>
73mlir::xegpu::getDistributedVectorType(VectorType originalType,
74 xegpu::LayoutAttr layout) {
75 int64_t rank = originalType.getRank();
76 // Distributed vector type is only supported for 1D, 2D and 3D vectors.
77 if (rank < 1 || rank > 3)
78 return failure();
79 ArrayRef<int64_t> shape = originalType.getShape();
80 // arrayLength is 1 for 1D and 2D vectors, and equal to the first dimension
81 // of the 3D vector.
82 int arrayLength = 1;
83 if (rank == 3) {
84 arrayLength = shape[0];
85 shape = shape.drop_front();
86 }
87 auto helperTdescTy = xegpu::TensorDescType::get(
88 shape, originalType.getElementType(), arrayLength,
89 /*boundary_check=*/true,
90 /*memory_space=*/xegpu::MemorySpace::Global, layout);
91 return xegpu::getDistributedVectorType(helperTdescTy);
92}
93
94FailureOr<VectorType>
95xegpu::getDistVecTypeBasedOnLaneLayout(xegpu::DistributeLayoutAttr layout,
96 VectorType originalType) {
97 if (!layout)
98 return failure();
99 assert((isa<xegpu::LayoutAttr>(layout) || isa<xegpu::SliceAttr>(layout)) &&
100 "Expecting a valid layout.");
101
102 int64_t vectorRank = originalType.getRank();
103 int64_t layoutRank = layout.getRank();
104 assert(vectorRank >= layoutRank && "Vector rank must be >= layout rank.");
105
106 // When the vector has more dimensions than the layout, only the trailing
107 // dimensions are distributed. Leading dimensions are preserved as-is.
108 int64_t offset = vectorRank - layoutRank;
109 ArrayRef<int64_t> fullShape = originalType.getShape();
110 SmallVector<int64_t> trailingShape(fullShape.begin() + offset,
111 fullShape.end());
112 auto distributedShapeOrFailure =
113 layout.computeDistributedShape(trailingShape);
114 if (failed(distributedShapeOrFailure))
115 return failure();
116
117 SmallVector<int64_t> resultShape(fullShape.begin(),
118 fullShape.begin() + offset);
119 resultShape.append(distributedShapeOrFailure->begin(),
120 distributedShapeOrFailure->end());
121 return VectorType::get(resultShape, originalType.getElementType());
122}
123
124std::string xegpu::getTemporaryLayoutName(const OpOperand &operand) {
125 const StringRef prefix("layout_operand_");
126 unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
127 return llvm::formatv("{0}{1}", prefix, idx).str();
128}
129
131 const StringRef prefix = "layout_result_";
132 return llvm::formatv("{0}{1}", prefix, result.getResultNumber()).str();
133}
134
135xegpu::DistributeLayoutAttr xegpu::getDistributeLayoutAttr(const Value value) {
136 if (!value)
137 return nullptr;
138
139 if (auto result = dyn_cast<OpResult>(value)) {
140 Operation *defOp = result.getDefiningOp();
141 assert(defOp && "result must have a defining op");
142
143 if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(defOp)) {
144 auto layout = anchorOp.getAnchorLayout();
145 return layout;
146 }
147
148 std::string layoutName = getTemporaryLayoutName(result);
149 if (defOp->hasAttr(layoutName)) {
150 auto layout =
151 defOp->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
152 return layout;
153 }
154 }
155
156 if (auto arg = dyn_cast<BlockArgument>(value)) {
157 auto *parentOp = arg.getOwner()->getParentOp();
158 if (auto loop = dyn_cast_if_present<LoopLikeOpInterface>(parentOp)) {
159 OpOperand *tiedInit = loop.getTiedLoopInit(arg);
160 if (tiedInit)
161 return getTemporaryLayout(*tiedInit);
162 }
163 }
164
165 if (auto tdescTy =
166 dyn_cast_if_present<xegpu::TensorDescType>(value.getType()))
167 return tdescTy.getLayoutAttr();
168
169 return nullptr;
170}
171xegpu::DistributeLayoutAttr
173 Operation *op = opr.getOwner();
174 unsigned idx = const_cast<OpOperand &>(opr).getOperandNumber();
175
176 if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(op)) {
177 if (auto dpasOp = dyn_cast<xegpu::DpasOp>(op)) {
178 if (idx == 0) {
179 return dpasOp.getLayoutAAttr();
180 } else if (idx == 1) {
181 return dpasOp.getLayoutBAttr();
182 } else if (idx == 2) {
183 return dpasOp.getLayoutCdAttr();
184 }
185 }
186 if (auto dpasMxOp = dyn_cast<xegpu::DpasMxOp>(op)) {
187 // DpasMxOp has operands: a, b, optional acc, optional scale_a, optional
188 // scale_b
189 unsigned currentIdx = 0;
190
191 if (idx == currentIdx++)
192 return dpasMxOp.getLayoutAAttr();
193
194 if (idx == currentIdx++)
195 return dpasMxOp.getLayoutBAttr();
196
197 if (dpasMxOp.getAcc())
198 if (idx == currentIdx++)
199 return dpasMxOp.getLayoutCdAttr();
200
201 if (dpasMxOp.getScaleA())
202 if (idx == currentIdx++)
203 return dpasMxOp.getLayoutAScaleAttr();
204
205 if (dpasMxOp.getScaleB())
206 if (idx == currentIdx++)
207 return dpasMxOp.getLayoutBScaleAttr();
208
209 return nullptr;
210 }
211 if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(op)) {
212 return convertOp.getInputLayoutAttr();
213 }
214 auto layout = anchorOp.getAnchorLayout();
215
216 if (idx == 0)
217 return layout;
218
219 // For StoreNdOp and StoreMatrixOp,
220 // the layout is valid for the first two operands: value and memref/tdesc.
221 if (isa<xegpu::StoreNdOp, xegpu::StoreMatrixOp>(op) && (idx < 2))
222 return layout;
223
224 if (isa<xegpu::StoreScatterOp>(op)) {
225 xegpu::StoreScatterOp store(op);
226 int chunkSize = store.getChunkSize().value_or(1);
227 if (layout && idx >= 2 && chunkSize > 1)
228 return layout.dropDims(llvm::to_vector(
229 llvm::seq<int64_t>(layout.getRank() - 1, layout.getRank())));
230 return layout;
231 }
232 if (isa<xegpu::LoadGatherOp>(op)) {
233 xegpu::LoadGatherOp load(op);
234 int chunkSize = load.getChunkSize().value_or(1);
235 if (layout && idx >= 1 && chunkSize > 1)
236 return layout.dropDims(llvm::to_vector(
237 llvm::seq<int64_t>(layout.getRank() - 1, layout.getRank())));
238 return layout;
239 }
240 }
241
242 std::string layoutName = xegpu::getTemporaryLayoutName(opr);
243 if (op->hasAttr(layoutName)) {
244 auto layout = op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
245 return layout;
246 }
247
248 return nullptr;
249}
250
251// Returns the permanent layout attribute for the given result if it's
252// available on the defining op. Otherwise returns the provided layout.
253xegpu::DistributeLayoutAttr
254maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
255 const OpResult &result, mlir::Operation *owner,
256 const std::string &name) {
257 xegpu::DistributeLayoutAttr candidate = layout;
258
259 if (auto loadOp = dyn_cast<xegpu::LoadGatherOp>(owner)) {
260 if (auto perm = loadOp.getLayoutAttr())
261 candidate = perm;
262 }
263
264 return candidate;
265}
266
267// Returns the permanent layout attribute for the given operand if it's
268// available on the defining op. Otherwise returns the provided layout.
269xegpu::DistributeLayoutAttr
270maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout,
271 const OpOperand &operand, mlir::Operation *owner,
272 const std::string &name) {
273 xegpu::DistributeLayoutAttr candidate = layout;
274 unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
275
276 if (auto storeOp = dyn_cast<xegpu::StoreScatterOp>(owner)) {
277 if (idx == 0) {
278 if (auto perm = storeOp.getLayoutAttr())
279 candidate = perm;
280 }
281 }
282
283 return candidate;
284}
285
286// TODO-LayoutRefactor: Remove this function after replacing use
287// with setTemporaryLayout or setAnchorLayout
289 const mlir::OpResult &result,
290 const mlir::xegpu::DistributeLayoutAttr layout) {
291 Operation *owner = result.getOwner();
292
293 if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
294 if (anchorOp.getAnchorLayout() == layout)
295 return;
296 anchorOp.setAnchorLayout(layout);
297 return;
298 }
299
300 std::string name = xegpu::getTemporaryLayoutName(result);
301 if (owner->hasAttrOfType<DistributeLayoutAttr>(name)) {
302 return;
303 }
304 if (layout) {
305 owner->setAttr(name, layout);
306 }
307}
308
309// TODO-LayoutRefactor: Remove this function after replacing use
310// with setTemporaryLayout or setAnchorLayout
312 const DistributeLayoutAttr layout) {
313 Operation *owner = operand.getOwner();
314 unsigned idx = const_cast<OpOperand &>(operand).getOperandNumber();
315
316 if (!layout) {
317 return;
318 }
319 if (auto anchorOp = dyn_cast<xegpu::AnchorLayoutInterface>(owner)) {
320 if (auto dpasOp = dyn_cast<xegpu::DpasOp>(owner)) {
321 if (idx == 0) {
322 return dpasOp.setLayoutAAttr(layout);
323 } else if (idx == 1) {
324 return dpasOp.setLayoutBAttr(layout);
325 } else if (idx == 2) {
326 return dpasOp.setLayoutCdAttr(layout);
327 }
328 }
329 if (auto convertOp = dyn_cast<xegpu::ConvertLayoutOp>(owner)) {
330 return convertOp.setInputLayoutAttr(layout);
331 }
332
333 // For store operations (StoreScatterOp, StoreNdOp, StoreMatrixOp),
334 // the layout is valid for the first two operands: value and memref/tdesc.
335 // For other operations, the layout applies to the first operand only.
336 if (isa<xegpu::StoreScatterOp, xegpu::StoreNdOp, xegpu::StoreMatrixOp>(
337 owner)) {
338 if (idx < 2) {
339 anchorOp.setAnchorLayout(layout);
340 }
341 } else {
342 if (idx == 0) {
343 anchorOp.setAnchorLayout(layout);
344 }
345 }
346 }
347
348 std::string name = xegpu::getTemporaryLayoutName(operand);
349 if (owner->hasAttrOfType<DistributeLayoutAttr>(name)) {
350 return;
351 }
352 if (layout) {
353 owner->setAttr(name, layout);
354 }
355}
356
357template <typename T, typename>
358xegpu::DistributeLayoutAttr
359xegpu::getTemporaryLayout(const T &operandOrResult) {
360 Operation *op = operandOrResult.getOwner();
361
362 std::string layoutName = xegpu::getTemporaryLayoutName(operandOrResult);
363 if (op->hasAttr(layoutName)) {
364 auto layout = op->getAttrOfType<xegpu::DistributeLayoutAttr>(layoutName);
365 return layout;
366 }
367
368 return nullptr;
369}
370
371template xegpu::DistributeLayoutAttr
373template xegpu::DistributeLayoutAttr
375
376template <typename T, typename>
377void xegpu::setTemporaryLayout(const T &operandOrResult,
378 const xegpu::DistributeLayoutAttr layout) {
379 Operation *owner = operandOrResult.getOwner();
380 std::string name = xegpu::getTemporaryLayoutName(operandOrResult);
381 if (owner->hasAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
382 return;
383 }
384 if (layout) {
385 owner->setAttr(name, layout);
386 }
387}
388
390 const mlir::OpResult &result,
391 const mlir::xegpu::DistributeLayoutAttr layout);
392
394 const mlir::OpOperand &operand,
395 const mlir::xegpu::DistributeLayoutAttr layout);
396
400 auto vecTy = dyn_cast<VectorType>(value.getType());
401 if (!vecTy)
402 return {value};
403
404 ArrayRef<int64_t> srcShape = vecTy.getShape();
405 if (!computeShapeRatio(srcShape, shape))
406 return {value};
407
408 int64_t srcShapeRank = srcShape.size();
409 int64_t targetShapeRank = shape.size();
410
411 SmallVector<int64_t> adjustedTargetShape(srcShape.size());
412 int64_t rankDiff = srcShapeRank - targetShapeRank;
413 std::fill(adjustedTargetShape.begin(), adjustedTargetShape.begin() + rankDiff,
414 1);
415 llvm::copy(shape, adjustedTargetShape.begin() + rankDiff);
416
418 for (SmallVector<int64_t> offsets :
419 StaticTileOffsetRange(srcShape, adjustedTargetShape)) {
420 SmallVector<int64_t> staticStrides(offsets.size(), 1);
421 Value slice = vector::ExtractStridedSliceOp::create(
422 builder, loc, value, offsets, adjustedTargetShape, staticStrides);
423
424 // Reshape to remove leading unit dims if needed
425 if (srcShapeRank > targetShapeRank) {
426 auto targetTy = VectorType::get(shape, vecTy.getElementType());
427 slice = vector::ShapeCastOp::create(builder, loc, targetTy, slice);
428 }
429 result.push_back(slice);
430 }
431
432 return result;
433}
434
436 ValueRange values,
438 VectorType inputTy = dyn_cast<VectorType>(values[0].getType());
439 assert(llvm::all_of(values.getTypes(),
440 [&](Type type) { return type == inputTy; }) &&
441 "values must be of the same VectorType");
442
443 Type elemTy = inputTy.getElementType();
444 ArrayRef<int64_t> tileShape = inputTy.getShape();
445
446 VectorType resultTy = VectorType::get(shape, elemTy);
447 auto zeroAttr = builder.getZeroAttr(elemTy);
448 Value result = arith::ConstantOp::create(
449 builder, loc, resultTy, DenseElementsAttr::get(resultTy, zeroAttr));
450
451 for (auto [src, offsets] :
452 llvm::zip_equal(values, StaticTileOffsetRange(shape, tileShape))) {
453 SmallVector<int64_t> staticStrides(tileShape.size(), 1);
454 result = vector::InsertStridedSliceOp::create(builder, loc, src, result,
455 offsets, staticStrides);
456 }
457 return result;
458}
459
461 Operation *op, TypeConverter converter) {
462 MLIRContext *context = op->getContext();
463
464 auto materializeCast = [](OpBuilder &builder, Type type, ValueRange inputs,
465 Location loc) -> Value {
466 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
467 .getResult(0);
468 };
469
470 { // convert VectorType to RankedTensorType for SCF Structural ops
471 TypeConverter converter;
472 converter.addConversion([](Type type) -> Type { return type; });
473 converter.addConversion([](VectorType type) -> Type {
474 return RankedTensorType::get(type.getShape(), type.getElementType());
475 });
476 converter.addSourceMaterialization(materializeCast);
477 converter.addTargetMaterialization(materializeCast);
478
479 mlir::ConversionTarget target(*context);
480 target.addLegalOp<UnrealizedConversionCastOp>();
481
482 mlir::RewritePatternSet patterns(context);
484 target);
485 (void)mlir::applyPartialConversion(op, target, std::move(patterns));
486 }
487
488 { // propagate the layout attribute to RankedTensorType by checking
489 // BuiltInUnrealizedCastOps
490 // for VectorType to RankedTensorType cast.
491 op->walk([](UnrealizedConversionCastOp castOp) {
492 if (castOp.getNumOperands() != 1 || castOp.getNumResults() != 1)
493 return WalkResult::skip();
494
495 Value input = castOp.getInputs()[0];
496 Value result = castOp.getResults()[0];
497 auto inputTy = dyn_cast<VectorType>(input.getType());
498 auto resultTy = dyn_cast<RankedTensorType>(result.getType());
499
500 // Only look at ops casting from VectorType to RankedTensorType
501 if (!inputTy || !resultTy)
502 return WalkResult::skip();
503
504 xegpu::DistributeLayoutAttr layout =
506 if (!layout)
507 return WalkResult::skip();
508
509 RankedTensorType newTy = resultTy.cloneWithEncoding(layout);
510 result.setType(newTy);
511
512 // update the arguments if user is a LoopLike op.
513 for (OpOperand &use : result.getUses()) {
514 if (auto loop = dyn_cast<LoopLikeOpInterface>(use.getOwner())) {
515 BlockArgument arg = loop.getTiedLoopRegionIterArg(&use);
516 arg.setType(newTy);
517 }
518 // whileOp has two regions, the BlockArgument of the after region
519 // is not exposed by LoopLikeOpInterface
520 if (auto whileOp = dyn_cast<scf::WhileOp>(use.getOwner())) {
521 unsigned idx = use.getOperandNumber();
522 BlockArgument arg = whileOp.getAfterArguments()[idx];
523 arg.setType(newTy);
524 }
525 }
526 return WalkResult::advance();
527 });
528
529 // using yieldOp as anchor to update the result type of its ParentOp
530 op->walk([](scf::YieldOp yieldOp) {
531 Operation *parentOp = yieldOp->getParentOp();
532 for (OpResult r : parentOp->getOpResults()) {
533 unsigned idx = r.getResultNumber();
534 Type resultTy = r.getType();
535 Type yieldTy = yieldOp.getResults()[idx].getType();
536 if (isa<RankedTensorType>(resultTy) && yieldTy != resultTy)
537 r.setType(yieldTy);
538 }
539 });
540 }
541
542 { // perform the conversion from RankedTensorType to VectorType based on the
543 // DistributeLayoutAttr
544
545 // Handle the UnrealizedConversionCastOp introduced by the first step.
546 // For vector->RankedTensorType, it will simply forward the inputs.
547 // For RankedTensorType->vector, it will update the inputs with the
548 // one from the adaptor.
549 class UnrealizedConversionCastOpPattern
550 : public OpConversionPattern<mlir::UnrealizedConversionCastOp> {
551 using OpConversionPattern<
552 mlir::UnrealizedConversionCastOp>::OpConversionPattern;
553
554 mlir::LogicalResult
555 matchAndRewrite(mlir::UnrealizedConversionCastOp op,
556 OneToNOpAdaptor adaptor,
557 ConversionPatternRewriter &rewriter) const override {
558 auto inputs = op.getOperands();
559 auto outputs = op.getOutputs();
560
561 if (inputs.size() != 1 || outputs.size() != 1)
562 return failure();
563
564 auto inputTy = inputs[0].getType();
565 auto outputTy = outputs[0].getType();
566
567 if (isa<VectorType>(inputTy) && isa<RankedTensorType>(outputTy)) {
568 rewriter.replaceOpWithMultiple(op, adaptor.getInputs());
569 return success();
570 }
571
572 if (isa<RankedTensorType>(inputTy) && isa<VectorType>(outputTy)) {
573 SmallVector<Value> values = xegpu::flattenValues(adaptor.getInputs());
574 auto newOp = UnrealizedConversionCastOp::create(rewriter, op.getLoc(),
575 outputTy, values);
576 rewriter.replaceOp(op, newOp);
577 return success();
578 }
579 return failure();
580 }
581 };
582
583 converter.addSourceMaterialization(materializeCast);
584 converter.addTargetMaterialization([&](OpBuilder &builder, TypeRange type,
585 ValueRange inputs, Location loc) {
586 return UnrealizedConversionCastOp::create(builder, loc, type, inputs)
587 .getResults();
588 });
589
590 mlir::ConversionTarget target(*context);
591 target.addDynamicallyLegalOp<UnrealizedConversionCastOp>(
592 [](UnrealizedConversionCastOp op) {
593 auto isTensorTy = [](Type type) {
594 return isa<RankedTensorType>(type);
595 };
596 return llvm::none_of(op->getOperandTypes(), isTensorTy) &&
597 llvm::none_of(op->getResultTypes(), isTensorTy);
598 });
599 mlir::RewritePatternSet patterns(context);
600 patterns.insert<UnrealizedConversionCastOpPattern>(context);
602 target);
603 (void)mlir::applyPartialConversion(op, target, std::move(patterns));
604 }
605}
606
607std::optional<std::string> xegpu::getChipStr(Operation *op) {
608 auto gpuModuleOp = op->getParentOfType<gpu::GPUModuleOp>();
609
610 if (!gpuModuleOp)
611 return std::nullopt;
612
613 auto targetAttrs = gpuModuleOp.getTargets();
614 if (targetAttrs) {
615 for (auto &attr : *targetAttrs) {
616 auto xevmAttr = llvm::dyn_cast<xevm::XeVMTargetAttr>(attr);
617 if (xevmAttr)
618 return xevmAttr.getChip().str();
619 }
620 }
621
622 return std::nullopt;
623}
624
625/// Generates element-wise addition ops of two arrays with same length.
627 Location loc,
630 assert(lhs.size() == rhs.size() && "lhs and rhs must have the same size");
632 for (auto [l, r] : llvm::zip_equal(lhs, rhs)) {
633 auto lval = getValueOrCreateConstantIndexOp(builder, loc, l);
634 auto rval = getValueOrCreateConstantIndexOp(builder, loc, r);
635 results.push_back(builder.createOrFold<arith::AddIOp>(loc, lval, rval));
636 }
637 return results;
638}
639
640/// Generates element-wise addition ops of two arrays with automatic alignment.
641/// When the input arrays have different sizes, the shorter array is
642/// right-aligned with the longer array, and the unmatched leading elements from
643/// the longer array are preserved unchanged. This is commonly used for offset
644/// computation where higher-dimensional offsets need to be added to
645/// lower-dimensional adjustments.
646///
647/// Example:
648/// lhs = [l1, l2, l3], rhs = [r1, r2]
649/// Result: [l1, l2+r1, l3+r2]
654 // ensure a is longer than b
655 ArrayRef<OpFoldResult> a = lhs.size() >= rhs.size() ? lhs : rhs;
656 ArrayRef<OpFoldResult> b = lhs.size() >= rhs.size() ? rhs : lhs;
657 SmallVector<OpFoldResult> results(a.take_front(a.size() - b.size()));
658 a = a.slice(a.size() - b.size());
659 results.append(addElementwise(builder, loc, a, b));
660 return results;
661}
662
663template <typename T>
665 ArrayRef<T> candidateMultiples) {
666 static_assert(std::is_integral<T>::value, "T must be an integer type");
667 int largest = -1;
668 SmallVector<T> multiples = {1};
669 if (!candidateMultiples.empty())
670 multiples =
671 SmallVector<T>(candidateMultiples.begin(), candidateMultiples.end());
672 for (T candidate : candidates) {
673 for (T multiple : multiples) {
674 int value = static_cast<int>(candidate * multiple);
675 if (value != 0 && dim % value == 0 && value > largest)
676 largest = value;
677 }
678 }
679 return largest;
680}
681
683 vector::CombiningKind kind, uint32_t size) {
684 // First reduce on a single thread to get per lane reduction value.
685 Value laneVal = vector::ReductionOp::create(builder, loc, kind, input);
686 // Parallel reduction using butterfly shuffles.
687 for (uint64_t i = 1; i < size; i <<= 1) {
688 Value shuffled =
689 gpu::ShuffleOp::create(builder, loc, laneVal, i, /** width = **/ size,
690 /** mode = **/ gpu::ShuffleMode::XOR)
691 .getShuffleResult();
692 laneVal = makeArithReduction(builder, loc, kind, laneVal, shuffled);
693 }
694 return laneVal;
695}
696
699 vector::CombiningKind kind,
700 int64_t reductionDim, Location loc,
701 PatternRewriter &rewriter) {
702 VectorType sourceType = src.getType();
703 int64_t sourceRank = sourceType.getRank();
704 // Expecting at least a 2D source vector. Leading dimensions (all except the
705 // last two) must be unit.
706 assert(sourceRank >= 2 && "expected at least a 2D source vector");
707 for (int64_t i = 0; i < sourceRank - 2; ++i)
708 assert(sourceType.getShape()[i] == 1 &&
709 "expected leading dimensions to be unit");
710 int64_t rowIdx = sourceRank - 2;
711 int64_t columnIdx = sourceRank - 1;
712 int64_t sourceH = sourceType.getShape()[rowIdx];
713 int64_t sourceW = sourceType.getShape()[columnIdx];
714 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
715 // Create a constant vector to hold the result of the reduction.
716 TypedAttr zeroAttr = rewriter.getZeroAttr(sourceType.getElementType());
717 Value reductionResult = arith::ConstantOp::create(
718 rewriter, loc, acc.getType(),
719 DenseElementsAttr::get(acc.getType(), zeroAttr));
720 // TODO: Remove these get/setTemporaryLayout calls after we deprecate the old
721 // XeGPUSubgroupDistribute pass.
722 auto srcLayout = xegpu::getTemporaryLayout(dyn_cast<OpResult>(src));
723 auto accLayout = xegpu::getTemporaryLayout(dyn_cast<OpResult>(acc));
724 // Reduction result should have the same layout as the accumulator.
725 xegpu::setTemporaryLayout(cast<OpResult>(reductionResult), accLayout);
726 // For each slice of the source, extract the slice vector, do a reduction
727 // and, insert the reduced value back to the result vector.
728 int64_t accRank = acc.getType().getRank();
729 for (int i = 0; i < nSlices; ++i) {
730 // Build nD offsets, sizes, and strides. Leading unit dims get
731 // offset=0, size=1. The last two dims are set based on reductionDim.
732 SmallVector<int64_t> sliceOffsets(sourceRank, 0);
733 SmallVector<int64_t> sliceSizes(sourceRank, 1);
734 SmallVector<int64_t> strides(sourceRank, 1);
735 if (reductionDim == columnIdx) {
736 sliceOffsets[rowIdx] = i;
737 sliceSizes[columnIdx] = sourceW;
738 } else {
739 sliceOffsets[columnIdx] = i;
740 sliceSizes[rowIdx] = sourceH;
741 }
742
743 vector::ExtractStridedSliceOp extractOp =
744 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
745 sliceSizes, strides);
746 // Extract strided slice has the same layout as src.
747 xegpu::setTemporaryLayout(extractOp->getOpResult(0), srcLayout);
748
749 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
750
751 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
752 rewriter, loc,
753 VectorType::get({nSliceElements}, sourceType.getElementType()),
754 extractOp.getResult());
755
756 // Shape cast output has the same layout as the accumulator. Shape cast
757 // source has the same layout as the original reduction source.
758 xegpu::setTemporaryLayout(slice->getOpOperand(0), srcLayout);
759 xegpu::setTemporaryLayout(slice->getOpResult(0), accLayout);
760 // Extract and reduction results in scalars, so no result layout is needed.
761 // Build multi-dim index into acc (sourceRank-1 dims, i.e. source shape with
762 // the reduction dim removed). Leading unit dims get index 0.
763 SmallVector<int64_t> accIdx(accRank, 0);
764 accIdx[accRank - 1] = i;
765 Value accExtract = vector::ExtractOp::create(rewriter, loc, acc, accIdx);
766 Value reduction = vector::ReductionOp::create(
767 rewriter, loc, kind, slice.getResult(), accExtract);
768 reductionResult = vector::InsertOp::create(rewriter, loc, reduction,
769 reductionResult, accIdx);
770 // Insert op should have the same layout as the accumulator.
771 xegpu::setTemporaryLayout(cast<OpResult>(reductionResult), accLayout);
772 }
773 return reductionResult;
774}
775
778 vector::CombiningKind kind, int64_t reductionDim, int64_t reductionSize,
779 Location loc, PatternRewriter &rewriter) {
780 VectorType sourceType = src.getType();
781 int64_t sourceRank = sourceType.getRank();
782 // Expecting at least a 2D source vector. Leading dimensions (all except the
783 // last two) must be unit.
784 assert(sourceRank >= 2 && "expected at least a 2D source vector");
785 for (int64_t i = 0; i < sourceRank - 2; ++i)
786 assert(sourceType.getShape()[i] == 1 &&
787 "expected leading dimensions to be unit");
788 int64_t rowIdx = sourceRank - 2;
789 int64_t columnIdx = sourceRank - 1;
790 int64_t sourceH = sourceType.getShape()[rowIdx];
791 int64_t sourceW = sourceType.getShape()[columnIdx];
792
793 // Create a constant vector to hold the result of the reduction.
794 TypedAttr zeroAttr = rewriter.getZeroAttr(sourceType.getElementType());
795 Value reductionResult = arith::ConstantOp::create(
796 rewriter, loc, acc.getType(),
797 DenseElementsAttr::get(acc.getType(), zeroAttr));
798
799 // nSlices is the number of reduction operations needed to reduce the entire
800 // source vector. For example, if reductionDim is the row dim, we are
801 // reducing across rows, and each slice is a column. So the number of slices
802 // is the number of columns, which is sourceW.
803 int nSlices = (reductionDim == rowIdx) ? sourceW : sourceH;
804
805 // For each slice of the source, extract the slice vector, do a reduction
806 // and, insert the reduced value back to the result vector.
807 int64_t accRank = acc.getType().getRank();
808 for (int i = 0; i < nSlices; ++i) {
809 // Build nD offsets, sizes, and strides. Leading unit dims get
810 // offset=0, size=1. The last two dims are set based on reductionDim.
811 SmallVector<int64_t> sliceOffsets(sourceRank, 0);
812 SmallVector<int64_t> sliceSizes(sourceRank, 1);
813 SmallVector<int64_t> strides(sourceRank, 1);
814 if (reductionDim == columnIdx) {
815 sliceOffsets[rowIdx] = i;
816 sliceSizes[columnIdx] = sourceW;
817 } else {
818 sliceOffsets[columnIdx] = i;
819 sliceSizes[rowIdx] = sourceH;
820 }
821
822 vector::ExtractStridedSliceOp extractOp =
823 vector::ExtractStridedSliceOp::create(rewriter, loc, src, sliceOffsets,
824 sliceSizes, strides);
825 int64_t nSliceElements = extractOp.getResult().getType().getNumElements();
826 vector::ShapeCastOp slice = vector::ShapeCastOp::create(
827 rewriter, loc,
828 VectorType::get({nSliceElements}, sourceType.getElementType()),
829 extractOp.getResult());
830
831 SmallVector<int64_t> accIdx(accRank, 0);
832 accIdx[accRank - 1] = i;
833 Value accExtract = vector::ExtractOp::create(rewriter, loc, acc, accIdx);
834 Value fullReduce =
835 xegpu::subgroupReduction(loc, rewriter, slice, kind, reductionSize);
836 fullReduce =
837 vector::makeArithReduction(rewriter, loc, kind, fullReduce, accExtract);
838 reductionResult = vector::InsertOp::create(rewriter, loc, fullReduce,
839 reductionResult, accIdx);
840 }
841 return reductionResult;
842}
843
845 Type type,
846 vector::CombiningKind kind) {
847 auto vecTy = dyn_cast<VectorType>(type);
848 Type elemTy = vecTy ? vecTy.getElementType() : type;
849
850 // Helper to create either a splat vector or scalar constant from an attr.
851 auto makeConst = [&](Attribute scalarAttr) -> Value {
852 if (vecTy)
853 return arith::ConstantOp::create(
854 builder, loc, vecTy, DenseElementsAttr::get(vecTy, scalarAttr));
855 return arith::ConstantOp::create(builder, loc, cast<TypedAttr>(scalarAttr));
856 };
857
858 switch (kind) {
859 case vector::CombiningKind::ADD:
860 case vector::CombiningKind::XOR:
861 case vector::CombiningKind::OR:
862 case vector::CombiningKind::MAXUI:
863 return makeConst(builder.getZeroAttr(elemTy));
864
865 case vector::CombiningKind::MUL:
866 case vector::CombiningKind::AND:
867 return makeConst(builder.getOneAttr(elemTy));
868
869 case vector::CombiningKind::MINSI:
870 if (auto intTy = dyn_cast<IntegerType>(elemTy))
871 return makeConst(builder.getIntegerAttr(
872 elemTy, APInt::getSignedMaxValue(intTy.getWidth())));
873 return nullptr;
874
875 case vector::CombiningKind::MINUI:
876 if (auto intTy = dyn_cast<IntegerType>(elemTy))
877 return makeConst(
878 builder.getIntegerAttr(elemTy, APInt::getMaxValue(intTy.getWidth())));
879 return nullptr;
880
881 case vector::CombiningKind::MAXSI:
882 if (auto intTy = dyn_cast<IntegerType>(elemTy))
883 return makeConst(builder.getIntegerAttr(
884 elemTy, APInt::getSignedMinValue(intTy.getWidth())));
885 return nullptr;
886
887 case vector::CombiningKind::MINNUMF:
888 case vector::CombiningKind::MINIMUMF:
889 if (auto floatTy = dyn_cast<FloatType>(elemTy))
890 return makeConst(builder.getFloatAttr(
891 elemTy, APFloat::getInf(floatTy.getFloatSemantics())));
892 return nullptr;
893
894 case vector::CombiningKind::MAXNUMF:
895 case vector::CombiningKind::MAXIMUMF:
896 if (auto floatTy = dyn_cast<FloatType>(elemTy))
897 return makeConst(builder.getFloatAttr(
898 elemTy, APFloat::getInf(floatTy.getFloatSemantics(), true)));
899 return nullptr;
900 }
901 return nullptr;
902}
903
904/// Explicit instantiations
905template int xegpu::getLargestDivisor<int>(int dim, ArrayRef<int> candidates,
906 ArrayRef<int> candidateMultiples);
907template int
909 ArrayRef<unsigned> candidateMultiples);
910
911bool xegpu::requirePacked(const xegpu::DistributeLayoutAttr layout) {
912 if (!layout)
913 return false;
914 auto laneData = layout.getEffectiveLaneDataAsInt();
915 if (laneData.size() != 2)
916 return false;
917 return laneData[0] != 1;
918}
919
920bool xegpu::requireTranspose(const xegpu::DistributeLayoutAttr layout,
921 const xegpu::uArch::uArch *uArch) {
922 // Return false for unsupported targets.
923 // TODO: Add more support or move to target info.
924 if (uArch->getName().equals_insensitive("pvc") &&
925 uArch->getName().equals_insensitive("bmg") &&
926 uArch->getName().equals_insensitive("cri"))
927 return false;
928 if (!layout)
929 return false;
930 auto laneLayout = layout.getEffectiveLaneLayoutAsInt();
931 if (laneLayout.size() != 2)
932 return false;
933 return laneLayout[0] == uArch->getSubgroupSize() && laneLayout[1] == 1;
934}
935
936// Check if dst shape is an expansion of src shape by inserting unit dimensions.
937// Returns true if all dimensions in src match corresponding dimensions in dst
938// (after skipping unit dimensions), and populates expandedUnitDims with the
939// indices of the unit dimensions in dst that were added (not present in src).
940// Example: src=[2,3], dst=[1,2,3,1] -> true, expandedUnitDims=[0,3]
942 SmallVector<int64_t> &expandedUnitDims) {
943 // All unit dimensions in dst that don't appear in src are the expanded
944 // unit dimensions
945 size_t srcIdx = 0;
946 for (size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx)
947 if (srcIdx < src.size() && src[srcIdx] == dst[dstIdx])
948 srcIdx++;
949 else if (dst[dstIdx] == 1)
950 expandedUnitDims.push_back(dstIdx);
951 else
952 return false;
953 return srcIdx == src.size();
954}
955
956// Checks if dst shape is an expansion of src shape where each dimension in src
957// is split into one or more consecutive dimensions in dst whose product equals
958// the original dimension. Populates splitDimGroups with groups of dst indices
959// that correspond to each src dimension. Example: src=[6,4], dst=[2,3,2,2] ->
960// true
963 SmallVector<SmallVector<int64_t>> &splitDimGroups) {
964 // each dim in src can be mapped to one or more dims in dst whose product
965 // equals to the src dim
966 size_t srcIdx = 0;
967 int64_t accumulatedSize = 1;
968 SmallVector<int64_t> currentDstDims;
969
970 splitDimGroups.clear();
971 for (size_t dstIdx = 0; dstIdx < dst.size(); ++dstIdx) {
972 if (srcIdx >= src.size())
973 return false;
974 accumulatedSize *= dst[dstIdx];
975 currentDstDims.push_back(dstIdx);
976
977 if (accumulatedSize == src[srcIdx]) {
978 // Record the mapping: srcIdx -> currentDstDims
979 splitDimGroups.push_back(currentDstDims);
980 // move to next src dim
981 srcIdx++;
982 accumulatedSize = 1;
983 currentDstDims.clear();
984 } else if (accumulatedSize > src[srcIdx]) {
985 return false;
986 }
987 }
988 return srcIdx == src.size();
989}
return success()
lhs
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
auto load
xegpu::DistributeLayoutAttr maybePickPermanentLayout(xegpu::DistributeLayoutAttr layout, const OpResult &result, mlir::Operation *owner, const std::string &name)
Attributes are known-constant values of operations.
Definition Attributes.h:25
This class represents an argument of a Block.
Definition Value.h:306
IntegerAttr getIntegerAttr(Type type, int64_t value)
Definition Builders.cpp:232
FloatAttr getFloatAttr(Type type, double value)
Definition Builders.cpp:258
TypedAttr getZeroAttr(Type type)
Definition Builders.cpp:328
TypedAttr getOneAttr(Type type)
Definition Builders.cpp:346
static DenseElementsAttr get(ShapedType type, ArrayRef< Attribute > values)
Constructs a dense elements attribute from an array of element values.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
This class helps build Operations.
Definition Builders.h:209
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition Builders.h:528
This class represents an operand of an operation.
Definition Value.h:254
This is a value defined by a result of an operation.
Definition Value.h:454
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88
AttrClass getAttrOfType(StringAttr name)
Definition Operation.h:576
bool hasAttrOfType(NameT &&name)
Definition Operation.h:601
bool hasAttr(StringAttr name)
Return true if the operation has an attribute with the provided name, false otherwise.
Definition Operation.h:586
Operation * getParentOp()
Returns the closest surrounding operation that contains this operation or nullptr if this is a top-le...
Definition Operation.h:252
OpTy getParentOfType()
Return the closest surrounding parent operation that is of type 'OpTy'.
Definition Operation.h:256
void setAttr(StringAttr name, Attribute value)
If the an attribute exists with the specified name, change it to the new value.
Definition Operation.h:608
operand_type_range getOperandTypes()
Definition Operation.h:423
result_type_range getResultTypes()
Definition Operation.h:454
std::enable_if_t< llvm::function_traits< std::decay_t< FnT > >::num_args==1, RetT > walk(FnT &&callback)
Walk the operation by calling the callback for each nested operation (including this one),...
Definition Operation.h:823
result_range getOpResults()
Definition Operation.h:446
MLIRContext * getContext()
Return the context this operation is associated with.
Definition Operation.h:234
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
A range-style iterator that allows for iterating over the offsets of all potential tiles of size tile...
This class provides an abstraction over the various different ranges of value types.
Definition TypeRange.h:40
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:389
type_range getTypes() const
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96
void setType(Type newType)
Mutate the type of this Value to be of the specified type.
Definition Value.h:116
Type getType() const
Return the type of this value.
Definition Value.h:105
static WalkResult skip()
Definition WalkResult.h:48
static WalkResult advance()
Definition WalkResult.h:47
Operation * getOwner() const
Return the owner of this operand.
Definition UseDefLists.h:38
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:717
void populateSCFStructuralTypeConversionsAndLegality(const TypeConverter &typeConverter, RewritePatternSet &patterns, ConversionTarget &target, PatternBenefit benefit=1)
Populates patterns for SCF structural type conversions and sets up the provided ConversionTarget with...
Value makeArithReduction(OpBuilder &b, Location loc, CombiningKind kind, Value v1, Value acc, arith::FastMathFlagsAttr fastmath=nullptr, Value mask=nullptr)
Returns the result value of reducing two scalar/vector values with the corresponding arith operation.
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc, ValueRange values, ArrayRef< int64_t > shape)
Create a vector of shape from a set of values using vector.insert_stride_slice.
bool requirePacked(const DistributeLayoutAttr layout)
Helper function to check if the layout is packed.
void setTemporaryLayout(const T &operandOrResult, const DistributeLayoutAttr layout)
Value createReductionNeutralValue(OpBuilder &builder, Location loc, Type type, vector::CombiningKind kind)
Creates a constant filled with the neutral (identity) value for the given reduction kind.
void setDistributeLayoutAttr(const OpResult &Result, const DistributeLayoutAttr layout)
[to-be-deprecated] Sets the DistributeLayoutAttr for a given OpResult user should use setAnchorLayout...
Value subgroupReduction(Location loc, OpBuilder &builder, Value input, vector::CombiningKind kind, uint32_t size)
Given an input value representing per-lane data, this function returns the result after performing a ...
bool matchUnitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< int64_t > &expandedUnitDims)
int getLargestDivisor(T dim, ArrayRef< T > candidates, ArrayRef< T > candidateMultiples={})
Helper Function to find a proper instruction multiple for the user-supplied sg-level data shape (dive...
FailureOr< VectorType > getDistVecTypeBasedOnLaneLayout(DistributeLayoutAttr layout, VectorType originalType)
Helper function to get distributed vector type for a source vector type according to the lane_layout.
Value lowerToVectorReductions(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, Location loc, PatternRewriter &rewriter)
Given a src and an acc argumments from a vector::MultiDimReductionOp, lower to a set of vector::Reduc...
bool requireTranspose(const DistributeLayoutAttr layout, const uArch::uArch *uArch)
Helper function to check if the layout requires a transpose effect.
bool matchSplitDimExpansion(ArrayRef< int64_t > src, ArrayRef< int64_t > dst, SmallVector< SmallVector< int64_t > > &splitDimGroups)
void doSCFStructuralTypeConversionWithTensorType(Operation *op, TypeConverter converter)
Do type conversion for SCF structural ops, e.g., scf.for using SCF structure type conversion patterns...
DistributeLayoutAttr getDistributeLayoutAttr(const Value value)
Retrieves the DistributeLayoutAttr associated with a given Value.
std::string getTemporaryLayoutName(const OpOperand &operand)
Return the attribute name for the OpOperand to attach DistributeLayoutAttr.
std::optional< std::string > getChipStr(Operation *op)
Retrieves the chip string from the XeVM target attribute of the parent GPU module operation.
SmallVector< Value > extractVectorsWithShapeFromValue(OpBuilder &builder, Location loc, Value value, ArrayRef< int64_t > shape)
Extract a set of small vectors from a value with a given shape using vector.extract_stride_slice.
DistributeLayoutAttr getTemporaryLayout(const T &operandOrResult)
get and set distribute layout attribute for non-anchor operations (and offsets/masks of load/store op...
Value lowerCrossLaneReductionToShuffles(TypedValue< VectorType > src, TypedValue< VectorType > acc, vector::CombiningKind kind, int64_t reductionDim, int64_t reductionSize, Location loc, PatternRewriter &rewriter)
Lowers cross-lane reductions to shuffle operations on a 2D vector.
SmallVector< Value > flattenValues(ArrayRef< ValueRange > values)
Flatten a set of ValueRange into a single SmallVector<Value>
SmallVector< OpFoldResult > addWithRightAligned(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with automatic alignment.
SmallVector< OpFoldResult > addElementwise(OpBuilder &builder, Location loc, ArrayRef< OpFoldResult > lhs, ArrayRef< OpFoldResult > rhs)
Generates element-wise addition ops of two arrays with same length.
FailureOr< VectorType > getDistributedVectorType(xegpu::TensorDescType tdescTy)
If tensor descriptor has a layout attribute it is used in SIMT mode.
Include the generated interface declarations.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition Utils.cpp:307
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Definition Value.h:494
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Definition Utils.cpp:114
std::optional< SmallVector< int64_t > > computeShapeRatio(ArrayRef< int64_t > shape, ArrayRef< int64_t > subShape)
Return the multi-dimensional integral ratio of subShape to the trailing dimensions of shape.
virtual int getSubgroupSize() const =0
StringRef getName() const
Definition uArchBase.h:163