doxygen/IntelGpuXe2_8h_source.html

//===--- IntelGpuXe2.h ------------------------------------------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// \file

// Xe2 uArch definition. Xe2 is the second generation of Intel Xe GPUs.

// This file defines the uArch details for Xe2 and its derived architectures.

// This includes Ponte Vecchio (PVC) and Battlemage (BMG) architectures.

//

//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_XEGPU_UARCH_INTELGPUXE2_H

#define MLIR_DIALECT_XEGPU_UARCH_INTELGPUXE2_H


#include "mlir/Dialect/XeGPU/uArch/uArchBase.h"

#include "mlir/IR/BuiltinTypes.h"

#include "mlir/IR/TypeUtilities.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/Support/DebugLog.h"

#include <map>

#include <string>


using namespace mlir;

using namespace mlir::xegpu::uArch;


namespace mlir {

namespace xegpu {


namespace uArch {


/// Shared base for the Xe2-family uArch descriptions defined in this file.
/// It stores the Xe core description and provides the subgroup size and the
/// general packed-format bit size common to the derived architectures.
struct Xe2Plus : public uArch {

  /// Forwards the name, description and instruction registry to the `uArch`
  /// base class and keeps a copy of \p xeCore.
  Xe2Plus(StringRef archName, StringRef archDescription,
          llvm::ArrayRef<const Instruction *> instructionRegistry,
          const XeCoreInfo &xeCore)
      : uArch(archName, archDescription, instructionRegistry), xeCore(xeCore) {
  }

  /// Returns the subgroup size, which is fixed at 16.
  int getSubgroupSize() const override {
    constexpr int kSubgroupSize = 16;
    return kSubgroupSize;
  }

  /// Returns the general packed-format bit size, which is fixed at 32.
  unsigned getGeneralPackedFormatBitSize() const override {
    constexpr unsigned kGeneralPackedBits = 32;
    return kGeneralPackedBits;
  }

protected:
  /// Xe core description supplied by the concrete architecture.
  XeCoreInfo xeCore;
};


//===----------------------------------------------------------------------===//

// uArch instructions

//===----------------------------------------------------------------------===//


/// Description of the subgroup-scope 2D block store instruction.
struct Subgroup2DBlockStoreInstruction : public Instruction {

  Subgroup2DBlockStoreInstruction()
      : Instruction(InstructionKind::Subgroup2DBlockStore,
                    InstructionScope::Subgroup) {}

  /// LLVM-style RTTI hook: true when \p B is a 2D block store instruction.
  static bool classof(const Instruction *B) {
    return B->getInstructionKind() == InstructionKind::Subgroup2DBlockStore;
  }

  // Source :
  // https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_2d_block_io.html#_add_a_new_section_5_2_x_cl_intel_subgroup_2d_block_io
  /// Returns the supported (block widths, block heights, array counts) for a
  /// store of elements of type \p elemTy, or std::nullopt when the element
  /// size is not 1, 2 or 4 bytes.
  ///
  /// The element size is the bit width divided by 8 (integer division), so
  /// sub-byte element types yield std::nullopt.
  std::optional<
      std::tuple<llvm::ArrayRef<int>, llvm::ArrayRef<int>, llvm::ArrayRef<int>>>
  getBlockWidthHeightCount(Type elemTy) const {
    static const int kHeight[] = {1, 2, 4, 8};
    static const int kWidth16[] = {16};
    // 1-byte elements use a 32-wide block, matching the 1-byte entries of the
    // 2D block load and prefetch tables in this file.
    static const int kWidth32[] = {32};
    static const int kCount[] = {1};

    const int elemByteSize = elemTy.getIntOrFloatBitWidth() / 8;
    if (elemByteSize == 1)
      return std::make_tuple(llvm::ArrayRef<int>(kWidth32),
                             llvm::ArrayRef<int>(kHeight),
                             llvm::ArrayRef<int>(kCount));
    if (elemByteSize == 2 || elemByteSize == 4)
      return std::make_tuple(llvm::ArrayRef<int>(kWidth16),
                             llvm::ArrayRef<int>(kHeight),
                             llvm::ArrayRef<int>(kCount));
    return std::nullopt;
  }

  /// Packed format bit size for this instruction: always 16.
  int32_t getPackedFormatBitSize() const { return 16; }
};


struct Subgroup2DBlockLoadInstruction : public Instruction {


  Subgroup2DBlockLoadInstruction()

      : Instruction(InstructionKind::Subgroup2DBlockLoad,

                    InstructionScope::Subgroup) {}


  static bool classof(const Instruction *B) {

    return B->getInstructionKind() == InstructionKind::Subgroup2DBlockLoad;

  }


  // Source :

  // https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_2d_block_io.html#_add_a_new_section_5_2_x_cl_intel_subgroup_2d_block_io

  std::optional<

      std::tuple<llvm::ArrayRef<int>, llvm::ArrayRef<int>, llvm::ArrayRef<int>>>


  getBlockWidthHeightCount(Type elemTy, bool hasTransform, bool hasTranspose,

                           bool upConv = false) const {

    static const int kHeightAtLeast1[] = {1, 2, 4, 8, 16, 32};

    static const int kHeightAtLeast8[] = {8, 16, 32};

    static const int kHeightAtLeast16[] = {16, 32};

    static const int kHeightAtLeast32[] = {32};


    static const int kWidth32[] = {32};

    static const int kWidth16[] = {16};

    static const int kWidth8[] = {8};


    static const int32_t kCount1[] = {1};

    static const int32_t kCount2[] = {1, 2};

    static const int32_t kCount4[] = {1, 2, 4};

    static const int32_t kCount4Only[] = {4};

    // (elemBytes, transform, transpose, upConvert)

    using Key = std::tuple<int, uint8_t, uint8_t, uint8_t>;

    // (widths, heights, counts)

    using Value = std::tuple<llvm::ArrayRef<int32_t>, llvm::ArrayRef<int32_t>,

                             llvm::ArrayRef<int32_t>>;

    static const llvm::DenseMap<Key, Value> kMap = {

        {{1, false, false, false}, {kWidth32, kHeightAtLeast1, kCount2}},

        {{1, false, false, true}, {kWidth16, kHeightAtLeast8, kCount4Only}},

        {{2, false, false, false}, {kWidth16, kHeightAtLeast1, kCount2}},

        {{4, false, false, false}, {kWidth16, kHeightAtLeast1, kCount1}},

        // Block Loads with Transform:

        {{1, true, false, false}, {kWidth16, kHeightAtLeast32, kCount4}},

        {{2, true, false, false}, {kWidth16, kHeightAtLeast16, kCount2}},

        // Block Loads with Transpose:

        {{4, false, true, false}, {kWidth8, kHeightAtLeast16, kCount1}},

    };

    const int elemByteSize = elemTy.getIntOrFloatBitWidth() / 8;

    auto it = kMap.find({elemByteSize, hasTransform, hasTranspose, upConv});

    if (it != kMap.end())

      return it->second;

    return std::nullopt;

  }


  int32_t getPackedFormatBitSize() const { return 16; }

};


struct Subgroup2DBlockPrefetchInstruction : public Instruction {


  Subgroup2DBlockPrefetchInstruction()

      : Instruction(InstructionKind::Subgroup2DBlockPrefetch,

                    InstructionScope::Subgroup) {}


  static bool classof(const Instruction *B) {

    return B->getInstructionKind() == InstructionKind::Subgroup2DBlockPrefetch;

  }


  // Source :

  // https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html#_add_a_new_section_6_15_x_sub_group_prefetch_functions

  std::optional<

      std::tuple<llvm::ArrayRef<int>, llvm::ArrayRef<int>, llvm::ArrayRef<int>>>


  getBlockWidthHeightCount(Type elemTy) const {

    static const int kHeightAtLeast1[] = {1, 2, 4, 8, 16, 32};


    static const int kWidth32[] = {32};

    static const int kWidth16[] = {16};


    static const int32_t kCount1[] = {1};

    static const int32_t kCount2[] = {1, 2};

    // elemBytes

    using Key = int;

    // (widths, heights, counts)

    using Value = std::tuple<llvm::ArrayRef<int32_t>, llvm::ArrayRef<int32_t>,

                             llvm::ArrayRef<int32_t>>;

    static const llvm::DenseMap<Key, Value> kMap = {

        {1, {kWidth32, kHeightAtLeast1, kCount2}},

        {2, {kWidth16, kHeightAtLeast1, kCount2}},

        {4, {kWidth16, kHeightAtLeast1, kCount1}},

    };

    const int elemByteSize = elemTy.getIntOrFloatBitWidth() / 8;

    auto it = kMap.find(elemByteSize);

    if (it != kMap.end())

      return it->second;

    return std::nullopt;

  }


  int32_t getPackedFormatBitSize() const { return 16; }

};


struct SubgroupMatrixMultiplyAcc : public Instruction,

                                   public MMAInstructionInterface {


  SubgroupMatrixMultiplyAcc(unsigned packedFormatBitSizeA,

                            unsigned packedFormatBitSizeB)

      : Instruction(InstructionKind::SubgroupMatrixMultiplyAcc,

                    InstructionScope::Subgroup),

        packedFormatBitSizeA(packedFormatBitSizeA),

        packedFormatBitSizeB(packedFormatBitSizeB) {}


  static bool classof(const Instruction *B) {

    return B->getInstructionKind() ==

           InstructionKind::SubgroupMatrixMultiplyAcc;

  }


  // Source:

  // https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_matrix_multiply_accumulate.html


  // Override all virtuals from MatrixOpInterface

  virtual llvm::SmallVector<std::pair<uint32_t, uint32_t>, 16>

  getSupportedShapes(Type dataType, MMAOpndKind matrixType) override;

  virtual llvm::SmallVector<Type, 8>

  getSupportedTypes(MLIRContext &context, MMAOpndKind matrixType) override;

  virtual bool

  checkSupportedShapesAndTypes(std::pair<uint32_t, uint32_t> AShape,

                               std::pair<uint32_t, uint32_t> BShape,

                               std::pair<uint32_t, uint32_t> CShape,

                               std::pair<uint32_t, uint32_t> DShape, Type AType,

                               Type BType, Type CType, Type DType) override;

  virtual bool checkSupportedTypes(Type AType, Type BType, Type CType,

                                   Type DType) override;

  virtual bool validate(std::pair<uint32_t, uint32_t> AShape,

                        std::pair<uint32_t, uint32_t> BShape,

                        std::pair<uint32_t, uint32_t> CShape,

                        std::pair<uint32_t, uint32_t> DShape, Type AType,

                        Type BType, Type CType, Type DType) override;

  virtual llvm::SmallVector<uint32_t, 8>

  getSupportedM(Type type) const override;

  virtual llvm::SmallVector<uint32_t, 8>

  getSupportedK(Type type) const override;

  virtual llvm::SmallVector<uint32_t, 8>

  getSupportedN(Type type) const override;


  unsigned getPackedFormatBitSizeA() const { return packedFormatBitSizeA; }

  unsigned getPackedFormatBitSizeB() const { return packedFormatBitSizeB; }


protected:

  const unsigned packedFormatBitSizeA;

  const unsigned packedFormatBitSizeB;

};


//===----------------------------------------------------------------------===//

// uArch instances

//===----------------------------------------------------------------------===//


/// uArch description for the Ponte Vecchio ("pvc") architecture.
struct PVCuArch final : public Xe2Plus {

  /// Returns the instructions registered for this architecture. The
  /// instruction objects and the array holding their addresses are
  /// function-local statics, so the returned ArrayRef stays valid for the
  /// lifetime of the program.
  static llvm::ArrayRef<const Instruction *> getInstructionRegistryArr() {
    static const SubgroupMatrixMultiplyAcc dpasInst{16, 32};
    static const Subgroup2DBlockLoadInstruction loadNdInst;
    static const Subgroup2DBlockStoreInstruction storeNdInst;
    static const Subgroup2DBlockPrefetchInstruction prefetchNdInst;
    static const Instruction *arr[] = {&dpasInst, &loadNdInst, &storeNdInst,
                                       &prefetchNdInst};
    return arr;
  }

  PVCuArch()
      : Xe2Plus("pvc",                        // archName
                "Ponte Vecchio Architecture", // archDescription
                getInstructionRegistryArr(),
                XeCoreInfo(8, SharedMemory(512 * 1024, 4), 8, 8) // xeCore
        ) {}

  /// Returns the process-wide instance, constructed on first use.
  static const uArch *getInstance() {
    static const PVCuArch instance;
    // A derived-to-base pointer conversion is implicit and lets the compiler
    // apply any required pointer adjustment; reinterpret_cast would not.
    return &instance;
  }
};


/// uArch description for the Battlemage ("bmg") architecture.
struct BMGuArch : public Xe2Plus {

  /// Returns the instructions registered for this architecture. The
  /// instruction objects and the array holding their addresses are
  /// function-local statics, so the returned ArrayRef stays valid for the
  /// lifetime of the program.
  static llvm::ArrayRef<const Instruction *> getInstructionRegistryArr() {
    static const SubgroupMatrixMultiplyAcc dpasInst{16, 32};
    static const Subgroup2DBlockLoadInstruction loadNdInst;
    static const Subgroup2DBlockStoreInstruction storeNdInst;
    static const Subgroup2DBlockPrefetchInstruction prefetchNdInst;
    static const Instruction *arr[] = {&dpasInst, &loadNdInst, &storeNdInst,
                                       &prefetchNdInst};
    return arr;
  }

  BMGuArch()
      : Xe2Plus("bmg",                     // archName
                "Battlemage Architecture", // archDescription
                getInstructionRegistryArr(),
                XeCoreInfo(8, SharedMemory(256 * 1024, 4), 8, 8) // xeCore
        ) {}

  /// Returns the process-wide instance, constructed on first use.
  static const uArch *getInstance() {
    static const BMGuArch instance;
    // A derived-to-base pointer conversion is implicit and lets the compiler
    // apply any required pointer adjustment; reinterpret_cast would not.
    return &instance;
  }
};


/// Returns the uArch instance whose name matches \p archName
/// (case-insensitively), or nullptr when no architecture with that name is
/// defined in this file. Recognized names are "pvc" and "bmg".
///
/// An unknown name is reported through the nullptr result rather than
/// llvm_unreachable, which is undefined behaviour in builds without
/// assertions; callers should check the result before dereferencing it.
inline const uArch *getUArch(llvm::StringRef archName) {
  if (archName.equals_insensitive("pvc"))
    return PVCuArch::getInstance();
  if (archName.equals_insensitive("bmg"))
    return BMGuArch::getInstance();
  return nullptr;
}


} // namespace uArch


} // namespace xegpu

} // namespace mlir


//===----------------------------------------------------------------------===//

// Instruction implementations

//===----------------------------------------------------------------------===//


/// Returns every supported (rows, cols) shape for the operand \p matrixType
/// with elements of \p dataType: A is M x K, B is K x N, and C and D are
/// M x N. The result is empty when \p matrixType is not one of those four.
inline llvm::SmallVector<std::pair<uint32_t, uint32_t>, 16>
SubgroupMatrixMultiplyAcc::getSupportedShapes(Type dataType,
                                              MMAOpndKind matrixType) {
  using ShapeList = llvm::SmallVector<std::pair<uint32_t, uint32_t>, 16>;

  // Builds all (row, col) pairs, iterating over `rows` in the outer loop.
  const auto crossProduct =
      [](const llvm::SmallVector<uint32_t, 8> &rows,
         const llvm::SmallVector<uint32_t, 8> &cols) -> ShapeList {
    ShapeList shapes;
    for (unsigned row : rows)
      for (unsigned col : cols)
        shapes.emplace_back(row, col);
    return shapes;
  };

  // All three dimension lists are queried up front, whichever operand is
  // requested.
  const auto mSizes = getSupportedM(dataType);
  const auto kSizes = getSupportedK(dataType);
  const auto nSizes = getSupportedN(dataType);

  switch (matrixType) {
  case MMAOpndKind::MatrixA:
    return crossProduct(mSizes, kSizes);
  case MMAOpndKind::MatrixB:
    return crossProduct(kSizes, nSizes);
  case MMAOpndKind::MatrixC:
  case MMAOpndKind::MatrixD:
    return crossProduct(mSizes, nSizes);
  }
  return ShapeList();
}


/// Returns the element types listed for the operand \p matrixType: bf16, f16
/// and tf32 for A and B; bf16, f16 and f32 for C and D. The result is empty
/// when \p matrixType is not one of those four.
inline llvm::SmallVector<Type, 8>
SubgroupMatrixMultiplyAcc::getSupportedTypes(MLIRContext &context,
                                             MMAOpndKind matrixType) {
  MLIRContext *ctx = &context;
  Type bf16 = BFloat16Type::get(ctx);
  Type f16 = Float16Type::get(ctx);
  Type tf32 = FloatTF32Type::get(ctx);
  Type f32 = Float32Type::get(ctx);

  const bool isMultiplicand = matrixType == MMAOpndKind::MatrixA ||
                              matrixType == MMAOpndKind::MatrixB;
  if (isMultiplicand)
    return {bf16, f16, tf32};

  const bool isAccOrResult = matrixType == MMAOpndKind::MatrixC ||
                             matrixType == MMAOpndKind::MatrixD;
  if (isAccOrResult)
    return {bf16, f16, f32};

  return {};
}


/// Returns true when the A/B/C/D element type combination is supported.
///
/// \p CType may be null (no accumulator), in which case it is not checked.
/// Accepted combinations:
///   - A and B both f16:  C (if present) and D are each f32 or f16.
///   - A and B both bf16: C (if present) and D are each f32 or bf16.
///   - A and B both tf32: C (if present) and D are f32.
///   - otherwise A and B must each be a 2-, 4- or 8-bit integer; C and D are
///     not checked in this case.
/// A debug message is logged whenever the combination is rejected.
inline bool SubgroupMatrixMultiplyAcc::checkSupportedTypes(Type AType,
                                                           Type BType,
                                                           Type CType,
                                                           Type DType) {
  const auto isSupportedInt = [](Type ty) {
    return ty.isInteger(2) || ty.isInteger(4) || ty.isInteger(8);
  };

  bool supported = false;
  if (AType.isF16() || BType.isF16()) {
    supported = AType == BType &&
                (!CType || CType.isF32() || CType.isF16()) &&
                (DType.isF32() || DType.isF16());
  } else if (AType.isBF16() || BType.isBF16()) {
    supported = AType == BType &&
                (!CType || CType.isF32() || CType.isBF16()) &&
                (DType.isF32() || DType.isBF16());
  } else if (AType.isTF32() || BType.isTF32()) {
    // The accumulator must itself be f32; testing DType here would let a
    // non-f32 accumulator through whenever the result type is f32.
    supported = AType == BType && (!CType || CType.isF32()) && DType.isF32();
  } else {
    // Both multiplicands must be supported integers; one valid operand does
    // not make up for an unsupported one on the other side.
    supported = isSupportedInt(AType) && isSupportedInt(BType);
  }

  if (!supported) {
    LDBG() << "Unsupported dpas combinations of Dst, Acc, A and B matrices.";
  }
  return supported;
}


inline bool SubgroupMatrixMultiplyAcc::checkSupportedShapesAndTypes(

    std::pair<uint32_t, uint32_t> AShape, std::pair<uint32_t, uint32_t> BShape,

    std::pair<uint32_t, uint32_t> CShape, std::pair<uint32_t, uint32_t> DShape,

    Type AType, Type BType, Type CType, Type DType) {

  auto supportedAShapes = getSupportedShapes(AType, MMAOpndKind::MatrixA);

  auto supportedBShapes = getSupportedShapes(BType, MMAOpndKind::MatrixB);

  auto supportedCShapes = getSupportedShapes(CType, MMAOpndKind::MatrixC);

  auto supportedDShapes = getSupportedShapes(DType, MMAOpndKind::MatrixD);

  return llvm::is_contained(supportedAShapes, AShape) &&

         llvm::is_contained(supportedBShapes, BShape) &&

         llvm::is_contained(supportedCShapes, CShape) &&

         llvm::is_contained(supportedDShapes, DShape) &&

         checkSupportedTypes(AType, BType, CType, DType);

}


/// Validates an MMA configuration. This is a thin wrapper: the result is
/// exactly that of checkSupportedShapesAndTypes for the same arguments.
inline bool SubgroupMatrixMultiplyAcc::validate(
    std::pair<uint32_t, uint32_t> AShape, std::pair<uint32_t, uint32_t> BShape,
    std::pair<uint32_t, uint32_t> CShape, std::pair<uint32_t, uint32_t> DShape,
    Type AType, Type BType, Type CType, Type DType) {
  const bool isValid = checkSupportedShapesAndTypes(
      AShape, BShape, CShape, DShape, AType, BType, CType, DType);
  return isValid;
}


/// Returns the supported M sizes: every integer from 1 to 8 inclusive. The
/// element type \p type does not influence the result.
inline llvm::SmallVector<uint32_t, 8>
SubgroupMatrixMultiplyAcc::getSupportedM(Type type) const {
  constexpr uint32_t kMaxM = 8;
  llvm::SmallVector<uint32_t, 8> mSizes;
  for (uint32_t m = 1; m <= kMaxM; ++m)
    mSizes.push_back(m);
  return mSizes;
}


/// Returns the single supported K size for elements of \p type, chosen by the
/// element bit width: 2 or 4 bits -> 64, 8 bits -> 32, 16 bits -> 16,
/// 32 bits -> 8. \p type must be an integer or float type; any other bit
/// width is treated as unreachable.
inline llvm::SmallVector<uint32_t, 8>
SubgroupMatrixMultiplyAcc::getSupportedK(Type type) const {
  // assert if data type is not int or float type
  assert(type.isIntOrFloat() && "Matrix type must be int or float");

  switch (type.getIntOrFloatBitWidth()) {
  case 2:
  case 4:
    return {uint32_t{64}};
  case 8:
    return {uint32_t{32}};
  case 16:
    return {uint32_t{16}};
  case 32:
    return {uint32_t{8}};
  default:
    llvm_unreachable("Invalid int or float");
  }
}


/// Returns the supported N sizes: the single value 16. The element type
/// \p type does not influence the result.
inline llvm::SmallVector<uint32_t, 8>
SubgroupMatrixMultiplyAcc::getSupportedN(Type type) const {
  llvm::SmallVector<uint32_t, 8> nSizes;
  nSizes.push_back(16);
  return nSizes;
}


#endif // MLIR_DIALECT_XEGPU_UARCH_INTELGPUXE2_H

b
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
Definition LinalgTransformOps.cpp:2097

result
result
Definition LinalgTransformOps.cpp:2098

TypeUtilities.h

llvm::ArrayRef
Definition LLVM.h:48

llvm::DenseMap
Definition LLVM.h:55

llvm::SmallVector
Definition LLVM.h:72

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74

mlir::Type::isTF32
bool isTF32() const
Definition Types.cpp:39

mlir::Type::isF32
bool isF32() const
Definition Types.cpp:40

mlir::Type::isInteger
bool isInteger() const
Return true if this is an integer type (with the specified width).
Definition Types.cpp:56

mlir::Type::isIntOrFloat
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
Definition Types.cpp:116

mlir::Type::isF16
bool isF16() const
Definition Types.cpp:38

mlir::Type::getIntOrFloatBitWidth
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
Definition Types.cpp:122

mlir::Type::isBF16
bool isBF16() const
Definition Types.cpp:37

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96

BuiltinTypes.h

mlir::xegpu::uArch
Definition IntelGpuXe2.h:31

mlir::xegpu::uArch::InstructionKind
InstructionKind
Definition uArchBase.h:36

mlir::xegpu::uArch::InstructionKind::Subgroup2DBlockPrefetch
@ Subgroup2DBlockPrefetch
Definition uArchBase.h:41

mlir::xegpu::uArch::InstructionKind::SubgroupMatrixMultiplyAcc
@ SubgroupMatrixMultiplyAcc
Definition uArchBase.h:37

mlir::xegpu::uArch::InstructionKind::Subgroup2DBlockLoad
@ Subgroup2DBlockLoad
Definition uArchBase.h:40

mlir::xegpu::uArch::InstructionKind::Subgroup2DBlockStore
@ Subgroup2DBlockStore
Definition uArchBase.h:39

mlir::xegpu::uArch::getUArch
const uArch * getUArch(llvm::StringRef archName)
Definition IntelGpuXe2.h:268

mlir::xegpu::uArch::MMAOpndKind
MMAOpndKind
Definition uArchBase.h:207

mlir::xegpu::uArch::MMAOpndKind::MatrixD
@ MatrixD
Definition uArchBase.h:207

mlir::xegpu::uArch::MMAOpndKind::MatrixA
@ MatrixA
Definition uArchBase.h:207

mlir::xegpu::uArch::MMAOpndKind::MatrixC
@ MatrixC
Definition uArchBase.h:207

mlir::xegpu::uArch::MMAOpndKind::MatrixB
@ MatrixB
Definition uArchBase.h:207

mlir::xegpu::uArch::InstructionScope
InstructionScope
Definition uArchBase.h:35

mlir::xegpu::uArch::InstructionScope::Subgroup
@ Subgroup
Definition uArchBase.h:35

mlir::xegpu
Definition XeGPU.h:25

mlir
Include the generated interface declarations.
Definition AliasAnalysis.h:19

mlir::xegpu::uArch::BMGuArch::BMGuArch
BMGuArch()
Definition IntelGpuXe2.h:256

mlir::xegpu::uArch::BMGuArch::getInstructionRegistryArr
static llvm::ArrayRef< const Instruction * > getInstructionRegistryArr()
Definition IntelGpuXe2.h:246

mlir::xegpu::uArch::BMGuArch::getInstance
static const uArch * getInstance()
Definition IntelGpuXe2.h:262

mlir::xegpu::uArch::Instruction
Definition uArchBase.h:50

mlir::xegpu::uArch::Instruction::Instruction
Instruction(InstructionKind kind, InstructionScope scope)
Definition uArchBase.h:51

mlir::xegpu::uArch::MMAInstructionInterface
Definition uArchBase.h:208

mlir::xegpu::uArch::PVCuArch::PVCuArch
PVCuArch()
Definition IntelGpuXe2.h:233

mlir::xegpu::uArch::PVCuArch::getInstructionRegistryArr
static llvm::ArrayRef< const Instruction * > getInstructionRegistryArr()
Definition IntelGpuXe2.h:223

mlir::xegpu::uArch::PVCuArch::getInstance
static const uArch * getInstance()
Definition IntelGpuXe2.h:239

mlir::xegpu::uArch::SharedMemory
Definition uArchBase.h:176

mlir::xegpu::uArch::Subgroup2DBlockLoadInstruction
Definition IntelGpuXe2.h:79

mlir::xegpu::uArch::Subgroup2DBlockLoadInstruction::Subgroup2DBlockLoadInstruction
Subgroup2DBlockLoadInstruction()
Definition IntelGpuXe2.h:80

mlir::xegpu::uArch::Subgroup2DBlockLoadInstruction::getPackedFormatBitSize
int32_t getPackedFormatBitSize() const
Definition IntelGpuXe2.h:129

mlir::xegpu::uArch::Subgroup2DBlockLoadInstruction::getBlockWidthHeightCount
std::optional< std::tuple< llvm::ArrayRef< int >, llvm::ArrayRef< int >, llvm::ArrayRef< int > > > getBlockWidthHeightCount(Type elemTy, bool hasTransform, bool hasTranspose, bool upConv=false) const
Definition IntelGpuXe2.h:91

mlir::xegpu::uArch::Subgroup2DBlockLoadInstruction::classof
static bool classof(const Instruction *B)
Definition IntelGpuXe2.h:83

mlir::xegpu::uArch::Subgroup2DBlockPrefetchInstruction
Definition IntelGpuXe2.h:132

mlir::xegpu::uArch::Subgroup2DBlockPrefetchInstruction::classof
static bool classof(const Instruction *B)
Definition IntelGpuXe2.h:136

mlir::xegpu::uArch::Subgroup2DBlockPrefetchInstruction::getBlockWidthHeightCount
std::optional< std::tuple< llvm::ArrayRef< int >, llvm::ArrayRef< int >, llvm::ArrayRef< int > > > getBlockWidthHeightCount(Type elemTy) const
Definition IntelGpuXe2.h:143

mlir::xegpu::uArch::Subgroup2DBlockPrefetchInstruction::getPackedFormatBitSize
int32_t getPackedFormatBitSize() const
Definition IntelGpuXe2.h:167

mlir::xegpu::uArch::Subgroup2DBlockPrefetchInstruction::Subgroup2DBlockPrefetchInstruction
Subgroup2DBlockPrefetchInstruction()
Definition IntelGpuXe2.h:133

mlir::xegpu::uArch::Subgroup2DBlockStoreInstruction
Definition IntelGpuXe2.h:48

mlir::xegpu::uArch::Subgroup2DBlockStoreInstruction::classof
static bool classof(const Instruction *B)
Definition IntelGpuXe2.h:52

mlir::xegpu::uArch::Subgroup2DBlockStoreInstruction::Subgroup2DBlockStoreInstruction
Subgroup2DBlockStoreInstruction()
Definition IntelGpuXe2.h:49

mlir::xegpu::uArch::Subgroup2DBlockStoreInstruction::getBlockWidthHeightCount
std::optional< std::tuple< llvm::ArrayRef< int >, llvm::ArrayRef< int >, llvm::ArrayRef< int > > > getBlockWidthHeightCount(Type elemTy) const
Definition IntelGpuXe2.h:59

mlir::xegpu::uArch::Subgroup2DBlockStoreInstruction::getPackedFormatBitSize
int32_t getPackedFormatBitSize() const
Definition IntelGpuXe2.h:76

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc
Definition IntelGpuXe2.h:171

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::getSupportedShapes
virtual llvm::SmallVector< std::pair< uint32_t, uint32_t >, 16 > getSupportedShapes(Type dataType, MMAOpndKind matrixType) override
Definition IntelGpuXe2.h:288

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::getSupportedN
virtual llvm::SmallVector< uint32_t, 8 > getSupportedN(Type type) const override
Definition IntelGpuXe2.h:435

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::validate
virtual bool validate(std::pair< uint32_t, uint32_t > AShape, std::pair< uint32_t, uint32_t > BShape, std::pair< uint32_t, uint32_t > CShape, std::pair< uint32_t, uint32_t > DShape, Type AType, Type BType, Type CType, Type DType) override
Definition IntelGpuXe2.h:393

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::getSupportedM
virtual llvm::SmallVector< uint32_t, 8 > getSupportedM(Type type) const override
Definition IntelGpuXe2.h:402

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::getSupportedK
virtual llvm::SmallVector< uint32_t, 8 > getSupportedK(Type type) const override
Definition IntelGpuXe2.h:407

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::packedFormatBitSizeA
const unsigned packedFormatBitSizeA
Definition IntelGpuXe2.h:214

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::SubgroupMatrixMultiplyAcc
SubgroupMatrixMultiplyAcc(unsigned packedFormatBitSizeA, unsigned packedFormatBitSizeB)
Definition IntelGpuXe2.h:172

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::classof
static bool classof(const Instruction *B)
Definition IntelGpuXe2.h:178

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::getPackedFormatBitSizeB
unsigned getPackedFormatBitSizeB() const
Definition IntelGpuXe2.h:211

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::getPackedFormatBitSizeA
unsigned getPackedFormatBitSizeA() const
Definition IntelGpuXe2.h:210

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::getSupportedTypes
virtual llvm::SmallVector< Type, 8 > getSupportedTypes(MLIRContext &context, MMAOpndKind matrixType) override
Definition IntelGpuXe2.h:325

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::checkSupportedShapesAndTypes
virtual bool checkSupportedShapesAndTypes(std::pair< uint32_t, uint32_t > AShape, std::pair< uint32_t, uint32_t > BShape, std::pair< uint32_t, uint32_t > CShape, std::pair< uint32_t, uint32_t > DShape, Type AType, Type BType, Type CType, Type DType) override
Definition IntelGpuXe2.h:378

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::checkSupportedTypes
virtual bool checkSupportedTypes(Type AType, Type BType, Type CType, Type DType) override
Definition IntelGpuXe2.h:345

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc::packedFormatBitSizeB
const unsigned packedFormatBitSizeB
Definition IntelGpuXe2.h:215

mlir::xegpu::uArch::Xe2Plus::xeCore
XeCoreInfo xeCore
Definition IntelGpuXe2.h:42

mlir::xegpu::uArch::Xe2Plus::getGeneralPackedFormatBitSize
unsigned getGeneralPackedFormatBitSize() const override
Definition IntelGpuXe2.h:39

mlir::xegpu::uArch::Xe2Plus::getSubgroupSize
int getSubgroupSize() const override
Definition IntelGpuXe2.h:38

mlir::xegpu::uArch::Xe2Plus::Xe2Plus
Xe2Plus(StringRef archName, StringRef archDescription, llvm::ArrayRef< const Instruction * > instructionRegistry, const XeCoreInfo &xeCore)
Definition IntelGpuXe2.h:34

mlir::xegpu::uArch::XeCoreInfo
Definition uArchBase.h:191

mlir::xegpu::uArch::uArch
Definition uArchBase.h:143

mlir::xegpu::uArch::uArch::instructionRegistry
llvm::SmallDenseMap< InstructionKind, const Instruction *, 32 > instructionRegistry
Definition uArchBase.h:172

mlir::xegpu::uArch::uArch::uArch
uArch(StringRef name, StringRef description, llvm::ArrayRef< const Instruction * > instructionRegistry)
Definition uArchBase.h:145

uArchBase.h