doxygen/uArchBase_8h_source.html

//===- uArchBase.h ----------------------------------------------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// \file

// Base uArch definition for different architectures.

//

//

//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_XEGPU_UARCH_UARCHBASE_H

#define MLIR_DIALECT_XEGPU_UARCH_UARCHBASE_H


#include <any>

#include <functional>

#include <iostream>

#include <map>

#include <mutex>

#include <shared_mutex>

#include <tuple>


#include "mlir/IR/Types.h"

#include "llvm/ADT/SmallVector.h"


namespace mlir {

namespace xegpu {

namespace uArch {


// Bit size of the "general packed format", fixed at 32 here.
// NOTE(review): presumably the default that uArch::getGeneralPackedFormatBitSize()
// implementations report -- confirm against the concrete uArch definitions.
//
// Declared `inline` so that every translation unit including this header
// shares one definition instead of getting its own internal-linkage copy.
inline constexpr unsigned generalPackedFormatBitSize{32};


// Enumerates the scope an instruction can have: a single lane, a subgroup,
// a workgroup, or a cluster.
enum class InstructionScope {
  Lane,
  Subgroup,
  Workgroup,
  Cluster
};


// Enumerates the kinds of instructions a uArch description can refer to.
enum class InstructionKind {
  // Dot Product Accumulate Systolic (DPAS): a matrix multiply-add operation.
  SubgroupMatrixMultiplyAcc,
  // Subgroup-level 2D block write instruction.
  Subgroup2DBlockStore,
  // Subgroup-level 2D block load instruction.
  Subgroup2DBlockLoad,
  // Subgroup-level 2D block prefetch instruction.
  Subgroup2DBlockPrefetch,
  // @TODO: Add more instructions as needed
};


// A struct to represent basic information about an instruction.

// The primary purpose of the Instruction struct is to provide a generic way to

// represent information about an instruction and to use this information to

// generate the uArch. Specifc instruction in a uArch can inherit from this

// struct and add more fields as needed.


struct Instruction {


  Instruction(InstructionKind kind, InstructionScope scope)

      : instKind(kind), scope(scope) {}


  ~Instruction() = default;

  // Get methods

  InstructionKind getInstructionKind() const { return instKind; }

  InstructionScope getScope() const { return scope; }


  static llvm::StringRef toString(InstructionKind instKind) {

    switch (instKind) {

    case InstructionKind::SubgroupMatrixMultiplyAcc:

      return "dpas";

    case InstructionKind::Subgroup2DBlockStore:

      return "store_nd";

    case InstructionKind::Subgroup2DBlockLoad:

      return "load_nd";

    case InstructionKind::Subgroup2DBlockPrefetch:

      return "prefetch_nd";

    }

    llvm_unreachable("Unknown InstructionKind");

  }


  static std::optional<InstructionKind>


  parseInstructionKind(llvm::StringRef str) {

    if (str.equals_insensitive("dpas"))

      return InstructionKind::SubgroupMatrixMultiplyAcc;

    return std::nullopt;

  }


protected:

  const InstructionKind instKind; // Specific InstructionKind (e.g., DPAS)

  const InstructionScope scope;   // scope of the instruction (e.g., lane,

                                  // subgroup, workgroup, cluster)

  // @TODO: Add more fields as needed

};


// Register file modes (e.g. the "small" and "large" GRF modes), stored in a
// single byte.
enum class RegisterFileMode : uint8_t {
  Small,
  Large
};

// Register file types, stored in a single byte.
// NOTE(review): GRF/ARF presumably stand for general and architecture
// register files -- confirm.
enum class RegisterFileType : uint8_t {
  GRF,
  ARF
};


// A struct to represent register file information: the size of one register
// together with the supported register file modes and the number of registers
// available per thread in those modes.
struct RegisterFileInfo {
  // Creates an empty description: register size 0 and no modes.
  RegisterFileInfo() = default;

  // size:    size of one register, in bits.
  // mode:    supported register file modes.
  // numRegs: number of registers per thread per mode.
  //          NOTE(review): presumably entry i corresponds to mode[i]; this
  //          constructor does not check that the two lists have equal length.
  RegisterFileInfo(uint32_t size,
                   const llvm::SmallVector<RegisterFileMode, 4> &mode,
                   const llvm::SmallVector<uint32_t, 4> &numRegs)
      : size(size), mode(mode), numRegsPerThreadPerMode(numRegs) {}

  // Get methods
  uint32_t getSize() const { return size; }

  const llvm::SmallVector<RegisterFileMode, 4> &getModes() const {
    return mode;
  }

  const llvm::SmallVector<uint32_t, 4> &getNumRegsPerThreadPerMode() const {
    return numRegsPerThreadPerMode;
  }

protected:
  // Size per register in bits. Initialised to 0 so that a default-constructed
  // object never exposes an indeterminate value through getSize().
  uint32_t size = 0;
  llvm::SmallVector<RegisterFileMode, 4>
      mode; // e.g., "small", "large" GRF modes
  llvm::SmallVector<uint32_t, 4>
      numRegsPerThreadPerMode; // number of registers per thread per mode
};


// Levels of the cache hierarchy; each enumerator's underlying value equals its
// level number.
enum class CacheHierarchyLevel { L1 = 1, L2 = 2, L3 = 3 };

// A struct to represent cache information: total size, line size and the
// level of the hierarchy the cache sits at. The units of the two sizes are
// not specified by this struct.
struct CacheInfo {
  // Creates a description with size 0, line size 0 and level L1 (see the
  // default member initialisers below).
  CacheInfo() = default;

  CacheInfo(uint32_t size, uint32_t line_size,
            CacheHierarchyLevel hierarchy_level)
      : size(size), line_size(line_size), hierarchy_level(hierarchy_level) {}

  virtual ~CacheInfo() = default;

  // Get methods
  uint32_t getSize() const { return size; }
  uint32_t getLineSize() const { return line_size; }
  CacheHierarchyLevel getHierarchyLevel() const { return hierarchy_level; }

protected:
  // All members carry default initialisers so that a default-constructed
  // object never exposes indeterminate values through the getters. L1 is used
  // for the level because 0 is not a valid CacheHierarchyLevel enumerator.
  uint32_t size = 0;
  uint32_t line_size = 0;
  CacheHierarchyLevel hierarchy_level = CacheHierarchyLevel::L1;
  // @TODO: Add more fields as needed (e.g., associativity, num_banks,
  // bank_size, num_ports, port_width, bank_conflicts, hierarchy_level,
  // latency, throughput, bandwidth)
};


// Base description of a micro-architecture: a name, a free-form description
// and a registry mapping each InstructionKind to the Instruction that
// describes it. Concrete architectures derive from this struct and implement
// the pure virtual queries.
//
// Lifetime: `name` and `description` are stored as StringRef (non-owning
// views) and the registry stores raw Instruction pointers that this struct
// never deletes, so the caller must keep the referenced strings and
// Instruction objects alive for as long as the uArch is used.
struct uArch {
  // name:                identifier of the architecture.
  // description:         human-readable description.
  // instructionRegistry: instructions supported by the architecture. If two
  //                      entries share an InstructionKind, the later one wins.
  uArch(StringRef name, StringRef description,
        llvm::ArrayRef<const Instruction *> instructionRegistry)
      : name(name), description(description) {
    for (const Instruction *instr : instructionRegistry) {
      assert(instr && "Null Instruction passed to uArch registry");
      // With assertions compiled out, skip a null entry rather than
      // dereferencing it.
      if (!instr)
        continue;
      this->instructionRegistry[instr->getInstructionKind()] = instr;
    }
  }

  virtual ~uArch() = default;
  StringRef getName() const { return name; }
  StringRef getDescription() const { return description; }
  virtual int getSubgroupSize() const = 0;
  virtual unsigned getGeneralPackedFormatBitSize() const = 0;

  // Returns the Instruction registered for `instKind`. Asking for a kind that
  // was never registered is a programming error: it asserts, and in builds
  // without assertions it returns nullptr. Use isSupportedInstruction() to
  // test for presence first.
  const Instruction *getInstruction(InstructionKind instKind) const {
    auto it = instructionRegistry.find(instKind);
    assert(it != instructionRegistry.end() &&
           "Instruction not found in registry");
    // Never dereference end(): that is undefined behaviour when the assert
    // above is compiled out.
    return it != instructionRegistry.end() ? it->second : nullptr;
  }

  // Returns true if an Instruction was registered for `instr`.
  bool isSupportedInstruction(InstructionKind instr) const {
    return instructionRegistry.contains(instr);
  }

protected:
  StringRef name;
  StringRef description;
  llvm::SmallDenseMap<InstructionKind, const Instruction *, 32>
      instructionRegistry;
};


// Describes shared memory by its size and its alignment, both in bytes.
struct SharedMemory {
  // Builds a description from a size and an alignment (both in bytes).
  SharedMemory(uint32_t size, uint32_t alignment)
      : size{size}, alignment{alignment} {}

  // Alignment in bytes.
  uint32_t getAlignment() const { return alignment; }

  // Size in bytes.
  uint32_t getSize() const { return size; }

protected:
  // Size of the shared memory, in bytes.
  uint32_t size;
  // Alignment of the shared memory, in bytes.
  uint32_t alignment;
  // @TODO: Add more fields as needed (e.g., latency, throughput, bandwidth)
};


// Plain record of the resources of one Xe core: thread count, shared memory
// description, and the number of vector and matrix units. All fields are
// public and are set once by the constructor.
struct XeCoreInfo {
  // Number of threads.
  uint32_t num_threads;
  // Shared memory description (stored as a copy of the constructor argument).
  SharedMemory shared_memory;
  // Number of vector units.
  uint32_t num_vector_units;
  // Number of matrix units.
  uint32_t num_matrix_units;

  // Initialises every field from the argument of the same name.
  XeCoreInfo(uint32_t num_threads, const SharedMemory &shared_memory,
             uint32_t num_vector_units, uint32_t num_matrix_units)
      : num_threads{num_threads}, shared_memory(shared_memory),
        num_vector_units{num_vector_units},
        num_matrix_units{num_matrix_units} {}
};


//===----------------------------------------------------------------------===//

// Interfaces

//===----------------------------------------------------------------------===//

// Identifies which matrix operand of an MMA instruction a query refers to.
// NOTE(review): presumably A and B are the multiplicands, C the accumulator
// input and D the result -- confirm.
enum class MMAOpndKind {
  MatrixA,
  MatrixB,
  MatrixC,
  MatrixD
};


// Interface for querying and validating what a matrix multiply-accumulate
// (MMA) instruction supports. Every method is pure virtual, so the exact
// acceptance rules are defined by the implementing instruction. Shapes are
// passed as (uint32_t, uint32_t) pairs.
// NOTE(review): the pair is presumably (rows, columns) -- confirm against an
// implementation.
struct MMAInstructionInterface {
  // Returns the matrix shapes supported for the operand `matrixType` when its
  // element type is `dataType`.
  virtual llvm::SmallVector<std::pair<uint32_t, uint32_t>, 16>
  getSupportedShapes(Type dataType, MMAOpndKind matrixType) = 0;
  // Returns the element types supported for the operand `matrixType`.
  //
  // @TODO: This method takes a context object as a parameter so that the Type
  // objects are created in that context. Since Type objects are uniqued per
  // context, checks such as "aType == bType" (where aType and bType denote the
  // same type) only work when both types come from the same context.
  //
  // One alternative is to create an enum to represent each type, but that
  // adds an extra burden on the user to convert these enums to specific
  // types. In fact, a utility converting enumToType() and vice versa would
  // still have to use the context object.
  //
  // Until we have a better solution, we stick to passing the context object
  // to this method.
  virtual llvm::SmallVector<Type, 8>
  getSupportedTypes(MLIRContext &context, MMAOpndKind matrixType) = 0;
  // Checks the given A/B/C/D operand shapes and element types together and
  // reports whether the combination is supported.
  virtual bool
  checkSupportedShapesAndTypes(std::pair<uint32_t, uint32_t> AShape,
                               std::pair<uint32_t, uint32_t> BShape,
                               std::pair<uint32_t, uint32_t> CShape,
                               std::pair<uint32_t, uint32_t> DShape, Type AType,
                               Type BType, Type CType, Type DType) = 0;
  // Checks only the A/B/C/D element types and reports whether the combination
  // is supported.
  virtual bool checkSupportedTypes(Type AType, Type BType, Type CType,
                                   Type DType) = 0;
  // Validates the given A/B/C/D operand shapes and element types.
  // NOTE(review): how this differs from checkSupportedShapesAndTypes() is not
  // visible from this interface -- confirm against an implementation.
  virtual bool validate(std::pair<uint32_t, uint32_t> AShape,
                        std::pair<uint32_t, uint32_t> BShape,
                        std::pair<uint32_t, uint32_t> CShape,
                        std::pair<uint32_t, uint32_t> DShape, Type AType,
                        Type BType, Type CType, Type DType) = 0;
  // Return the supported M, K and N sizes for element type `type`.
  // NOTE(review): presumably the dimensions of an MxK by KxN multiply --
  // confirm.
  virtual llvm::SmallVector<uint32_t, 8> getSupportedM(Type type) const = 0;
  virtual llvm::SmallVector<uint32_t, 8> getSupportedK(Type type) const = 0;
  virtual llvm::SmallVector<uint32_t, 8> getSupportedN(Type type) const = 0;

  // Virtual so that implementations can be destroyed through this interface.
  virtual ~MMAInstructionInterface() = default;
};


} // namespace uArch

} // namespace xegpu

} // namespace mlir


#endif // MLIR_DIALECT_XEGPU_UARCH_UARCHBASE_H

Types.h

llvm::ArrayRef
Definition LLVM.h:48

llvm::SmallVector
Definition LLVM.h:72

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74

mlir::xegpu::uArch
Definition IntelGpuXe2.h:31

mlir::xegpu::uArch::InstructionKind
InstructionKind
Definition uArchBase.h:36

mlir::xegpu::uArch::InstructionKind::Subgroup2DBlockPrefetch
@ Subgroup2DBlockPrefetch
Definition uArchBase.h:41

mlir::xegpu::uArch::InstructionKind::SubgroupMatrixMultiplyAcc
@ SubgroupMatrixMultiplyAcc
Definition uArchBase.h:37

mlir::xegpu::uArch::InstructionKind::Subgroup2DBlockLoad
@ Subgroup2DBlockLoad
Definition uArchBase.h:40

mlir::xegpu::uArch::InstructionKind::Subgroup2DBlockStore
@ Subgroup2DBlockStore
Definition uArchBase.h:39

mlir::xegpu::uArch::generalPackedFormatBitSize
constexpr unsigned generalPackedFormatBitSize
Definition uArchBase.h:32

mlir::xegpu::uArch::MMAOpndKind
MMAOpndKind
Definition uArchBase.h:207

mlir::xegpu::uArch::MMAOpndKind::MatrixD
@ MatrixD
Definition uArchBase.h:207

mlir::xegpu::uArch::MMAOpndKind::MatrixA
@ MatrixA
Definition uArchBase.h:207

mlir::xegpu::uArch::MMAOpndKind::MatrixC
@ MatrixC
Definition uArchBase.h:207

mlir::xegpu::uArch::MMAOpndKind::MatrixB
@ MatrixB
Definition uArchBase.h:207

mlir::xegpu::uArch::RegisterFileMode
RegisterFileMode
Definition uArchBase.h:86

mlir::xegpu::uArch::RegisterFileMode::Small
@ Small
Definition uArchBase.h:86

mlir::xegpu::uArch::RegisterFileMode::Large
@ Large
Definition uArchBase.h:86

mlir::xegpu::uArch::InstructionScope
InstructionScope
Definition uArchBase.h:35

mlir::xegpu::uArch::InstructionScope::Lane
@ Lane
Definition uArchBase.h:35

mlir::xegpu::uArch::InstructionScope::Cluster
@ Cluster
Definition uArchBase.h:35

mlir::xegpu::uArch::InstructionScope::Subgroup
@ Subgroup
Definition uArchBase.h:35

mlir::xegpu::uArch::InstructionScope::Workgroup
@ Workgroup
Definition uArchBase.h:35

mlir::xegpu::uArch::RegisterFileType
RegisterFileType
Definition uArchBase.h:87

mlir::xegpu::uArch::RegisterFileType::GRF
@ GRF
Definition uArchBase.h:87

mlir::xegpu::uArch::RegisterFileType::ARF
@ ARF
Definition uArchBase.h:87

mlir::xegpu::uArch::CacheHierarchyLevel
CacheHierarchyLevel
Definition uArchBase.h:117

mlir::xegpu::uArch::CacheHierarchyLevel::L2
@ L2
Definition uArchBase.h:117

mlir::xegpu::uArch::CacheHierarchyLevel::L3
@ L3
Definition uArchBase.h:117

mlir::xegpu::uArch::CacheHierarchyLevel::L1
@ L1
Definition uArchBase.h:117

mlir::xegpu
Definition XeGPU.h:25

mlir
Include the generated interface declarations.
Definition AliasAnalysis.h:19

mlir::xegpu::uArch::CacheInfo::size
uint32_t size
Definition uArchBase.h:135

mlir::xegpu::uArch::CacheInfo::getLineSize
uint32_t getLineSize() const
Definition uArchBase.h:131

mlir::xegpu::uArch::CacheInfo::hierarchy_level
CacheHierarchyLevel hierarchy_level
Definition uArchBase.h:137

mlir::xegpu::uArch::CacheInfo::~CacheInfo
virtual ~CacheInfo()=default

mlir::xegpu::uArch::CacheInfo::CacheInfo
CacheInfo(uint32_t size, uint32_t line_size, CacheHierarchyLevel hierarchy_level)
Definition uArchBase.h:123

mlir::xegpu::uArch::CacheInfo::CacheInfo
CacheInfo()=default

mlir::xegpu::uArch::CacheInfo::line_size
uint32_t line_size
Definition uArchBase.h:136

mlir::xegpu::uArch::CacheInfo::getHierarchyLevel
CacheHierarchyLevel getHierarchyLevel() const
Definition uArchBase.h:132

mlir::xegpu::uArch::CacheInfo::getSize
uint32_t getSize() const
Definition uArchBase.h:130

mlir::xegpu::uArch::Instruction
Definition uArchBase.h:50

mlir::xegpu::uArch::Instruction::Instruction
Instruction(InstructionKind kind, InstructionScope scope)
Definition uArchBase.h:51

mlir::xegpu::uArch::Instruction::parseInstructionKind
static std::optional< InstructionKind > parseInstructionKind(llvm::StringRef str)
Definition uArchBase.h:73

mlir::xegpu::uArch::Instruction::scope
const InstructionScope scope
Definition uArchBase.h:81

mlir::xegpu::uArch::Instruction::getScope
InstructionScope getScope() const
Definition uArchBase.h:57

mlir::xegpu::uArch::Instruction::toString
static llvm::StringRef toString(InstructionKind instKind)
Definition uArchBase.h:58

mlir::xegpu::uArch::Instruction::~Instruction
~Instruction()=default

mlir::xegpu::uArch::Instruction::getInstructionKind
InstructionKind getInstructionKind() const
Definition uArchBase.h:56

mlir::xegpu::uArch::Instruction::instKind
const InstructionKind instKind
Definition uArchBase.h:80

mlir::xegpu::uArch::MMAInstructionInterface
Definition uArchBase.h:208

mlir::xegpu::uArch::MMAInstructionInterface::getSupportedShapes
virtual llvm::SmallVector< std::pair< uint32_t, uint32_t >, 16 > getSupportedShapes(Type dataType, MMAOpndKind matrixType)=0

mlir::xegpu::uArch::MMAInstructionInterface::getSupportedN
virtual llvm::SmallVector< uint32_t, 8 > getSupportedN(Type type) const =0

mlir::xegpu::uArch::MMAInstructionInterface::~MMAInstructionInterface
virtual ~MMAInstructionInterface()=default

mlir::xegpu::uArch::MMAInstructionInterface::checkSupportedShapesAndTypes
virtual bool checkSupportedShapesAndTypes(std::pair< uint32_t, uint32_t > AShape, std::pair< uint32_t, uint32_t > BShape, std::pair< uint32_t, uint32_t > CShape, std::pair< uint32_t, uint32_t > DShape, Type AType, Type BType, Type CType, Type DType)=0

mlir::xegpu::uArch::MMAInstructionInterface::checkSupportedTypes
virtual bool checkSupportedTypes(Type AType, Type BType, Type CType, Type DType)=0

mlir::xegpu::uArch::MMAInstructionInterface::getSupportedK
virtual llvm::SmallVector< uint32_t, 8 > getSupportedK(Type type) const =0

mlir::xegpu::uArch::MMAInstructionInterface::validate
virtual bool validate(std::pair< uint32_t, uint32_t > AShape, std::pair< uint32_t, uint32_t > BShape, std::pair< uint32_t, uint32_t > CShape, std::pair< uint32_t, uint32_t > DShape, Type AType, Type BType, Type CType, Type DType)=0

mlir::xegpu::uArch::MMAInstructionInterface::getSupportedM
virtual llvm::SmallVector< uint32_t, 8 > getSupportedM(Type type) const =0

mlir::xegpu::uArch::MMAInstructionInterface::getSupportedTypes
virtual llvm::SmallVector< Type, 8 > getSupportedTypes(MLIRContext &context, MMAOpndKind matrixType)=0

mlir::xegpu::uArch::RegisterFileInfo::getSize
uint32_t getSize() const
Definition uArchBase.h:99

mlir::xegpu::uArch::RegisterFileInfo::size
uint32_t size
Definition uArchBase.h:110

mlir::xegpu::uArch::RegisterFileInfo::RegisterFileInfo
RegisterFileInfo()=default

mlir::xegpu::uArch::RegisterFileInfo::mode
llvm::SmallVector< RegisterFileMode, 4 > mode
Definition uArchBase.h:112

mlir::xegpu::uArch::RegisterFileInfo::RegisterFileInfo
RegisterFileInfo(uint32_t size, const llvm::SmallVector< RegisterFileMode, 4 > &mode, const llvm::SmallVector< uint32_t, 4 > &numRegs)
Definition uArchBase.h:93

mlir::xegpu::uArch::RegisterFileInfo::numRegsPerThreadPerMode
llvm::SmallVector< uint32_t, 4 > numRegsPerThreadPerMode
Definition uArchBase.h:114

mlir::xegpu::uArch::RegisterFileInfo::getModes
const llvm::SmallVector< RegisterFileMode, 4 > & getModes() const
Definition uArchBase.h:101

mlir::xegpu::uArch::RegisterFileInfo::getNumRegsPerThreadPerMode
const llvm::SmallVector< uint32_t, 4 > & getNumRegsPerThreadPerMode() const
Definition uArchBase.h:105

mlir::xegpu::uArch::SharedMemory
Definition uArchBase.h:176

mlir::xegpu::uArch::SharedMemory::getAlignment
uint32_t getAlignment() const
Definition uArchBase.h:183

mlir::xegpu::uArch::SharedMemory::SharedMemory
SharedMemory(uint32_t size, uint32_t alignment)
Definition uArchBase.h:178

mlir::xegpu::uArch::SharedMemory::size
uint32_t size
Definition uArchBase.h:186

mlir::xegpu::uArch::SharedMemory::alignment
uint32_t alignment
Definition uArchBase.h:187

mlir::xegpu::uArch::SharedMemory::getSize
uint32_t getSize() const
Definition uArchBase.h:182

mlir::xegpu::uArch::SubgroupMatrixMultiplyAcc
Definition IntelGpuXe2.h:171

mlir::xegpu::uArch::XeCoreInfo::num_vector_units
uint32_t num_vector_units
Definition uArchBase.h:194

mlir::xegpu::uArch::XeCoreInfo::num_matrix_units
uint32_t num_matrix_units
Definition uArchBase.h:195

mlir::xegpu::uArch::XeCoreInfo::num_threads
uint32_t num_threads
Definition uArchBase.h:192

mlir::xegpu::uArch::XeCoreInfo::shared_memory
SharedMemory shared_memory
Definition uArchBase.h:193

mlir::xegpu::uArch::XeCoreInfo::XeCoreInfo
XeCoreInfo(uint32_t num_threads, const SharedMemory &shared_memory, uint32_t num_vector_units, uint32_t num_matrix_units)
Definition uArchBase.h:197

mlir::xegpu::uArch::uArch::instructionRegistry
llvm::SmallDenseMap< InstructionKind, const Instruction *, 32 > instructionRegistry
Definition uArchBase.h:172

mlir::xegpu::uArch::uArch::getDescription
StringRef getDescription() const
Definition uArchBase.h:153

mlir::xegpu::uArch::uArch::getGeneralPackedFormatBitSize
virtual unsigned getGeneralPackedFormatBitSize() const =0

mlir::xegpu::uArch::uArch::name
StringRef name
Definition uArchBase.h:169

mlir::xegpu::uArch::uArch::isSupportedInstruction
bool isSupportedInstruction(InstructionKind instr) const
Definition uArchBase.h:164

mlir::xegpu::uArch::uArch::getSubgroupSize
virtual int getSubgroupSize() const =0

mlir::xegpu::uArch::uArch::uArch
uArch(StringRef name, StringRef description, llvm::ArrayRef< const Instruction * > instructionRegistry)
Definition uArchBase.h:145

mlir::xegpu::uArch::uArch::getInstruction
const Instruction * getInstruction(InstructionKind instKind) const
Definition uArchBase.h:157

mlir::xegpu::uArch::uArch::~uArch
virtual ~uArch()=default

mlir::xegpu::uArch::uArch::getName
StringRef getName() const
Definition uArchBase.h:152

mlir::xegpu::uArch::uArch::description
StringRef description
Definition uArchBase.h:170