MLIR 23.0.0git
IndexedAccessOpInterfaceImpl.cpp
Go to the documentation of this file.
1//===- IndexedAccessOpInterfaceImpl.cpp -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// Implement IndexedAccessOpInterface on GPU dialect operations that have
9// %memref[%i0, %i1, ...] arguments to allow them to be manipulated by
10// generic memref-dialect passes.
11//===----------------------------------------------------------------------===//
12
14
17#include "mlir/IR/Dialect.h"
18#include "mlir/IR/Operation.h"
20
21using namespace mlir;
22using namespace mlir::memref;
23using namespace mlir::gpu;
24
25/// Given a GPU matrix type that will be loaded or stored, the leading dimension
26/// of the matrix in memory, and whether or not the matrix is transposed,
27/// compute the size of the linear memory that the load/store spans as
28/// dC + leadingDim * (dR - 1) where dR and dC are the non-contiguous and
29/// contiguous matrix dimensions, respectively (we get to the dX-1th row and
30/// then access the first dY elements of it).
31static int64_t get1DAccessSize(MMAMatrixType matrixType, int64_t leadingDim,
32 bool transpose) {
33 assert(matrixType.getShape().size() == 2 && "expected matrices to be 2D");
34
35 int64_t c = matrixType.getShape()[1];
36 int64_t r = matrixType.getShape()[0];
37 if (transpose)
38 std::swap(c, r);
39 return c + leadingDim * (r - 1);
40}
41
42namespace {
43struct SubgroupMmaLoadMatrixOpImpl final
44 : IndexedAccessOpInterface::ExternalModel<SubgroupMmaLoadMatrixOpImpl,
45 SubgroupMmaLoadMatrixOp> {
46 TypedValue<MemRefType> getAccessedMemref(Operation *op) const {
47 return cast<SubgroupMmaLoadMatrixOp>(op).getSrcMemref();
48 }
49
50 Operation::operand_range getIndices(Operation *op) const {
51 return cast<SubgroupMmaLoadMatrixOp>(op).getIndices();
52 }
53
54 /// This returns a 1-D shape so that it's clear that both linearization and
55 /// folding in expand/collapse_shape operations are allowed.
56 SmallVector<int64_t> getAccessedShape(Operation *op) const {
57 auto loadOp = cast<SubgroupMmaLoadMatrixOp>(op);
58 return {get1DAccessSize(cast<MMAMatrixType>(loadOp.getRes().getType()),
59 loadOp.getLeadDimension().getZExtValue(),
60 loadOp.getTranspose().value_or(false))};
61 }
62
63 std::optional<SmallVector<Value>>
64 updateMemrefAndIndices(Operation *op, RewriterBase &rewriter, Value newMemref,
65 ValueRange newIndices) const {
66 auto loadOp = cast<SubgroupMmaLoadMatrixOp>(op);
67 rewriter.modifyOpInPlace(loadOp, [&]() {
68 loadOp.getSrcMemrefMutable().assign(newMemref);
69 loadOp.getIndicesMutable().assign(newIndices);
70 });
71 return std::nullopt;
72 }
73
74 bool hasInboundsIndices(Operation *) const { return true; }
75};
76
77struct SubgroupMmaStoreMatrixOpImpl final
78 : IndexedAccessOpInterface::ExternalModel<SubgroupMmaStoreMatrixOpImpl,
79 SubgroupMmaStoreMatrixOp> {
80 TypedValue<MemRefType> getAccessedMemref(Operation *op) const {
81 return cast<SubgroupMmaStoreMatrixOp>(op).getDstMemref();
82 }
83
84 Operation::operand_range getIndices(Operation *op) const {
85 return cast<SubgroupMmaStoreMatrixOp>(op).getIndices();
86 }
87
88 /// This returns a 1-D shape so that it's clear that both linearization and
89 /// folding in expand/collapse_shape operations are allowed.
90 SmallVector<int64_t> getAccessedShape(Operation *op) const {
91 auto storeOp = cast<SubgroupMmaStoreMatrixOp>(op);
92 return {get1DAccessSize(storeOp.getSrc().getType(),
93 storeOp.getLeadDimension().getZExtValue(),
94 storeOp.getTranspose().value_or(false))};
95 }
96
97 std::optional<SmallVector<Value>>
98 updateMemrefAndIndices(Operation *op, RewriterBase &rewriter, Value newMemref,
99 ValueRange newIndices) const {
100 auto storeOp = cast<SubgroupMmaStoreMatrixOp>(op);
101 rewriter.modifyOpInPlace(storeOp, [&]() {
102 storeOp.getDstMemrefMutable().assign(newMemref);
103 storeOp.getIndicesMutable().assign(newIndices);
104 });
105 return std::nullopt;
106 }
107
108 bool hasInboundsIndices(Operation *) const { return true; }
109};
110} // namespace
111
113 DialectRegistry &registry) {
114 registry.addExtension(+[](MLIRContext *ctx, gpu::GPUDialect *dialect) {
115 SubgroupMmaLoadMatrixOp::attachInterface<SubgroupMmaLoadMatrixOpImpl>(*ctx);
116 SubgroupMmaStoreMatrixOp::attachInterface<SubgroupMmaStoreMatrixOpImpl>(
117 *ctx);
118 });
119}
static int64_t get1DAccessSize(MMAMatrixType matrixType, int64_t leadingDim, bool transpose)
Given a GPU matrix type that will be loaded or stored, the leading dimension of the matrix in memory, and whether or not the matrix is transposed, compute the size of the linear memory span of the access.
The DialectRegistry maps a dialect namespace to a constructor for the matching dialect.
bool addExtension(TypeID extensionID, std::unique_ptr< DialectExtensionBase > extension)
Add the given extension to the registry.
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63
OperandRange operand_range
Definition Operation.h:397
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
MMAMatrix represents a matrix held by a subgroup for matrix-matrix multiply accumulate operations.
Definition GPUDialect.h:131
ArrayRef< int64_t > getShape() const
Get shape of the matrix.
void registerIndexedAccessOpInterfaceExternalModels(DialectRegistry &registry)
Operation::operand_range getIndices(Operation *op)
Get the indices that the given load/store operation is operating on.
Definition Utils.cpp:18
Include the generated interface declarations.
std::conditional_t< std::is_same_v< Ty, mlir::Type >, mlir::Value, detail::TypedValue< Ty > > TypedValue
If Ty is mlir::Type this will select Value instead of having a wrapper around it.
Definition Value.h:494