doxygen/ACCEmitRemarksLoop_8cpp_source.html

//===- ACCEmitRemarksLoop.cpp - Emit OpenACC loop mapping remarks --------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This pass emits optimization remarks describing how loops inside OpenACC

// compute regions are mapped to parallelism levels and GPU dimensions.

//

//===----------------------------------------------------------------------===//


#include "mlir/Dialect/Func/IR/FuncOps.h"

#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h"

#include "mlir/Dialect/OpenACC/OpenACC.h"

#include "mlir/Dialect/OpenACC/OpenACCParMapping.h"

#include "mlir/Dialect/OpenACC/OpenACCUtilsLoop.h"

#include "mlir/Dialect/OpenACC/Transforms/Passes.h"

#include "mlir/Dialect/SCF/IR/SCF.h"

#include "mlir/Interfaces/FunctionInterfaces.h"

#include "mlir/Interfaces/LoopLikeInterface.h"

#include "llvm/ADT/StringExtras.h"

#include "llvm/Support/Debug.h"


namespace mlir {

namespace acc {

#define GEN_PASS_DEF_ACCEMITREMARKSLOOP

#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc"

} // namespace acc

} // namespace mlir


#define DEBUG_TYPE "acc-emit-remarks-loop"


using namespace mlir;


namespace {


/// Decide whether loop-mapping remarks should be produced for
/// \p computeRegion.
///
/// Remarks are wanted when the region's origin string equals the operation
/// name of acc::KernelsOp, acc::ParallelOp or acc::SerialOp. Otherwise the
/// answer is whatever acc::isSpecializedAccRoutine reports for the enclosing
/// function, or false when the region has no FunctionOpInterface parent.
static bool shouldEmitLoopRemarks(acc::ComputeRegionOp computeRegion) {
  StringRef regionOrigin = computeRegion.getOrigin();
  bool fromComputeConstruct =
      regionOrigin == acc::KernelsOp::getOperationName() ||
      regionOrigin == acc::ParallelOp::getOperationName() ||
      regionOrigin == acc::SerialOp::getOperationName();
  if (fromComputeConstruct)
    return true;

  // Not one of the listed constructs: defer to the enclosing function, if any.
  auto enclosingFunc = computeRegion->getParentOfType<FunctionOpInterface>();
  return enclosingFunc && acc::isSpecializedAccRoutine(enclosingFunc);
}


/// Build the OpenACC-level label for \p parDim as classified by \p policy.
///
/// The label is "sequential", "vector", "worker" or "gang" (checked in that
/// order), or empty when the policy recognizes none of them. For every
/// non-sequential dimension, a "(N)" suffix is appended when
/// \p computeRegion reports a known constant launch argument N for that
/// dimension.
static std::string getACCParLevelName(acc::GPUParallelDimAttr parDim,
                                      const acc::ACCToGPUMappingPolicy &policy,
                                      acc::ComputeRegionOp computeRegion) {
  // Sequential loops never carry a launch-argument suffix.
  if (policy.isSeq(parDim))
    return "sequential";

  std::string levelName;
  if (policy.isVector(parDim))
    levelName = "vector";
  else if (policy.isWorker(parDim))
    levelName = "worker";
  else if (policy.isGang(parDim))
    levelName = "gang";

  std::optional<uint64_t> launchArg =
      computeRegion.getKnownConstantLaunchArg(parDim);
  if (launchArg)
    levelName.append("(").append(std::to_string(*launchArg)).append(")");

  return levelName;
}


/// Build the GPU index name for \p parDim in the form
/// "<threadidx|blockidx><separator><x|y|z>".
///
/// The thread X/Y/Z predicates are tested first, then the block X/Y/Z
/// predicates; the first one that holds selects the name. An empty string is
/// returned when none of them holds.
static std::string getGPUParDimName(acc::GPUParallelDimAttr parDim,
                                    llvm::StringRef separator) {
  llvm::StringRef indexKind;
  char axisLetter = '\0';

  if (parDim.isThreadX()) {
    indexKind = "threadidx";
    axisLetter = 'x';
  } else if (parDim.isThreadY()) {
    indexKind = "threadidx";
    axisLetter = 'y';
  } else if (parDim.isThreadZ()) {
    indexKind = "threadidx";
    axisLetter = 'z';
  } else if (parDim.isBlockX()) {
    indexKind = "blockidx";
    axisLetter = 'x';
  } else if (parDim.isBlockY()) {
    indexKind = "blockidx";
    axisLetter = 'y';
  } else if (parDim.isBlockZ()) {
    indexKind = "blockidx";
    axisLetter = 'z';
  } else {
    // Not a thread or block dimension: there is no GPU name to report.
    return std::string();
  }

  std::string dimName = (indexKind + separator).str();
  dimName.push_back(axisLetter);
  return dimName;
}


/// Emit one remark on \p loopOp describing its parallel mapping.
///
/// The dimensions come from the loop's acc::GPUParallelDimsAttr when present.
/// An scf::ForOp without that attribute is reported with a single sequential
/// dimension; any other loop without the attribute produces no remark.
///
/// The message has the shape
///   "!$acc loop <levels>[ collapse(N)][ ! <gpu dims>]"
/// where <levels> are the getACCParLevelName results joined by ", ", the
/// collapse clause appears only when acc::getCollapseCount exceeds 1, and
/// <gpu dims> are the non-empty getGPUParDimName results joined by " ".
/// The remark is emitted through \p accSupport under the DEBUG_TYPE category.
static void emitLoopMappingRemark(acc::ComputeRegionOp computeRegion,
                                  LoopLikeOpInterface loopOp,
                                  acc::OpenACCSupport &accSupport,
                                  const acc::ACCToGPUMappingPolicy &policy,
                                  llvm::StringRef gpuDimSeparator) {
  auto mappingAttr = loopOp->getAttrOfType<acc::GPUParallelDimsAttr>(
      acc::GPUParallelDimsAttr::name);

  // Backing storage for the implicit sequential dimension. It has to outlive
  // `mappedDims`, which only views it.
  SmallVector<acc::GPUParallelDimAttr, 1> implicitSeqDims;
  ArrayRef<acc::GPUParallelDimAttr> mappedDims;
  if (mappingAttr) {
    mappedDims = mappingAttr.getArray();
  } else {
    if (!isa<scf::ForOp>(loopOp.getOperation()))
      return;
    implicitSeqDims.push_back(
        acc::GPUParallelDimAttr::seqDim(loopOp->getContext()));
    mappedDims = implicitSeqDims;
  }

  // The message is produced by a callback handed to emitRemark; the
  // by-reference captures all refer to objects that live until this function
  // returns.
  auto composeMessage = [&]() {
    SmallVector<std::string> levelNames;
    SmallVector<std::string> gpuDimNames;

    for (acc::GPUParallelDimAttr dim : mappedDims) {
      levelNames.push_back(getACCParLevelName(dim, policy, computeRegion));

      std::string gpuDimName = getGPUParDimName(dim, gpuDimSeparator);
      if (!gpuDimName.empty())
        gpuDimNames.push_back(std::move(gpuDimName));
    }

    std::string message = "!$acc loop " + llvm::join(levelNames, ", ");

    uint64_t collapsedLoops = acc::getCollapseCount(loopOp);
    if (collapsedLoops > 1)
      message += " collapse(" + std::to_string(collapsedLoops) + ")";

    if (!gpuDimNames.empty())
      message += " ! " + llvm::join(gpuDimNames, " ");

    return message;
  };

  accSupport.emitRemark(loopOp, composeMessage, DEBUG_TYPE);
}


/// Pass that reports, as remarks, how loops inside OpenACC compute regions
/// are mapped to parallelism levels and GPU dimensions.
class ACCEmitRemarksLoop
    : public acc::impl::ACCEmitRemarksLoopBase<ACCEmitRemarksLoop> {
public:
  using ACCEmitRemarksLoopBase<ACCEmitRemarksLoop>::ACCEmitRemarksLoopBase;

  /// Visit every acc::ComputeRegionOp in the function and, for those accepted
  /// by shouldEmitLoopRemarks, emit a mapping remark for each loop-like op
  /// found in the region.
  void runOnOperation() override {
    func::FuncOp funcOp = getOperation();
    acc::OpenACCSupport &support = getAnalysis<acc::OpenACCSupport>();
    acc::DefaultACCToGPUMappingPolicy mappingPolicy;

    // An empty separator option is replaced by "." (stored back into the
    // option itself).
    if (gpuDimSeparator.empty())
      gpuDimSeparator = ".";

    funcOp.walk([&](acc::ComputeRegionOp regionOp) {
      if (shouldEmitLoopRemarks(regionOp)) {
        regionOp.getRegion().walk([&](LoopLikeOpInterface loop) {
          emitLoopMappingRemark(regionOp, loop, support, mappingPolicy,
                                gpuDimSeparator);
        });
      }
    });
  }
};


} // namespace

Passes.h

FuncOps.h

FunctionInterfaces.h

DEBUG_TYPE
#define DEBUG_TYPE
Definition LocalAliasAnalysis.cpp:32

LoopLikeInterface.h

OpenACCParMapping.h

OpenACCSupport.h

OpenACCUtilsLoop.h

llvm::ArrayRef
Definition LLVM.h:40

llvm::SmallVector
Definition LLVM.h:64

mlir::acc::ACCParMappingPolicy::isWorker
virtual bool isWorker(ParDimAttrT attr) const =0
Check if the attribute represents worker parallelism.

mlir::acc::ACCParMappingPolicy::isSeq
virtual bool isSeq(ParDimAttrT attr) const =0
Check if the attribute represents sequential execution.

mlir::acc::ACCParMappingPolicy::isVector
virtual bool isVector(ParDimAttrT attr) const =0
Check if the attribute represents vector parallelism.

mlir::acc::ACCParMappingPolicy::isGang
virtual bool isGang(ParDimAttrT attr) const =0
Check if the attribute represents gang parallelism (any gang dimension).

mlir::acc::DefaultACCToGPUMappingPolicy
Default policy that provides the standard GPU mapping: gang(dim:1) -> BlockX (gridDim....
Definition OpenACCParMapping.h:119

mlir::acc::OpenACCSupport
Definition OpenACCSupport.h:237

mlir::acc::OpenACCSupport::emitRemark
remark::detail::InFlightRemark emitRemark(Operation *op, std::function< std::string()> messageFn, llvm::StringRef category="openacc")
Emit an OpenACC remark with lazy message generation.
Definition OpenACCSupport.cpp:41

mlir::acc::impl::ACCEmitRemarksLoopBase
Definition ACCEmitRemarksLoop.cpp:448

OpenACC.h

SCF.h

mlir::acc
Definition OpenACCSupport.h:68

mlir::acc::getCollapseCount
uint64_t getCollapseCount(Operation *op)
Number of original loops collapsed into op, or 1 when op carries no collapse_count attribute.
Definition OpenACCUtilsLoop.cpp:346

mlir::acc::isSpecializedAccRoutine
bool isSpecializedAccRoutine(mlir::Operation *op)
Used to check whether this is a specialized accelerator version of acc routine function.
Definition OpenACC.h:201

mlir::acc::ACCToGPUMappingPolicy
ACCParMappingPolicy< mlir::acc::GPUParallelDimAttr > ACCToGPUMappingPolicy
Type alias for the GPU-specific mapping policy.
Definition OpenACCParMapping.h:158

mlir::func
Definition LoopUtils.h:30

mlir
Include the generated interface declarations.
Definition ABIRewriteContext.h:29