doxygen/OpenACCCG_8cpp_source.html

//===- OpenACCCG.cpp - OpenACC codegen ops, attributes, and types ---------===//

//

// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// Implementation for OpenACC codegen operations, attributes, and types.

// These correspond to the definitions in OpenACCCG*.td tablegen files

// and are kept in a separate file because they do not represent direct mappings

// of OpenACC language constructs; they are intermediate representations used

// when decomposing and lowering primary `acc` dialect operations.

//

//===----------------------------------------------------------------------===//


#include "mlir/Dialect/GPU/IR/GPUDialect.h"

#include "mlir/Dialect/OpenACC/OpenACC.h"

#include "mlir/Dialect/Utils/StaticValueUtils.h"

#include "mlir/IR/BuiltinAttributes.h"

#include "mlir/IR/BuiltinTypes.h"

#include "mlir/IR/Region.h"

#include "mlir/Interfaces/ControlFlowInterfaces.h"

#include "mlir/Support/LogicalResult.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallVector.h"


using namespace mlir;

using namespace acc;


namespace {


/// Computes the region successors for an OpenACC op that owns a single
/// region: branching from the parent op enters `region`, and any other branch
/// point returns to the parent op.
static void
getSingleRegionOpSuccessorRegions(Operation *op, Region &region,
                                  RegionBranchPoint point,
                                  SmallVectorImpl<RegionSuccessor> &regions) {
  // `op` is not consulted here; only the branch point decides the successor.
  if (!point.isParent()) {
    regions.push_back(RegionSuccessor::parent());
    return;
  }
  regions.push_back(RegionSuccessor(&region));
}


/// Returns the values associated with `successor`: the results of `op` when
/// the successor is the parent operation, and an empty range otherwise.
static ValueRange getSingleRegionSuccessorInputs(Operation *op,
                                                 RegionSuccessor successor) {
  if (successor.isParent())
    return ValueRange(op->getResults());
  return ValueRange();
}


/// Canonicalization pattern that removes acc.kernel_environment operations
/// whose body block contains no operations. If the removed operation has wait
/// operands (or a wait-only attribute), an acc.wait operation is created in
/// its place to preserve synchronization.
struct RemoveEmptyKernelEnvironment
    : public OpRewritePattern<acc::KernelEnvironmentOp> {
  using OpRewritePattern<acc::KernelEnvironmentOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(acc::KernelEnvironmentOp op,
                                PatternRewriter &rewriter) const override {
    assert(op->getNumRegions() == 1 && "expected op to have one region");

    // Only a kernel environment whose block is empty is a candidate.
    if (!op.getRegion().front().empty())
      return failure();

    // The checks below conservatively reject cases whose wait information
    // cannot be fully represented by the acc.wait operation built at the end.

    // Reject wait operands associated with a non-default device type.
    if (auto deviceTypes = op.getWaitOperandsDeviceTypeAttr()) {
      bool hasNonDefaultDeviceType =
          llvm::any_of(deviceTypes, [](Attribute entry) {
            auto deviceType = mlir::dyn_cast<acc::DeviceTypeAttr>(entry);
            return deviceType &&
                   deviceType.getValue() != mlir::acc::DeviceType::None;
          });
      if (hasNonDefaultDeviceType)
        return failure();
    }

    // Reject any wait segment that carries a devnum.
    if (auto devnumFlags = op.getHasWaitDevnumAttr()) {
      bool anyDevnum = llvm::any_of(devnumFlags, [](Attribute entry) {
        auto flag = mlir::dyn_cast<mlir::BoolAttr>(entry);
        return flag && flag.getValue();
      });
      if (anyDevnum)
        return failure();
    }

    // Reject multiple wait segments.
    if (auto segments = op.getWaitOperandsSegmentsAttr();
        segments && segments.size() > 1)
      return failure();

    // Nothing to synchronize on: the operation can simply be erased.
    const bool needsWait =
        !op.getWaitOperands().empty() || op.getWaitOnlyAttr();
    if (!needsWait) {
      rewriter.eraseOp(op);
      return success();
    }

    // Keep the synchronization by replacing the op with acc.wait.
    rewriter.replaceOpWithNewOp<acc::WaitOp>(op, op.getWaitOperands(),
                                             /*asyncOperand=*/Value(),
                                             /*waitDevnum=*/Value(),
                                             /*async=*/nullptr,
                                             /*ifCond=*/Value());
    return success();
  }
};


/// Appends one `EffectTy` memory effect to `effects` for every operand in the
/// mutable range `operand`.
template <typename EffectTy>
static void addOperandEffect(
    SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
        &effects,
    const MutableOperandRange &operand) {
  const unsigned numOperands = operand.size();
  for (unsigned idx = 0; idx != numOperands; ++idx) {
    OpOperand &opOperand = operand[idx];
    effects.emplace_back(EffectTy::get(), &opOperand);
  }
}


/// Appends an `EffectTy` memory effect on `result` to `effects`. `result` is
/// cast to an OpResult, so it must be an operation result.
template <typename EffectTy>
static void addResultEffect(
    SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
        &effects,
    Value result) {
  auto opResult = mlir::cast<mlir::OpResult>(result);
  effects.emplace_back(EffectTy::get(), opResult);
}


/// Maps a gpu::Processor enumerator to the integer index used for it in this
/// file: Sequential is 0, ThreadX/Y/Z are 1-3, and BlockX/Y/Z are 4-6.
static int64_t gpuProcessorIndex(gpu::Processor p) {
  int64_t index = -1;
  switch (p) {
  case gpu::Processor::Sequential:
    index = 0;
    break;
  case gpu::Processor::ThreadX:
    index = 1;
    break;
  case gpu::Processor::ThreadY:
    index = 2;
    break;
  case gpu::Processor::ThreadZ:
    index = 3;
    break;
  case gpu::Processor::BlockX:
    index = 4;
    break;
  case gpu::Processor::BlockY:
    index = 5;
    break;
  case gpu::Processor::BlockZ:
    index = 6;
    break;
  }
  // No case matched: the enumerator is not one of the handled processors.
  if (index < 0)
    llvm_unreachable("unhandled gpu::Processor");
  return index;
}


/// Inverse of gpuProcessorIndex: maps an index in [0, 6] back to its
/// gpu::Processor. Any index outside that range yields
/// gpu::Processor::Sequential.
static gpu::Processor indexToGpuProcessor(int64_t idx) {
  // Entry `i` is the processor whose index is `i`.
  static constexpr gpu::Processor kProcessors[] = {
      gpu::Processor::Sequential, gpu::Processor::ThreadX,
      gpu::Processor::ThreadY,    gpu::Processor::ThreadZ,
      gpu::Processor::BlockX,     gpu::Processor::BlockY,
      gpu::Processor::BlockZ};
  constexpr int64_t kNumProcessors =
      static_cast<int64_t>(sizeof(kProcessors) / sizeof(kProcessors[0]));

  if (idx < 0 || idx >= kNumProcessors)
    return gpu::Processor::Sequential;
  return kProcessors[idx];
}


/// Builds a GPUParallelDimAttr whose value is `dimInt` stored as an
/// index-typed IntegerAttr.
static GPUParallelDimAttr intToParDim(MLIRContext *context, int64_t dimInt) {
  IntegerAttr dimValue = IntegerAttr::get(IndexType::get(context), dimInt);
  return GPUParallelDimAttr::get(context, dimValue);
}


/// Builds the GPUParallelDimAttr for `proc`, storing the index returned by
/// gpuProcessorIndex as an index-typed IntegerAttr.
static GPUParallelDimAttr processorParDim(MLIRContext *context,
                                          gpu::Processor proc) {
  const int64_t processorIndex = gpuProcessorIndex(proc);
  IntegerAttr dimValue =
      IntegerAttr::get(IndexType::get(context), processorIndex);
  return GPUParallelDimAttr::get(context, dimValue);
}


/// Parses a keyword or quoted string naming a gpu::Processor enumerator and
/// stores the corresponding GPUParallelDimAttr in `dim`. Emits an error at the
/// start of the name when it is not a known processor.
static ParseResult parseProcessorValue(AsmParser &parser,
                                       GPUParallelDimAttr &dim) {
  const llvm::SMLoc nameLoc = parser.getCurrentLocation();
  std::string name;
  if (failed(parser.parseKeywordOrString(&name)))
    return failure();

  if (auto processor = gpu::symbolizeProcessor(name)) {
    dim = intToParDim(parser.getContext(), gpuProcessorIndex(*processor));
    return success();
  }
  return parser.emitError(nameLoc)
         << "expected one of ::mlir::gpu::Processor enum names";
}


/// Prints the gpu::Processor name that corresponds to the integer value
/// stored in `attr`.
static void printProcessorValue(AsmPrinter &printer,
                                const GPUParallelDimAttr &attr) {
  const int64_t processorIndex = attr.getValue().getInt();
  printer << gpu::stringifyProcessor(indexToGpuProcessor(processorIndex));
}


} // namespace


//===----------------------------------------------------------------------===//

// KernelEnvironmentOp

//===----------------------------------------------------------------------===//


/// Region-branch hook: delegates to the shared single-region helper using this
/// op's only region.
void KernelEnvironmentOp::getSuccessorRegions(
    RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &regions) {
  Region &body = getRegion();
  getSingleRegionOpSuccessorRegions(getOperation(), body, point, regions);
}


/// Region-branch hook: returns this op's results when `successor` is the
/// parent, and an empty range otherwise.
ValueRange KernelEnvironmentOp::getSuccessorInputs(RegionSuccessor successor) {
  Operation *self = getOperation();
  return getSingleRegionSuccessorInputs(self, successor);
}


/// Registers the canonicalization patterns for acc.kernel_environment. The
/// only pattern added is RemoveEmptyKernelEnvironment.
void KernelEnvironmentOp::getCanonicalizationPatterns(

    RewritePatternSet &results, MLIRContext *context) {

  results.add<RemoveEmptyKernelEnvironment>(context);

}


/// Creates a KernelEnvironmentOp through `builder` at the location of
/// `computeConstruct`, forwarding the construct's data clause operands and its
/// async and wait operands/attributes. An empty block is then added to the new
/// op's region and `builder`'s insertion point is moved to the start of that
/// block. Returns the created op.
template <typename ComputeConstructT>
KernelEnvironmentOp
KernelEnvironmentOp::createAndPopulate(ComputeConstructT computeConstruct,
                                       OpBuilder &builder) {
  Location loc = computeConstruct->getLoc();
  auto kernelEnvironment = KernelEnvironmentOp::create(
      builder, loc, computeConstruct.getDataClauseOperands(),
      computeConstruct.getAsyncOperands(),
      computeConstruct.getAsyncOperandsDeviceTypeAttr(),
      computeConstruct.getAsyncOnlyAttr(), computeConstruct.getWaitOperands(),
      computeConstruct.getWaitOperandsSegmentsAttr(),
      computeConstruct.getWaitOperandsDeviceTypeAttr(),
      computeConstruct.getHasWaitDevnumAttr(),
      computeConstruct.getWaitOnlyAttr());

  // Subsequent builder insertions go into the new, initially empty body.
  Region &envRegion = kernelEnvironment.getRegion();
  Block &body = envRegion.emplaceBlock();
  builder.setInsertionPointToStart(&body);
  return kernelEnvironment;
}


// Explicit instantiations of KernelEnvironmentOp::createAndPopulate for
// ParallelOp, KernelsOp, and SerialOp, so these specializations are emitted
// in this translation unit.
template KernelEnvironmentOp

KernelEnvironmentOp::createAndPopulate<ParallelOp>(ParallelOp, OpBuilder &);

template KernelEnvironmentOp

KernelEnvironmentOp::createAndPopulate<KernelsOp>(KernelsOp, OpBuilder &);

template KernelEnvironmentOp

KernelEnvironmentOp::createAndPopulate<SerialOp>(SerialOp, OpBuilder &);


//===----------------------------------------------------------------------===//

// FirstprivateMapInitialOp

//===----------------------------------------------------------------------===//


/// Verifies the op, checking in order that: the data clause is
/// acc_firstprivate; a var operand is present; the var type is pointer-like
/// or mappable; for a pointer-like var, varType differs from the var's own
/// type; and no data clause modifiers are set.
LogicalResult FirstprivateMapInitialOp::verify() {
  if (getDataClause() != acc::DataClause::acc_firstprivate)
    return emitError("data clause associated with firstprivate operation must "
                     "match its intent");

  Value var = getVar();
  if (!var)
    return emitError("must have var operand");

  Type varTy = var.getType();
  const bool isPointerLike = mlir::isa<mlir::acc::PointerLikeType>(varTy);
  if (!isPointerLike && !mlir::isa<mlir::acc::MappableType>(varTy))
    return emitError("var must be mappable or pointer-like");

  // For pointer-like vars, varType is expected to be the element type, so it
  // must not equal the pointer-like type itself.
  if (isPointerLike && getVarType() == varTy)
    return emitError("varType must capture the element type of var");

  if (getModifiers() != acc::DataClauseModifier::none)
    return emitError("no data clause modifiers are allowed");

  return success();
}


/// Reports the memory effects of the op: a read of the current-device-id
/// resource, a read of the var operand, and a write to the accVar result.
void FirstprivateMapInitialOp::getEffects(
    SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
        &effects) {
  auto *deviceIdResource = acc::CurrentDeviceIdResource::get();
  effects.emplace_back(MemoryEffects::Read::get(), deviceIdResource);

  addOperandEffect<MemoryEffects::Read>(effects, getVarMutable());
  addResultEffect<MemoryEffects::Write>(effects, getAccVar());
}


//===----------------------------------------------------------------------===//

// ReductionInitOp

//===----------------------------------------------------------------------===//


/// Region-branch hook: delegates to the shared single-region helper using this
/// op's only region.
void ReductionInitOp::getSuccessorRegions(
    RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &regions) {
  Region &body = getRegion();
  getSingleRegionOpSuccessorRegions(getOperation(), body, point, regions);
}


/// Appends invocation bounds with a lower and upper bound of 1 for the op's
/// region. `operands` is not consulted.
void ReductionInitOp::getRegionInvocationBounds(
    ArrayRef<Attribute> operands,
    SmallVectorImpl<InvocationBounds> &invocationBounds) {
  invocationBounds.push_back(InvocationBounds(/*lb=*/1, /*ub=*/1));
}


/// Region-branch hook: returns this op's results when `successor` is the
/// parent, and an empty range otherwise.
ValueRange ReductionInitOp::getSuccessorInputs(RegionSuccessor successor) {
  Operation *self = getOperation();
  return getSingleRegionSuccessorInputs(self, successor);
}


/// Verifies the region terminator. When the first block ends in an acc.yield,
/// it must yield exactly one value whose type matches the type of `var`. Any
/// other terminator is accepted here.
LogicalResult ReductionInitOp::verify() {
  Operation *terminator = getRegion().front().getTerminator();
  auto yieldOp = dyn_cast<acc::YieldOp>(terminator);
  if (!yieldOp)
    return success();

  if (yieldOp.getNumOperands() != 1)
    return emitOpError(
        "region must yield exactly one value (private storage)");

  if (yieldOp.getOperand(0).getType() != getVar().getType())
    return emitOpError("yielded value type must match var type");

  return success();
}


//===----------------------------------------------------------------------===//

// ReductionCombineRegionOp

//===----------------------------------------------------------------------===//


/// Region-branch hook: delegates to the shared single-region helper using this
/// op's only region.
void ReductionCombineRegionOp::getSuccessorRegions(
    RegionBranchPoint point, SmallVectorImpl<RegionSuccessor> &regions) {
  Region &body = getRegion();
  getSingleRegionOpSuccessorRegions(getOperation(), body, point, regions);
}


/// Appends invocation bounds with a lower and upper bound of 1 for the op's
/// region. `operands` is not consulted.
void ReductionCombineRegionOp::getRegionInvocationBounds(
    ArrayRef<Attribute> operands,
    SmallVectorImpl<InvocationBounds> &invocationBounds) {
  invocationBounds.push_back(InvocationBounds(/*lb=*/1, /*ub=*/1));
}


/// Region-branch hook: returns this op's results when `successor` is the
/// parent, and an empty range otherwise.
ValueRange
ReductionCombineRegionOp::getSuccessorInputs(RegionSuccessor successor) {
  Operation *self = getOperation();
  return getSingleRegionSuccessorInputs(self, successor);
}


/// Verifies the region terminator. When the first block ends in an acc.yield,
/// that yield must have no operands. Any other terminator is accepted here.
LogicalResult ReductionCombineRegionOp::verify() {
  Operation *terminator = getRegion().front().getTerminator();
  auto yieldOp = dyn_cast<acc::YieldOp>(terminator);
  if (yieldOp && yieldOp.getNumOperands() != 0)
    return emitOpError("region must be terminated by acc.yield with no "
                       "operands");
  return success();
}


//===----------------------------------------------------------------------===//

// ReductionCombineOp

//===----------------------------------------------------------------------===//


/// Reports the memory effects of the op on the default resource: a read of
/// the source memref operand, and a read followed by a write of the
/// destination memref operand.
void ReductionCombineOp::getEffects(
    SmallVectorImpl<SideEffects::EffectInstance<MemoryEffects::Effect>>
        &effects) {
  OpOperand *src = &getSrcMemrefMutable();
  OpOperand *dest = &getDestMemrefMutable();

  effects.emplace_back(MemoryEffects::Read::get(), src,
                       SideEffects::DefaultResource::get());
  effects.emplace_back(MemoryEffects::Read::get(), dest,
                       SideEffects::DefaultResource::get());
  effects.emplace_back(MemoryEffects::Write::get(), dest,
                       SideEffects::DefaultResource::get());
}


//===----------------------------------------------------------------------===//

// ComputeRegionOp

//===----------------------------------------------------------------------===//


/// Returns the ParWidthOp that defines the first launch argument of `op`
/// whose parallel dimension equals `parDim`. Launch arguments not defined by
/// a ParWidthOp are skipped. Returns a null op when there is no match.
static ParWidthOp getParWidthOpForLaunchArg(ComputeRegionOp op,
                                            GPUParallelDimAttr parDim) {
  for (Value launchArg : op.getLaunchArgs()) {
    if (auto parOp = launchArg.getDefiningOp<ParWidthOp>())
      if (cast<GPUParallelDimAttr>(parOp.getParDim()) == parDim)
        return parOp;
  }
  return nullptr;
}


/// Returns the result of the ParWidthOp that supplies the launch argument for
/// `parDim`, or std::nullopt when no launch argument uses that dimension.
std::optional<Value> ComputeRegionOp::getLaunchArg(GPUParallelDimAttr parDim) {
  ParWidthOp parWidthOp = getParWidthOpForLaunchArg(*this, parDim);
  if (!parWidthOp)
    return std::nullopt;
  return parWidthOp.getResult();
}


/// Returns the `launchArg` value of the ParWidthOp associated with `parDim`,
/// or std::nullopt when there is no such ParWidthOp or its `launchArg` is
/// null.
std::optional<Value>
ComputeRegionOp::getKnownLaunchArg(GPUParallelDimAttr parDim) {
  ParWidthOp parWidthOp = getParWidthOpForLaunchArg(*this, parDim);
  if (parWidthOp && parWidthOp.getLaunchArg())
    return parWidthOp.getLaunchArg();
  return std::nullopt;
}


/// Returns the constant integer value of the known launch argument for
/// `parDim`, or std::nullopt when the launch argument is unknown or
/// getConstantIntValue cannot produce a constant for it.
std::optional<uint64_t>
ComputeRegionOp::getKnownConstantLaunchArg(GPUParallelDimAttr parDim) {
  if (std::optional<Value> width = getKnownLaunchArg(parDim))
    return getConstantIntValue(*width);
  return std::nullopt;
}


/// Appends `value` to the op's input arguments and adds a block argument of
/// the same type (located at this op's location) to the body. Returns the new
/// block argument.
BlockArgument ComputeRegionOp::appendInputArg(Value value) {
  getInputArgsMutable().append(value);
  Block *body = getBody();
  return body->addArgument(value.getType(), getLoc());
}


bool ComputeRegionOp::isEffectivelySerial() {

  auto *ctx = getContext();


  if (getLaunchArg(GPUParallelDimAttr::seqDim(ctx)))

    return true;


  auto checkDim = [&](GPUParallelDimAttr dim) -> bool {

    auto val = getKnownConstantLaunchArg(dim);

    return val && *val == 1;

  };


  return checkDim(GPUParallelDimAttr::threadXDim(ctx)) &&

         checkDim(GPUParallelDimAttr::threadYDim(ctx)) &&

         checkDim(GPUParallelDimAttr::threadZDim(ctx)) &&

         checkDim(GPUParallelDimAttr::blockXDim(ctx)) &&

         checkDim(GPUParallelDimAttr::blockYDim(ctx)) &&

         checkDim(GPUParallelDimAttr::blockZDim(ctx));

}


/// Returns the entry-block argument at the same position as the launch
/// argument whose defining ParWidthOp has parallel dimension `parDim`. Every
/// launch argument visited is asserted to be defined by a ParWidthOp, and
/// requesting a dimension with no launch argument is treated as unreachable.
BlockArgument ComputeRegionOp::parDimToWidth(GPUParallelDimAttr parDim) {
  size_t pos = 0;
  for (Value launchArg : getLaunchArgs()) {
    auto parOp = launchArg.getDefiningOp<ParWidthOp>();
    assert(parOp);
    if (cast<GPUParallelDimAttr>(parOp.getParDim()) == parDim) {
      Block &entryBlock = getRegion().front();
      assert(pos < entryBlock.getNumArguments() &&
             "launch arg position out of range");
      return entryBlock.getArgument(pos);
    }
    ++pos;
  }
  llvm_unreachable("attempting to get unspecified parDim");
}


/// Returns the parallel dimension of every launch argument, in launch
/// argument order.
///
/// Each launch argument must be defined by a ParWidthOp. This is asserted, in
/// the same way as in parDimToWidth, instead of dereferencing a null op.
SmallVector<GPUParallelDimAttr> ComputeRegionOp::getLaunchParDims() {
  SmallVector<GPUParallelDimAttr> parDims;
  parDims.reserve(getLaunchArgs().size());
  for (Value launchArg : getLaunchArgs()) {
    auto parOp = launchArg.getDefiningOp<ParWidthOp>();
    assert(parOp && "launch argument must be defined by a ParWidthOp");
    auto launchArgDim = cast<GPUParallelDimAttr>(parOp.getParDim());
    // Rebuild the attribute from its integer value so the result always uses
    // the index-typed form produced by intToParDim.
    int64_t dimInt = launchArgDim.getValue().getInt();
    parDims.push_back(intToParDim(getContext(), dimInt));
  }
  return parDims;
}


/// Returns the operand that corresponds to `blockArg`. Block arguments are
/// numbered with the launch arguments first, followed by the input arguments.
Value ComputeRegionOp::getOperand(BlockArgument blockArg) {
  const unsigned argIndex = blockArg.getArgNumber();
  const unsigned launchCount = getLaunchArgs().size();
  assert(argIndex < (launchCount + getInputArgs().size()) &&
         "invalid block argument");

  const bool isLaunchArg = argIndex < launchCount;
  if (!isLaunchArg)
    return getInputArgs()[argIndex - launchCount];
  return getLaunchArgs()[argIndex];
}


/// Convenience overload of parDimToWidth that takes a gpu::Processor.
BlockArgument ComputeRegionOp::gpuParWidth(gpu::Processor processor) {
  GPUParallelDimAttr parDim = GPUParallelDimAttr::get(getContext(), processor);
  return parDimToWidth(parDim);
}


/// Verifies that the first block of the region has exactly one argument per
/// launch argument plus one per input argument.
LogicalResult ComputeRegionOp::verify() {
  const unsigned numExpected = getLaunchArgs().size() + getInputArgs().size();
  const unsigned numActual = getRegion().front().getNumArguments();
  if (numExpected == numActual)
    return success();

  return emitOpError("expected ")
         << numExpected << " block arguments (launch + input), got "
         << numActual;
}


/// Prints the custom assembly form of the op: an optional `stream(...)`
/// clause, optional `launch(...)` and `ins(...)` assignment lists pairing each
/// body block argument with its operand (the `ins` list is followed by the
/// input operand types), the optional result types, the region without its
/// entry block arguments, and the attribute dictionary with the operand
/// segment size attribute elided.
void ComputeRegionOp::print(OpAsmPrinter &p) {
  ValueRange blockArgs = getBody()->getArguments();
  ValueRange launchOperands = getLaunchArgs();
  ValueRange inputOperands = getInputArgs();

  assert(blockArgs.size() == (launchOperands.size() + inputOperands.size()) &&
         "region args mismatch");

  if (getStream())
    p << " stream(" << getStream() << " : " << getStream().getType() << ")";

  // Block arguments are consumed in order: first by the launch operands, then
  // by the input operands.
  size_t nextArg = 0;
  auto printAssignments = [&](ValueRange operands) {
    for (size_t n = 0, e = operands.size(); n != e; ++n) {
      if (n != 0)
        p << ", ";
      p << blockArgs[nextArg++] << " = " << operands[n];
    }
  };

  if (!launchOperands.empty()) {
    p << " launch(";
    printAssignments(launchOperands);
    p << ")";
  }

  if (!inputOperands.empty()) {
    p << " ins(";
    printAssignments(inputOperands);
    p << ") : (";
    for (size_t n = 0, e = inputOperands.size(); n != e; ++n) {
      if (n != 0)
        p << ", ";
      p << inputOperands[n].getType();
    }
    p << ")";
  }

  p.printOptionalArrowTypeList(getResultTypes());
  p << " ";
  p.printRegion(getRegion(), /*printEntryBlockArgs=*/false);
  p.printOptionalAttrDict((*this)->getAttrs(),
                          /*elidedAttrs=*/getOperandSegmentSizeAttr());
}


/// Parses the custom assembly form of the op:
///
///   [`stream` `(` operand `:` type `)`]
///   [`launch` `(` arg `=` operand, ... `)`]
///   [`ins` `(` arg `=` operand, ... `)` `:` `(` type, ... `)`]
///   [`->` result-types] region [attr-dict]
///
/// Launch region arguments are given the ParWidthType; input region arguments
/// take their types from the `ins` type list. Operands are added to `result`
/// in the order launch, input, stream, and the operand segment size attribute
/// is set to match.
///
/// A type list whose length differs from the number of `ins` assignments is
/// reported as a parse error.
ParseResult ComputeRegionOp::parse(OpAsmParser &parser,
                                   OperationState &result) {
  auto &builder = parser.getBuilder();

  SmallVector<OpAsmParser::Argument> regionArgs;
  OpAsmParser::UnresolvedOperand streamOperand;
  Type streamType;
  SmallVector<OpAsmParser::UnresolvedOperand> launchOperands;
  SmallVector<OpAsmParser::UnresolvedOperand> inputOperands;
  SmallVector<Type> types;

  // Optional stream clause.
  bool hasStream = false;
  if (succeeded(parser.parseOptionalKeyword("stream"))) {
    hasStream = true;
    if (parser.parseLParen() || parser.parseOperand(streamOperand) ||
        parser.parseColon() || parser.parseType(streamType) ||
        parser.parseRParen())
      return failure();
  }

  // Optional launch clause: every launch argument has the ParWidthType, so no
  // types are spelled out in the assembly.
  if (succeeded(parser.parseOptionalKeyword("launch"))) {
    if (parser.parseAssignmentList(regionArgs, launchOperands))
      return failure();
    auto parWidthType = acc::ParWidthType::get(builder.getContext());
    for (size_t i = 0; i < regionArgs.size(); ++i)
      types.push_back(parWidthType);
  }

  // Optional ins clause with an explicit type list.
  if (succeeded(parser.parseOptionalKeyword("ins"))) {
    if (parser.parseAssignmentList(regionArgs, inputOperands) ||
        parser.parseColon() || parser.parseLParen())
      return failure();
    llvm::SMLoc typesLoc = parser.getCurrentLocation();
    if (parser.parseTypeList(types) || parser.parseRParen())
      return failure();
    // `types` already holds one entry per launch argument, so the number of
    // types just parsed is the difference. Without this check a mismatched
    // list would reach zip_equal and the indexed accesses below.
    if (types.size() != regionArgs.size())
      return parser.emitError(typesLoc)
             << "expected " << inputOperands.size()
             << " types for `ins` operands, got "
             << (types.size() - launchOperands.size());
  }

  if (parser.parseOptionalArrowTypeList(result.types))
    return failure();

  for (auto [iterArg, type] : llvm::zip_equal(regionArgs, types))
    iterArg.type = type;

  Region *body = result.addRegion();
  if (parser.parseRegion(*body, regionArgs))
    return failure();

  const size_t numLaunchOperands = launchOperands.size();
  const size_t numInputOperands = inputOperands.size();
  assert(numLaunchOperands + numInputOperands == regionArgs.size() &&
         "compute region args mismatch");

  result.addAttribute(
      ComputeRegionOp::getOperandSegmentSizeAttr(),
      builder.getDenseI32ArrayAttr({static_cast<int32_t>(numLaunchOperands),
                                    static_cast<int32_t>(numInputOperands),
                                    hasStream ? 1 : 0}));

  // Resolve operands in segment order: launch, input, then stream.
  for (size_t i = 0; i < numLaunchOperands; ++i) {
    if (parser.resolveOperand(launchOperands[i], types[i], result.operands))
      return failure();
  }

  for (size_t i = numLaunchOperands; i < regionArgs.size(); ++i) {
    if (parser.resolveOperand(inputOperands[i - numLaunchOperands], types[i],
                              result.operands))
      return failure();
  }

  if (hasStream) {
    if (parser.resolveOperand(streamOperand, streamType, result.operands))
      return failure();
  }

  if (parser.parseOptionalAttrDict(result.attributes))
    return failure();

  return success();
}


//===----------------------------------------------------------------------===//

// GPUParallelDimAttr

//===----------------------------------------------------------------------===//


/// Builds the GPUParallelDimAttr that corresponds to `proc`.
GPUParallelDimAttr GPUParallelDimAttr::get(MLIRContext *context,
                                           gpu::Processor proc) {
  GPUParallelDimAttr parDim = processorParDim(context, proc);
  return parDim;
}

/// Returns the attribute for gpu::Processor::Sequential.
GPUParallelDimAttr GPUParallelDimAttr::seqDim(MLIRContext *context) {
  constexpr gpu::Processor kProcessor = gpu::Processor::Sequential;
  return processorParDim(context, kProcessor);
}

/// Returns the attribute for gpu::Processor::ThreadX.
GPUParallelDimAttr GPUParallelDimAttr::threadXDim(MLIRContext *context) {
  constexpr gpu::Processor kProcessor = gpu::Processor::ThreadX;
  return processorParDim(context, kProcessor);
}

/// Returns the attribute for gpu::Processor::ThreadY.
GPUParallelDimAttr GPUParallelDimAttr::threadYDim(MLIRContext *context) {
  constexpr gpu::Processor kProcessor = gpu::Processor::ThreadY;
  return processorParDim(context, kProcessor);
}

/// Returns the attribute for gpu::Processor::ThreadZ.
GPUParallelDimAttr GPUParallelDimAttr::threadZDim(MLIRContext *context) {
  constexpr gpu::Processor kProcessor = gpu::Processor::ThreadZ;
  return processorParDim(context, kProcessor);
}

/// Returns the attribute for gpu::Processor::BlockX.
GPUParallelDimAttr GPUParallelDimAttr::blockXDim(MLIRContext *context) {
  constexpr gpu::Processor kProcessor = gpu::Processor::BlockX;
  return processorParDim(context, kProcessor);
}

/// Returns the attribute for gpu::Processor::BlockY.
GPUParallelDimAttr GPUParallelDimAttr::blockYDim(MLIRContext *context) {
  constexpr gpu::Processor kProcessor = gpu::Processor::BlockY;
  return processorParDim(context, kProcessor);
}

/// Returns the attribute for gpu::Processor::BlockZ.
GPUParallelDimAttr GPUParallelDimAttr::blockZDim(MLIRContext *context) {
  constexpr gpu::Processor kProcessor = gpu::Processor::BlockZ;
  return processorParDim(context, kProcessor);
}


/// Parses the form `<` processor_name `>`. On any failure an additional error
/// describing the expected format is emitted at the current location and a
/// null attribute is returned.
Attribute GPUParallelDimAttr::parse(AsmParser &parser, Type type) {
  GPUParallelDimAttr dim;
  const bool parsed = succeeded(parser.parseLess()) &&
                      succeeded(parseProcessorValue(parser, dim)) &&
                      succeeded(parser.parseGreater());
  if (parsed)
    return dim;

  parser.emitError(parser.getCurrentLocation(),
                   "expected format `<` processor_name `>`");
  return {};
}


/// Prints the attribute as `<` processor_name `>`.
void GPUParallelDimAttr::print(AsmPrinter &printer) const {
  const GPUParallelDimAttr &self = *this;
  printer << "<";
  printProcessorValue(printer, self);
  printer << ">";
}


/// Returns the thread dimension attribute for `index`: 0 is ThreadX, 1 is
/// ThreadY, and 2 is ThreadZ. Any other index is asserted against.
GPUParallelDimAttr GPUParallelDimAttr::threadDim(MLIRContext *context,
                                                 unsigned index) {
  assert(index <= 2 && "thread dimension index must be 0, 1, or 2");
  if (index == 0)
    return threadXDim(context);
  if (index == 1)
    return threadYDim(context);
  if (index == 2)
    return threadZDim(context);
  llvm_unreachable("validated thread dimension index");
}


/// Returns the block dimension attribute for `index`: 0 is BlockX, 1 is
/// BlockY, and 2 is BlockZ. Any other index is asserted against.
GPUParallelDimAttr GPUParallelDimAttr::blockDim(MLIRContext *context,
                                                unsigned index) {
  assert(index <= 2 && "block dimension index must be 0, 1, or 2");
  if (index == 0)
    return blockXDim(context);
  if (index == 1)
    return blockYDim(context);
  if (index == 2)
    return blockZDim(context);
  llvm_unreachable("validated block dimension index");
}


/// Returns the gpu::Processor that corresponds to the stored integer value.
gpu::Processor GPUParallelDimAttr::getProcessor() const {
  const int64_t storedIndex = getValue().getInt();
  return indexToGpuProcessor(storedIndex);
}


int GPUParallelDimAttr::getOrder() const {

  return gpuProcessorIndex(getProcessor());

}


/// Returns the dimension one step above this one in the processor ordering.
/// The highest dimension (BlockZ, order 6) returns itself.
GPUParallelDimAttr GPUParallelDimAttr::getOneHigher() const {
  const int order = getOrder();
  const bool isHighest = order >= 6; // BlockZ is the highest
  if (!isHighest)
    return get(getContext(), indexToGpuProcessor(order + 1));
  return *this;
}


/// Returns the dimension one step below this one in the processor ordering.
/// The lowest dimension (Sequential, order 0) returns itself.
GPUParallelDimAttr GPUParallelDimAttr::getOneLower() const {
  const int order = getOrder();
  const bool isLowest = order <= 0; // Sequential is the lowest
  if (!isLowest)
    return get(getContext(), indexToGpuProcessor(order - 1));
  return *this;
}


bool GPUParallelDimAttr::isSeq() const {

  return getProcessor() == gpu::Processor::Sequential;

}

bool GPUParallelDimAttr::isThreadX() const {

  return getProcessor() == gpu::Processor::ThreadX;

}

bool GPUParallelDimAttr::isThreadY() const {

  return getProcessor() == gpu::Processor::ThreadY;

}

bool GPUParallelDimAttr::isThreadZ() const {

  return getProcessor() == gpu::Processor::ThreadZ;

}

bool GPUParallelDimAttr::isBlockX() const {

  return getProcessor() == gpu::Processor::BlockX;

}

bool GPUParallelDimAttr::isBlockY() const {

  return getProcessor() == gpu::Processor::BlockY;

}

bool GPUParallelDimAttr::isBlockZ() const {

  return getProcessor() == gpu::Processor::BlockZ;

}

bool GPUParallelDimAttr::isAnyThread() const {

  return isThreadX() || isThreadY() || isThreadZ();

}

bool GPUParallelDimAttr::isAnyBlock() const {

  return isBlockX() || isBlockY() || isBlockZ();

}


//===----------------------------------------------------------------------===//

// GPUParallelDimsAttr

//===----------------------------------------------------------------------===//


/// Returns the dims attribute that holds only the sequential dimension.
GPUParallelDimsAttr GPUParallelDimsAttr::seq(MLIRContext *ctx) {
  SmallVector<GPUParallelDimAttr> dims;
  dims.push_back(GPUParallelDimAttr::seqDim(ctx));
  return GPUParallelDimsAttr::get(ctx, dims);
}


/// True when the array holds exactly one dimension and that dimension is
/// sequential. The array is asserted to be non-empty.
bool GPUParallelDimsAttr::isSeq() const {
  assert(!getArray().empty() && "no par_dims found");
  if (getArray().size() != 1)
    return false;

  auto onlyDim = dyn_cast<GPUParallelDimAttr>(getArray()[0]);
  assert(onlyDim && "expected GPUParallelDimAttr");
  return onlyDim.isSeq();
}


/// True when the dims are not sequential (the negation of isSeq).
bool GPUParallelDimsAttr::isParallel() const {
  const bool sequential = isSeq();
  return !sequential;
}

/// True when the array holds more than one dimension.
bool GPUParallelDimsAttr::isMultiDim() const {
  const auto numDims = getArray().size();
  return numDims > 1;
}


bool GPUParallelDimsAttr::hasAnyBlockLevel() const {

  return llvm::any_of(

      getArray(), [](const GPUParallelDimAttr &p) { return p.isAnyBlock(); });

}


bool GPUParallelDimsAttr::hasOnlyBlockLevel() const {

  return !getArray().empty() &&

         llvm::all_of(getArray(), [](const GPUParallelDimAttr &p) {

           return p.isAnyBlock();

         });

}


bool GPUParallelDimsAttr::hasOnlyThreadYLevel() const {

  return !getArray().empty() &&

         llvm::all_of(getArray(), [](const GPUParallelDimAttr &p) {

           return p.isThreadY();

         });

}


bool GPUParallelDimsAttr::hasOnlyThreadXLevel() const {

  return !getArray().empty() &&

         llvm::all_of(getArray(), [](const GPUParallelDimAttr &p) {

           return p.isThreadX();

         });

}


/// Parses a square-bracketed, comma-separated list of processor names into a
/// GPUParallelDimsAttr. Returns a null attribute when parsing fails.
Attribute GPUParallelDimsAttr::parse(AsmParser &parser, Type type) {
  SmallVector<GPUParallelDimAttr> parDims;

  // Parses one list element and appends it to `parDims`.
  auto parseElement = [&]() -> ParseResult {
    GPUParallelDimAttr dim;
    if (failed(parseProcessorValue(parser, dim)))
      return failure();
    parDims.push_back(dim);
    return success();
  };

  ParseResult listResult = parser.parseCommaSeparatedList(
      AsmParser::Delimiter::Square, parseElement,
      "list of OpenACC GPU parallel dimensions");
  if (failed(listResult))
    return {};

  return GPUParallelDimsAttr::get(parser.getContext(), parDims);
}


/// Prints the dimensions as a square-bracketed list of processor names
/// separated by ", ".
void GPUParallelDimsAttr::print(AsmPrinter &printer) const {
  printer << "[";
  bool isFirst = true;
  for (const GPUParallelDimAttr &dim : getArray()) {
    if (!isFirst)
      printer << ", ";
    isFirst = false;
    printProcessorValue(printer, dim);
  }
  printer << "]";
}

success
return success()

emitOpError
p<< " : "<< getMemRefType()<< ", "<< getType();}static LogicalResult verifyVectorMemoryOp(Operation *op, MemRefType memrefType, VectorType vectorType) { if(memrefType.getElementType() !=vectorType.getElementType()) return op-> emitOpError("requires memref and vector types of the same elemental type")
Given a list of lists of parsed operands, populates uniqueOperands with unique operands.

ControlFlowInterfaces.h

addOperandEffect
static void addOperandEffect(SmallVectorImpl< SideEffects::EffectInstance< MemoryEffects::Effect > > &effects, MutableOperandRange operand)
Helper to add an effect on an operand, referenced by its mutable range.
Definition OpenACC.cpp:1224

addResultEffect
static void addResultEffect(SmallVectorImpl< SideEffects::EffectInstance< MemoryEffects::Effect > > &effects, Value result)
Helper to add an effect on a result value.
Definition OpenACC.cpp:1234

getSingleRegionOpSuccessorRegions
static void getSingleRegionOpSuccessorRegions(Operation *op, Region &region, RegionBranchPoint point, SmallVectorImpl< RegionSuccessor > &regions)
Generic helper for single-region OpenACC ops that execute their body once and then return to the pare...
Definition OpenACC.cpp:422

getSingleRegionSuccessorInputs
static ValueRange getSingleRegionSuccessorInputs(Operation *op, RegionSuccessor successor)
Definition OpenACC.cpp:433

GPUDialect.h

Region.h

ValueRange
b ValueRange
Definition LinalgTransformOps.cpp:2125

result
result
Definition LinalgTransformOps.cpp:2120

LogicalResult.h

getContext
b getContext())

getParWidthOpForLaunchArg
static ParWidthOp getParWidthOpForLaunchArg(ComputeRegionOp op, GPUParallelDimAttr parDim)
Definition OpenACCCG.cpp:359

StaticValueUtils.h

int64_t

llvm::ArrayRef
Definition LLVM.h:40

llvm::SmallVectorImpl
Definition LLVM.h:66

llvm::SmallVector
Definition LLVM.h:64

mlir::AsmParser
This base class exposes generic asm parser hooks, usable across the various derived parsers.
Definition OpImplementation.h:578

mlir::AsmParser::Delimiter::Square
@ Square
Square brackets surrounding zero or more operands.
Definition OpImplementation.h:820

mlir::AsmParser::getBuilder
virtual Builder & getBuilder() const =0
Return a builder which provides useful access to MLIRContext, global objects like types and attribute...

mlir::AsmParser::parseCommaSeparatedList
virtual ParseResult parseCommaSeparatedList(Delimiter delimiter, function_ref< ParseResult()> parseElementFn, StringRef contextMessage=StringRef())=0
Parse a list of comma-separated items with an optional delimiter.

mlir::AsmParser::parseOptionalAttrDict
virtual ParseResult parseOptionalAttrDict(NamedAttrList &result)=0
Parse a named dictionary into 'result' if it is present.

mlir::AsmParser::parseOptionalKeyword
virtual ParseResult parseOptionalKeyword(StringRef keyword)=0
Parse the given keyword if present.

mlir::AsmParser::getContext
MLIRContext * getContext() const
Definition AsmPrinter.cpp:72

mlir::AsmParser::parseRParen
virtual ParseResult parseRParen()=0
Parse a ) token.

mlir::AsmParser::emitError
virtual InFlightDiagnostic emitError(SMLoc loc, const Twine &message={})=0
Emit a diagnostic at the specified location and return failure.

mlir::AsmParser::parseKeywordOrString
ParseResult parseKeywordOrString(std::string *result)
Parse a keyword or a quoted string.
Definition OpImplementation.h:954

mlir::AsmParser::parseLess
virtual ParseResult parseLess()=0
Parse a '<' token.

mlir::AsmParser::getCurrentLocation
virtual SMLoc getCurrentLocation()=0
Get the location of the next token and store it into the argument.

mlir::AsmParser::parseColon
virtual ParseResult parseColon()=0
Parse a : token.

mlir::AsmParser::parseGreater
virtual ParseResult parseGreater()=0
Parse a '>' token.

mlir::AsmParser::parseLParen
virtual ParseResult parseLParen()=0
Parse a ( token.

mlir::AsmParser::parseType
virtual ParseResult parseType(Type &result)=0
Parse a type.

mlir::AsmParser::parseOptionalArrowTypeList
virtual ParseResult parseOptionalArrowTypeList(SmallVectorImpl< Type > &result)=0
Parse an optional arrow followed by a type list.

mlir::AsmParser::parseTypeList
ParseResult parseTypeList(SmallVectorImpl< Type > &result)
Parse a type list.
Definition AsmPrinter.cpp:77

mlir::AsmPrinter
This base class exposes generic asm printer hooks, usable across the various derived printers.
Definition OpImplementation.h:120

mlir::AsmPrinter::printOptionalArrowTypeList
void printOptionalArrowTypeList(TypeRange &&types)
Print an optional arrow followed by a type list.
Definition OpImplementation.h:253

mlir::Attribute
Attributes are known-constant values of operations.
Definition Attributes.h:25

mlir::BlockArgument
This class represents an argument of a Block.
Definition Value.h:309

mlir::BlockArgument::getArgNumber
unsigned getArgNumber() const
Returns the number of this argument.
Definition Value.h:321

mlir::Block
Block represents an ordered list of Operations.
Definition Block.h:33

mlir::Block::empty
bool empty()
Definition Block.h:158

mlir::Block::front
Operation & front()
Definition Block.h:163

mlir::Block::getTerminator
Operation * getTerminator()
Get the terminator operation of this block.
Definition Block.cpp:249

mlir::Builder::getDenseI32ArrayAttr
DenseI32ArrayAttr getDenseI32ArrayAttr(ArrayRef< int32_t > values)
Definition Builders.cpp:167

mlir::Builder::getContext
MLIRContext * getContext() const
Definition Builders.h:56

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63

mlir::MutableOperandRange
This class provides a mutable adaptor for a range of operands.
Definition ValueRange.h:118

mlir::MutableOperandRange::size
unsigned size() const
Returns the current size of the range.
Definition ValueRange.h:156

mlir::OpAsmParser
The OpAsmParser has methods for interacting with the asm parser: parsing things from it,...
Definition OpImplementation.h:1516

mlir::OpAsmParser::parseRegion
virtual ParseResult parseRegion(Region &region, ArrayRef< Argument > arguments={}, bool enableNameShadowing=false)=0
Parses a region.

mlir::OpAsmParser::parseAssignmentList
ParseResult parseAssignmentList(SmallVectorImpl< Argument > &lhs, SmallVectorImpl< UnresolvedOperand > &rhs)
Parse a list of assignments of the form (x1 = y1, x2 = y2, ...)
Definition OpImplementation.h:1769

mlir::OpAsmParser::resolveOperand
virtual ParseResult resolveOperand(const UnresolvedOperand &operand, Type type, SmallVectorImpl< Value > &result)=0
Resolve an operand to an SSA value, emitting an error on failure.

mlir::OpAsmParser::parseOperand
virtual ParseResult parseOperand(UnresolvedOperand &result, bool allowResultNumber=true)=0
Parse a single SSA value operand name along with a result number if allowResultNumber is true.

mlir::OpAsmPrinter
This is a pure-virtual base class that exposes the asmprinter hooks necessary to implement a custom p...
Definition OpImplementation.h:455

mlir::OpAsmPrinter::printOptionalAttrDict
virtual void printOptionalAttrDict(ArrayRef< NamedAttribute > attrs, ArrayRef< StringRef > elidedAttrs={})=0
If the specified operation has attributes, print out an attribute dictionary with their values.

mlir::OpAsmPrinter::printRegion
virtual void printRegion(Region &blocks, bool printEntryBlockArgs=true, bool printBlockTerminators=true, bool printEmptyBlock=false)=0
Prints a region.

mlir::OpBuilder
This class helps build Operations.
Definition Builders.h:209

mlir::OpBuilder::setInsertionPointToStart
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Definition Builders.h:433

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88

mlir::Operation::getResults
result_range getResults()
Definition Operation.h:444

mlir::RegionBranchPoint
This class represents a point being branched from in the methods of the RegionBranchOpInterface.
Definition ControlFlowInterfaces.h:239

mlir::RegionBranchPoint::isParent
bool isParent() const
Returns true if branching from the parent op.
Definition ControlFlowInterfaces.h:252

mlir::RegionSuccessor
This class represents a successor of a region.
Definition ControlFlowInterfaces.h:199

mlir::RegionSuccessor::parent
static RegionSuccessor parent()
Initialize a successor that branches after/out of the parent operation.
Definition ControlFlowInterfaces.h:207

mlir::RegionSuccessor::isParent
bool isParent() const
Return true if the successor is the parent operation.
Definition ControlFlowInterfaces.h:214

mlir::Region
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26

mlir::RewritePatternSet
Definition PatternMatch.h:822

mlir::RewritePatternSet::add
RewritePatternSet & add(ConstructorArg &&arg, ConstructorArgs &&...args)
Add an instance of each of the pattern types 'Ts' to the pattern list with the given arguments.
Definition PatternMatch.h:861

mlir::RewriterBase::eraseOp
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
Definition PatternMatch.cpp:155

mlir::RewriterBase::replaceOpWithNewOp
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition PatternMatch.h:529

mlir::SideEffects::EffectInstance
This class represents a specific instance of an effect.
Definition SideEffectInterfaces.h:203

mlir::SideEffects::Effect::Base< DerivedEffect, Effect >::get
static DerivedEffect * get()
Definition SideEffectInterfaces.h:44

mlir::SideEffects::Resource::Base< CurrentDeviceIdResource >::get
static CurrentDeviceIdResource * get()
Definition SideEffectInterfaces.h:101

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74

mlir::ValueRange
This class provides an abstraction over the different types of ranges over Values.
Definition ValueRange.h:387

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96

mlir::Value::getType
Type getType() const
Return the type of this value.
Definition Value.h:105

OpenACC.h

BuiltinAttributes.h

BuiltinTypes.h

mlir::acc
Definition OpenACCSupport.h:65

mlir::acc::getAccVar
mlir::Value getAccVar(mlir::Operation *accDataClauseOp)
Used to obtain the accVar from a data clause operation.
Definition OpenACC.cpp:5080

mlir::acc::getVar
mlir::Value getVar(mlir::Operation *accDataClauseOp)
Used to obtain the var from a data clause operation.
Definition OpenACC.cpp:5049

mlir::acc::getDataClause
std::optional< mlir::acc::DataClause > getDataClause(mlir::Operation *accDataEntryOp)
Used to obtain the dataClause from a data entry operation.
Definition OpenACC.cpp:5153

mlir::acc::getVarType
mlir::Type getVarType(mlir::Operation *accDataClauseOp)
Used to obtain the varType from a data clause operation which records the type of the variable.
Definition OpenACC.cpp:5057

mlir::index
Definition IndexToLLVM.h:23

mlir::remark::failed
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:717

mlir
Include the generated interface declarations.
Definition AliasAnalysis.h:19

mlir::getConstantIntValue
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Definition StaticValueUtils.cpp:148

mlir::getType
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition Utils.cpp:305

mlir::emitError
InFlightDiagnostic emitError(Location loc)
Utility method to emit an error message using this location.
Definition Diagnostics.cpp:332

mlir::HoistingKind::Block
@ Block
Definition AllocationOpInterface.h:24

mlir::get
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
Definition BytecodeImplementation.h:516

mlir::OpAsmParser::UnresolvedOperand
This is the representation of an operand reference.
Definition OpImplementation.h:1567

mlir::OpRewritePattern
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition PatternMatch.h:314

mlir::OperationState
This represents an operation in an abstracted form, suitable for use with the builder APIs.
Definition OperationSupport.h:941

j
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.