16 #include "llvm/ADT/SmallVectorExtras.h"
17 #include "llvm/Support/FormatVariadic.h"
27 workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
28 for (
const auto &en :
llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
31 auto type = dyn_cast<MemRefType>(attribution.
getType());
32 assert(type && type.hasStaticShape() &&
"unexpected type in attribution");
34 uint64_t numElements = type.getNumElements();
39 std::string name = std::string(
40 llvm::formatv(
"__wg_{0}_{1}", gpuFuncOp.getName(), en.index()));
41 uint64_t alignment = 0;
43 dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getWorkgroupAttributionAttr(
44 en.index(), LLVM::LLVMDialect::getAlignAttrName())))
45 alignment = alignAttr.getInt();
46 auto globalOp = rewriter.
create<LLVM::GlobalOp>(
47 gpuFuncOp.getLoc(), arrayType,
false,
48 LLVM::Linkage::Internal, name,
Attribute(), alignment,
50 workgroupBuffers.push_back(globalOp);
55 gpuFuncOp.front().getNumArguments());
58 gpuFuncOp.getFunctionType(),
false,
62 diag <<
"failed to convert function signature type for: "
63 << gpuFuncOp.getFunctionType();
71 for (
const auto &attr : gpuFuncOp->getAttrs()) {
73 attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
75 gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
76 attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
77 attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName())
79 if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
80 argAttrs = gpuFuncOp.getArgAttrsAttr();
83 attributes.push_back(attr);
88 if (gpuFuncOp.isKernel())
89 attributes.emplace_back(kernelAttributeName, rewriter.
getUnitAttr());
90 auto llvmFuncOp = rewriter.
create<LLVM::LLVMFuncOp>(
91 gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
92 LLVM::Linkage::External,
false, LLVM::CConv::C,
105 unsigned numProperArguments = gpuFuncOp.getNumArguments();
108 LLVM::GlobalOp global = en.value();
109 Value address = rewriter.
create<LLVM::AddressOfOp>(
112 global.getAddrSpace()),
113 global.getSymNameAttr());
115 cast<LLVM::LLVMArrayType>(global.getType()).getElementType();
119 global.getAddrSpace()),
126 Value attribution = gpuFuncOp.getWorkgroupAttributions()[en.index()];
127 auto type = cast<MemRefType>(attribution.getType());
130 signatureConversion.
remapInput(numProperArguments + en.index(), descr);
134 unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions();
136 for (
const auto &en :
llvm::enumerate(gpuFuncOp.getPrivateAttributions())) {
137 Value attribution = en.value();
138 auto type = cast<MemRefType>(attribution.
getType());
139 assert(type && type.hasStaticShape() &&
"unexpected type in attribution");
147 Value numElements = rewriter.
create<LLVM::ConstantOp>(
148 gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
149 uint64_t alignment = 0;
151 dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getPrivateAttributionAttr(
152 en.index(), LLVM::LLVMDialect::getAlignAttrName())))
153 alignment = alignAttr.getInt();
155 gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
159 numProperArguments + numWorkgroupAttributions + en.index(), descr);
167 &signatureConversion)))
177 auto memrefTy = dyn_cast<MemRefType>(en.value());
180 assert(memrefTy.hasStaticShape() &&
181 "Bare pointer convertion used with dynamically-shaped memrefs");
185 assert(remapping && remapping->size == 1 &&
186 "Type converter should produce 1-to-1 mapping for bare memrefs");
188 llvmFuncOp.getBody().getArgument(remapping->inputNo);
189 auto placeholder = rewriter.
create<LLVM::UndefOp>(
201 auto memrefTy = en.value().dyn_cast<MemRefType>();
203 ? argAttrs[en.index()].cast<DictionaryAttr>()
206 auto copyPointerAttribute = [&](StringRef attrName) {
207 Attribute attr = argAttr.erase(attrName);
213 if (remapping->size > 1 &&
214 attrName == LLVM::LLVMDialect::getNoAliasAttrName()) {
216 "Cannot copy noalias with non-bare pointers.\n");
219 for (
size_t i = 0, e = remapping->size; i < e; ++i) {
220 if (llvmFuncOp.getArgument(remapping->inputNo + i)
222 .isa<LLVM::LLVMPointerType>()) {
223 llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
232 copyPointerAttribute(LLVM::LLVMDialect::getNoAliasAttrName());
233 copyPointerAttribute(LLVM::LLVMDialect::getReadonlyAttrName());
234 copyPointerAttribute(LLVM::LLVMDialect::getWriteOnlyAttrName());
235 copyPointerAttribute(LLVM::LLVMDialect::getNonNullAttrName());
236 copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
237 copyPointerAttribute(
238 LLVM::LLVMDialect::getDereferenceableOrNullAttrName());
246 const char formatStringPrefix[] =
"printfFormat_";
248 unsigned stringNumber = 0;
251 stringConstName.clear();
252 (formatStringPrefix + Twine(stringNumber++)).toStringRef(stringConstName);
253 }
while (moduleOp.lookupSymbol(stringConstName));
254 return stringConstName;
257 template <
typename T>
261 LLVM::LLVMFunctionType type) {
262 LLVM::LLVMFuncOp ret;
263 if (!(ret = moduleOp.template lookupSymbol<LLVM::LLVMFuncOp>(name))) {
264 ConversionPatternRewriter::InsertionGuard guard(rewriter);
266 ret = rewriter.
create<LLVM::LLVMFuncOp>(loc, name, type,
267 LLVM::Linkage::External);
273 gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
275 Location loc = gpuPrintfOp->getLoc();
284 auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
289 LLVM::LLVMFuncOp ocklAppendArgs;
290 if (!adaptor.getArgs().empty()) {
292 moduleOp, loc, rewriter,
"__ockl_printf_append_args",
294 llvmI64, {llvmI64, llvmI32, llvmI64, llvmI64, llvmI64,
295 llvmI64, llvmI64, llvmI64, llvmI64, llvmI32}));
298 moduleOp, loc, rewriter,
"__ockl_printf_append_string_n",
301 {llvmI64, i8Ptr, llvmI64, llvmI32}));
304 Value zeroI64 = rewriter.
create<LLVM::ConstantOp>(loc, llvmI64, 0);
305 auto printfBeginCall = rewriter.
create<LLVM::CallOp>(loc, ocklBegin, zeroI64);
306 Value printfDesc = printfBeginCall.getResult();
312 formatString.push_back(
'\0');
313 size_t formatStringSize = formatString.size_in_bytes();
316 LLVM::GlobalOp global;
320 global = rewriter.
create<LLVM::GlobalOp>(
322 true, LLVM::Linkage::Internal, stringConstName,
327 Value globalPtr = rewriter.
create<LLVM::AddressOfOp>(
330 global.getSymNameAttr());
334 rewriter.
create<LLVM::ConstantOp>(loc, llvmI64, formatStringSize);
336 Value oneI32 = rewriter.
create<LLVM::ConstantOp>(loc, llvmI32, 1);
337 Value zeroI32 = rewriter.
create<LLVM::ConstantOp>(loc, llvmI32, 0);
339 auto appendFormatCall = rewriter.
create<LLVM::CallOp>(
340 loc, ocklAppendStringN,
341 ValueRange{printfDesc, stringStart, stringLen,
342 adaptor.getArgs().empty() ? oneI32 : zeroI32});
343 printfDesc = appendFormatCall.
getResult();
346 constexpr
size_t argsPerAppend = 7;
347 size_t nArgs = adaptor.getArgs().size();
348 for (
size_t group = 0; group < nArgs; group += argsPerAppend) {
349 size_t bound =
std::min(group + argsPerAppend, nArgs);
350 size_t numArgsThisCall = bound - group;
353 arguments.push_back(printfDesc);
355 rewriter.
create<LLVM::ConstantOp>(loc, llvmI32, numArgsThisCall));
356 for (
size_t i = group; i < bound; ++i) {
357 Value arg = adaptor.getArgs()[i];
358 if (
auto floatType = dyn_cast<FloatType>(arg.
getType())) {
359 if (!floatType.isF64())
360 arg = rewriter.
create<LLVM::FPExtOp>(
362 arg = rewriter.
create<LLVM::BitcastOp>(loc, llvmI64, arg);
365 arg = rewriter.
create<LLVM::ZExtOp>(loc, llvmI64, arg);
367 arguments.push_back(arg);
370 for (
size_t extra = numArgsThisCall; extra < argsPerAppend; ++extra) {
371 arguments.push_back(zeroI64);
374 auto isLast = (bound == nArgs) ? oneI32 : zeroI32;
375 arguments.push_back(isLast);
376 auto call = rewriter.
create<LLVM::CallOp>(loc, ocklAppendArgs, arguments);
384 gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
386 Location loc = gpuPrintfOp->getLoc();
394 auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
398 LLVM::LLVMFuncOp printfDecl =
405 formatString.push_back(
'\0');
408 LLVM::GlobalOp global;
412 global = rewriter.
create<LLVM::GlobalOp>(
414 true, LLVM::Linkage::Internal, stringConstName,
419 Value globalPtr = rewriter.
create<LLVM::AddressOfOp>(
422 global.getSymNameAttr());
427 auto argsRange = adaptor.getArgs();
429 printfArgs.reserve(argsRange.size() + 1);
430 printfArgs.push_back(stringStart);
431 printfArgs.append(argsRange.begin(), argsRange.end());
433 rewriter.
create<LLVM::CallOp>(loc, printfDecl, printfArgs);
439 gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
441 Location loc = gpuPrintfOp->getLoc();
449 auto moduleOp = gpuPrintfOp->getParentOfType<gpu::GPUModuleOp>();
453 LLVM::LLVMFuncOp vprintfDecl =
460 formatString.push_back(
'\0');
463 LLVM::GlobalOp global;
467 global = rewriter.
create<LLVM::GlobalOp>(
469 true, LLVM::Linkage::Internal, stringConstName,
474 Value globalPtr = rewriter.
create<LLVM::AddressOfOp>(loc, global);
480 for (
Value arg : adaptor.getArgs()) {
481 Type type = arg.getType();
482 Value promotedArg = arg;
484 if (isa<FloatType>(type)) {
486 promotedArg = rewriter.
create<LLVM::FPExtOp>(loc, type, arg);
488 types.push_back(type);
489 args.push_back(promotedArg);
496 Value tempAlloc = rewriter.
create<LLVM::AllocaOp>(loc, structPtrType, one,
502 rewriter.
create<LLVM::StoreOp>(loc, arg, ptr);
504 tempAlloc = rewriter.
create<LLVM::BitcastOp>(loc, i8Ptr, tempAlloc);
505 std::array<Value, 2> printfArgs = {stringStart, tempAlloc};
507 rewriter.
create<LLVM::CallOp>(loc, vprintfDecl, printfArgs);
517 if (llvm::none_of(operandTypes,
518 [](
Type type) {
return isa<VectorType>(type); })) {
530 Value result = rewriter.
create<LLVM::UndefOp>(loc, vectorType);
533 Type elementType = vectorType.getElementType();
535 for (int64_t i = 0; i < vectorType.getNumElements(); ++i) {
536 Value index = rewriter.
create<LLVM::ConstantOp>(loc, indexType, i);
537 auto extractElement = [&](
Value operand) ->
Value {
538 if (!isa<VectorType>(operand.getType()))
540 return rewriter.
create<LLVM::ExtractElementOp>(loc, operand, index);
542 auto scalarOperands = llvm::map_to_vector(operands, extractElement);
544 rewriter.
create(loc, name, scalarOperands, elementType, op->
getAttrs());
545 result = rewriter.
create<LLVM::InsertElementOp>(
546 loc, result, scalarOp->
getResult(0), index);
560 [mapping](
BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
561 gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
562 unsigned addressSpace = mapping(memorySpace);
static LLVM::LLVMFuncOp getOrDefineFunction(T &moduleOp, const Location loc, ConversionPatternRewriter &rewriter, StringRef name, LLVM::LLVMFunctionType type)
static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space)
static SmallString< 16 > getUniqueFormatGlobalName(gpu::GPUModuleOp moduleOp)
static std::string diag(const llvm::Value &value)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Attributes are known-constant values of operations.
This class provides a shared interface for ranked and unranked memref types.
This class represents an argument of a Block.
IntegerAttr getIndexAttr(int64_t value)
IntegerType getIntegerType(unsigned width)
StringAttr getStringAttr(const Twine &bytes)
MLIRContext * getContext() const
This class implements a pattern rewriter for use with ConversionPatterns.
void replaceOp(Operation *op, ValueRange newValues) override
PatternRewriter hook for replacing an operation.
FailureOr< Block * > convertRegionTypes(Region *region, const TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion=nullptr)
Convert the types of block arguments within the given region.
LogicalResult notifyMatchFailure(Location loc, function_ref< void(Diagnostic &)> reasonCallback) override
PatternRewriter hook for notifying match failure reasons.
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before) override
PatternRewriter hook for moving blocks out of a region.
void eraseOp(Operation *op) override
PatternRewriter hook for erasing a dead operation.
void replaceUsesOfBlockArgument(BlockArgument from, Value to)
Replace all the uses of the block argument from with value to.
const TypeConverter * typeConverter
An optional type converter for use by this pattern.
const LLVMTypeConverter * getTypeConverter() const
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
Conversion from types to the LLVM IR dialect.
Type convertFunctionSignature(FunctionType funcTy, bool isVariadic, bool useBarePtrCallConv, SignatureConversion &result) const
Convert a function type.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
LLVM::LLVMPointerType getPointerType(Type elementType, unsigned addressSpace=0) const
Creates an LLVM pointer type with the given element type and address space.
static LLVMStructType getLiteral(MLIRContext *context, ArrayRef< Type > types, bool isPacked=false)
Gets or creates a literal struct with the given body in the provided context.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, const LLVMTypeConverter &typeConverter, MemRefType type, Value memory)
Builds IR creating a MemRef descriptor that represents type and populates it with static shape and st...
NamedAttrList is array of NamedAttributes that tracks whether it is sorted and does some basic work t...
RAII guard to reset the insertion point of the builder when destroyed.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
unsigned getNumSuccessors()
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
unsigned getNumRegions()
Returns the number of regions held by this operation.
Location getLoc()
The source location the operation was defined or derived from.
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
OperationName getName()
The name of an operation is the key identifier for it.
unsigned getNumResults()
Return the number of results held by this operation.
static StringRef getSymbolAttrName()
Return the name of the attribute used for symbol names.
This class provides all of the information necessary to convert a type signature.
std::optional< InputMapping > getInputMapping(unsigned input) const
Get the input mapping for the given argument.
void remapInput(unsigned origInputNo, Value replacement)
Remap an input of the original signature to another replacement value.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
void addTypeAttributeConversion(FnT &&callback)
Register a conversion function for attributes within types.
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Unrolls op if it's operating on vectors.
This header declares functions that assist transformations in the MemRef dialect.
LogicalResult failure(bool isFailure=true)
Utility function to generate a LogicalResult.
InFlightDiagnostic emitWarning(Location loc)
Utility method to emit a warning message using this location.
std::function< unsigned(gpu::AddressSpace)> MemorySpaceMapping
A function that maps a MemorySpace enum to a target-specific integer value.
LogicalResult success(bool isSuccess=true)
Utility function to generate a LogicalResult.
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
bool failed(LogicalResult result)
Utility function that returns true if the provided LogicalResult corresponds to a failure value.
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
This class represents an efficient way to signal success or failure.