33 auto memRefType = cast<MemRefType>(from.
getType());
34 auto rank = memRefType.getRank();
42 if (rank < GPUDialect::getNumWorkgroupDimensions()) {
43 unsigned extraLoops = GPUDialect::getNumWorkgroupDimensions() - rank;
44 lbs.resize(extraLoops, zero);
45 ubs.resize(extraLoops, one);
46 steps.resize(extraLoops, one);
50 lbs.append(rank, zero);
51 ubs.reserve(lbs.size());
52 steps.reserve(lbs.size());
53 for (
auto idx = 0; idx < rank; ++idx) {
61 for (
auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z}) {
62 threadIds.push_back(b.
create<gpu::ThreadIdOp>(indexType, dim));
63 blockDims.push_back(b.
create<gpu::BlockDimOp>(indexType, dim));
69 b, b.
getLoc(), lbs, ubs, steps,
71 ivs.assign(loopIvs.begin(), loopIvs.end());
72 auto activeIvs = llvm::ArrayRef(ivs).take_back(rank);
73 Value loaded = b.create<memref::LoadOp>(loc, from, activeIvs);
74 b.create<memref::StoreOp>(loc, loaded, to, activeIvs);
80 GPUDialect::getNumWorkgroupDimensions())))) {
84 {blockDims[en.index()]});
123 auto fromType = cast<MemRefType>(from.
getType());
124 auto toType = cast<MemRefType>(to.
getType());
127 assert(fromType.getShape() == toType.getShape());
128 assert(fromType.getRank() != 0);
129 assert(llvm::hasSingleElement(region) &&
130 "unstructured control flow not supported");
134 b.create<gpu::BarrierOp>();
136 b.setInsertionPoint(&region.
front().
back());
137 b.create<gpu::BarrierOp>();
144 Value value = op.getArgument(arg);
145 auto type = dyn_cast<MemRefType>(value.
getType());
146 assert(type && type.hasStaticShape() &&
"can only promote memrefs");
150 op->getContext(), gpu::AddressSpace::Workgroup);
151 auto bufferType =
MemRefType::get(type.getShape(), type.getElementType(),
152 MemRefLayoutAttrInterface{},
154 Value attribution = op.addWorkgroupAttribution(bufferType, value.
getLoc());
159 insertCopies(op.getBody(), op.getLoc(), value, attribution);
Attributes are known-constant values of operations.
ImplicitLocOpBuilder maintains a 'current location', allowing use of the create<> method without specifying the location.
Location getLoc() const
Accessors for the implied location.
static ImplicitLocOpBuilder atBlockBegin(Location loc, Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the first operation in the block but still inside the block.
OpTy create(Args &&...args)
Create an operation of specific op type at the current insertion point and location.
void createOrFold(llvm::SmallVectorImpl< Value > &results, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold it.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.
This class helps build Operations.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Operation * getParentOp()
Return the parent operation this region is attached to.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.
Type getType() const
Return the type of this value.
void replaceAllUsesWith(Value newValue)
Replace all uses of 'this' value with the new value, updating anything in the IR that uses 'this' to use the other value instead.
Location getLoc() const
Return the location of this value.
Region * getParentRegion()
Return the Region in which this Value is defined.
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e., all loops but the innermost contain only another loop and a terminator.
Include the generated interface declarations.
void promoteToWorkgroupMemory(gpu::GPUFuncOp op, unsigned arg)
Promotes a function argument to workgroup memory in the given function.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute construction methods.