doxygen/DropUnitDims_8cpp_source.html

//===- DropUnitDims.cpp - Pass to drop use of unit-extent for broadcasting ===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file implements patterns/pass to remove usage of unit-extent dimensions

// to specify broadcasting in favor of more canonical representation of the

// computation

//

//===----------------------------------------------------------------------===//


#include "mlir/Dialect/Linalg/Passes.h"


#include "mlir/Dialect/Affine/IR/AffineOps.h"

#include "mlir/Dialect/Arith/IR/Arith.h"

#include "mlir/Dialect/Linalg/IR/Linalg.h"

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"

#include "mlir/Dialect/Linalg/Utils/Utils.h"

#include "mlir/Dialect/MemRef/Transforms/Transforms.h"

#include "mlir/Dialect/Tensor/IR/Tensor.h"

#include "mlir/Dialect/Tensor/Transforms/Transforms.h"

#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"

#include "mlir/IR/AffineExpr.h"

#include "mlir/IR/AffineMap.h"

#include "mlir/IR/BuiltinTypes.h"

#include "mlir/Transforms/FoldUtils.h"

#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

#include "llvm/Support/Debug.h"


namespace mlir {

#define GEN_PASS_DEF_LINALGFOLDUNITEXTENTDIMSPASS

#include "mlir/Dialect/Linalg/Passes.h.inc"

} // namespace mlir


#define DEBUG_TYPE "linalg-drop-unit-dims"


using namespace mlir;

using namespace mlir::linalg;


namespace {

/// Pattern to move init operands to ins when all the loops are parallel and

/// blockArgument corresponding to init is used in the region. This is a fix-up

/// when unit reduction dimensions are all folded away. In this context, it

/// becomes an elementwise generic op. E.g., it converts

///

///  %0 = tensor.empty() : tensor<1x1xf32>

///  %1 = linalg.fill

///    ins(%cst : f32)

///    outs(%0 : tensor<1x1xf32>) -> tensor<1x1xf32>

///  %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (0, d0, 0, 0)>,

///                                        affine_map<(d0) -> (0, d0)>],

///                       iterator_types = ["parallel"]}

///    ins(%arg0 : tensor<1x?x1x1xf32>)

///    outs(%1 : tensor<1x1xf32>) {

///  ^bb0(%in: f32, %out: f32):

///    %3 = arith.addf %in, %out : f32

///    linalg.yield %3 : f32

///  } -> tensor<1x1xf32>

///

///  into

///

///  %0 = tensor.empty() : tensor<1x1xf32>

///  %1 = linalg.fill

///    ins(%cst : f32)

///    outs(%0 : tensor<1x1xf32>) -> tensor<1x1xf32>

///  %2 = tensor.empty() : tensor<1x1xf32>

///  %3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (0, d0, 0, 0)>,

///                                        affine_map<(d0) -> (0, d0)>,

///                                        affine_map<(d0) -> (0, d0)>],

///                       iterator_types = ["parallel"]}

///   ins(%arg0, %1 : tensor<1x?x1x1xf32>, tensor<1x1xf32>)

///   outs(%2 : tensor<1x1xf32>) {

///  ^bb0(%in: f32, %in_0: f32, %out: f32):

///    %4 = arith.addf %in, %in_0 : f32

///    linalg.yield %4 : f32

///  } -> tensor<1x1xf32>

struct MoveInitOperandsToInput : public OpRewritePattern<GenericOp> {
  using OpRewritePattern<GenericOp>::OpRewritePattern;

  /// Matches a `linalg.generic` with pure tensor semantics whose loops are
  /// all parallel and whose payload uses at least one init block argument.
  /// Every such init value is appended to the inputs of a new generic op and
  /// is replaced, as an init, by a fresh `tensor.empty` of the same sizes and
  /// element type.
  LogicalResult matchAndRewrite(GenericOp genericOp,
                                PatternRewriter &rewriter) const override {
    if (!genericOp.hasPureTensorSemantics() ||
        genericOp.getNumParallelLoops() != genericOp.getNumLoops())
      return failure();

    // Collect the inits whose matching block argument has at least one use.
    auto initOperands = genericOp.getDpsInitsMutable();
    SetVector<OpOperand *> readInits;
    for (OpOperand &init : initOperands)
      if (!genericOp.getMatchingBlockArgument(&init).use_empty())
        readInits.insert(&init);
    if (readInits.empty())
      return failure();

    // New inputs are the old inputs followed by the read inits. The indexing
    // maps are laid out as
    //   [old input maps | maps of the read inits | old init maps].
    const int64_t numOrigInputs = genericOp.getNumDpsInputs();
    SmallVector<Value> inputs = genericOp.getDpsInputs();
    SmallVector<AffineMap> origMaps = genericOp.getIndexingMapsArray();
    auto firstInitMap = std::next(origMaps.begin(), numOrigInputs);
    SmallVector<AffineMap> maps(origMaps.begin(), firstInitMap);
    for (OpOperand *init : readInits) {
      inputs.push_back(init->get());
      maps.push_back(genericOp.getMatchingIndexingMap(init));
    }
    maps.append(firstInitMap, origMaps.end());

    // Swap each read init for a `tensor.empty` that is created right after
    // the definition of the original init value.
    Location loc = genericOp.getLoc();
    SmallVector<Value> outputs = llvm::to_vector(genericOp.getDpsInits());
    const unsigned firstInitIdx =
        genericOp.getDpsInits().getBeginOperandIndex();
    for (OpOperand *init : readInits) {
      OpBuilder::InsertionGuard guard(rewriter);
      Value initValue = init->get();
      rewriter.setInsertionPointAfterValue(initValue);
      auto elementType =
          cast<ShapedType>(initValue.getType()).getElementType();
      auto emptyOp = tensor::EmptyOp::create(
          rewriter, loc, tensor::getMixedSizes(rewriter, loc, initValue),
          elementType);
      outputs[init->getOperandNumber() - firstInitIdx] = emptyOp.getResult();
    }

    auto newGeneric = GenericOp::create(
        rewriter, loc, genericOp.getResultTypes(), inputs, outputs, maps,
        genericOp.getIteratorTypesArray(),
        /*bodyBuild=*/nullptr, linalg::getPrunedAttributeList(genericOp));

    // Build the payload block of the new op. Block arguments are added in the
    // order [old inputs | read inits (now inputs) | all inits].
    OpBuilder::InsertionGuard guard(rewriter);
    Block *payload = rewriter.createBlock(&newGeneric.getRegion());
    IRMapping mapping;
    auto addMappedArg = [&](BlockArgument oldArg) {
      mapping.map(oldArg, payload->addArgument(oldArg.getType(), loc));
    };

    for (BlockArgument inputArg : genericOp.getRegionInputArgs())
      addMappedArg(inputArg);
    for (OpOperand *init : readInits)
      addMappedArg(genericOp.getMatchingBlockArgument(init));
    for (OpOperand &init : initOperands) {
      BlockArgument oldArg = genericOp.getMatchingBlockArgument(&init);
      // A read init is already mapped to its new input argument above, so
      // its output argument is added but left unmapped.
      if (readInits.count(&init))
        payload->addArgument(oldArg.getType(), loc);
      else
        addMappedArg(oldArg);
    }

    // Clone the original payload into the new block through the mapping.
    for (Operation &nestedOp : *genericOp.getBody())
      rewriter.clone(nestedOp, mapping);

    rewriter.replaceOp(genericOp, newGeneric.getResults());
    return success();
  }
};

} // namespace


//===---------------------------------------------------------------------===//

// Drop loops that are unit-extents within Linalg operations.

//===---------------------------------------------------------------------===//


/// Implements a pass that canonicalizes the uses of unit-extent dimensions for

/// broadcasting. For example,

///

/// ```mlir

/// #accesses = [

///   affine_map<(d0, d1) -> (0, d1)>,

///   affine_map<(d0, d1) -> (d0, 0)>,

///   affine_map<(d0, d1) -> (d0, d1)>

/// ]

///

/// #trait = {

///   indexing_maps = #accesses,

///   iterator_types = ["parallel", "parallel"],

///   library_call = "some_external_fn"

/// }

///

/// func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>) ->

/// tensor<5x5xf32>

/// {

///   %0 = linalg.tensor_reshape %arg0 [affine_map<(d0, d1) -> (d0, d1)>] :

///        tensor<5xf32> into tensor<1x5xf32>

///   %1 = linalg.tensor_reshape %arg1 [affine_map<(d0, d1) -> (d0, d1)>] :

///        tensor<5xf32> into tensor<5x1xf32>

///   %2 = linalg.generic #trait %0, %1 {

///        ^bb0(%arg2: f32, %arg3: f32):

///          %3 = arith.addf %arg2, %arg3 : f32

///          linalg.yield %3 : f32

///        } : tensor<1x5xf32>, tensor<5x1xf32> -> tensor<5x5xf32>

///   return %2 : tensor<5x5xf32>

/// }
/// ```

///

/// would canonicalize to

///

/// ```mlir

/// #accesses = [

///   affine_map<(d0, d1) -> (d1)>,

///   affine_map<(d0, d1) -> (d0)>,

///   affine_map<(d0, d1) -> (d0, d1)>

/// ]

///

/// #trait = {

///   indexing_maps = #accesses,

///   iterator_types = ["parallel", "parallel"],

///   library_call = "some_external_fn"

/// }

///

/// func @broadcast_test(%arg0 : tensor<5xf32>, %arg1 : tensor<5xf32>) ->

/// tensor<5x5xf32>

/// {

///   %0 = linalg.generic #trait %arg0, %arg1 {

///        ^bb0(%arg2: f32, %arg3: f32):

///          %3 = arith.addf %arg2, %arg3 : f32

///          linalg.yield %3 : f32

///        } : tensor<5xf32>, tensor<5xf32> -> tensor<5x5xf32>

///   return %0 : tensor<5x5xf32>

/// }
/// ```


/// Update the `linalg.index` ops in the body of `genericOp` to account for
/// the loops listed in `unitDims` being dropped. An index op that refers to a
/// dropped loop is replaced by the constant index 0; an index op that refers
/// to any other loop is renumbered by subtracting the number of dropped loops
/// that precede it (and is left untouched when there are none).
static void
replaceUnitDimIndexOps(GenericOp genericOp,
                       const llvm::SmallDenseSet<unsigned> &unitDims,
                       RewriterBase &rewriter) {
  // Early-inc iteration: the current index op may be replaced in the loop.
  for (IndexOp indexOp :
       llvm::make_early_inc_range(genericOp.getBody()->getOps<IndexOp>())) {
    OpBuilder::InsertionGuard guard(rewriter);
    rewriter.setInsertionPoint(indexOp);
    const auto loopDim = indexOp.getDim();

    if (unitDims.count(loopDim) != 0) {
      rewriter.replaceOpWithNewOp<arith::ConstantIndexOp>(indexOp, 0);
      continue;
    }

    // Count the dropped loops that come before this one.
    unsigned numDroppedBefore = 0;
    for (unsigned droppedDim : unitDims)
      if (droppedDim < loopDim)
        ++numDroppedBefore;
    if (numDroppedBefore == 0)
      continue;

    rewriter.replaceOpWithNewOp<IndexOp>(indexOp, loopDim - numDroppedBefore);
  }
}


/// Expand `result` so that its type matches the type of `origDest`, which
/// must be a ranked tensor.
///  - `RankReductionStrategy::ExtractInsertSlice`: `result` is inserted into
///    `origDest` with a `tensor.insert_slice` (created or folded) that uses
///    zero offsets, the sizes of `origDest` and unit strides.
///  - `RankReductionStrategy::ReassociativeReshape`: a `tensor.expand_shape`
///    of `result` to the type of `origDest` is created using `reassociation`.
static Value
expandValue(RewriterBase &rewriter, Location loc, Value result, Value origDest,
            ArrayRef<ReassociationIndices> reassociation,
            ControlDropUnitDims::RankReductionStrategy rankReductionStrategy) {
  using Strategy = ControlDropUnitDims::RankReductionStrategy;
  auto destType = cast<RankedTensorType>(origDest.getType());

  if (rankReductionStrategy != Strategy::ExtractInsertSlice) {
    assert(rankReductionStrategy == Strategy::ReassociativeReshape &&
           "unknown rank reduction strategy");
    auto expandOp = tensor::ExpandShapeOp::create(rewriter, loc, destType,
                                                  result, reassociation);
    return expandOp.getResult();
  }

  const unsigned destRank = destType.getRank();
  SmallVector<OpFoldResult> zeroOffsets(destRank, rewriter.getIndexAttr(0));
  SmallVector<OpFoldResult> destSizes =
      tensor::getMixedSizes(rewriter, loc, origDest);
  SmallVector<OpFoldResult> unitStrides(destRank, rewriter.getIndexAttr(1));
  return rewriter.createOrFold<tensor::InsertSliceOp>(
      loc, result, origDest, zeroOffsets, destSizes, unitStrides);
}


/// Collapse `operand`, a memref or a ranked tensor, to the shape
/// `targetShape`.
///  - `RankReductionStrategy::ExtractInsertSlice`: uses the
///    `rankReduceIfNeeded` helper of `memref.subview` /
///    `tensor.extract_slice`; the helper is asserted to succeed.
///  - `RankReductionStrategy::ReassociativeReshape`: creates a
///    `memref.collapse_shape` / `tensor.collapse_shape` with `reassociation`.
/// Any other operand type is unsupported and hits `llvm_unreachable`.
static Value collapseValue(
    RewriterBase &rewriter, Location loc, Value operand,
    ArrayRef<int64_t> targetShape, ArrayRef<ReassociationIndices> reassociation,
    ControlDropUnitDims::RankReductionStrategy rankReductionStrategy) {
  using Strategy = ControlDropUnitDims::RankReductionStrategy;
  const bool viaSlice = rankReductionStrategy == Strategy::ExtractInsertSlice;
  Type operandType = operand.getType();

  if (auto memrefType = dyn_cast<MemRefType>(operandType)) {
    if (viaSlice) {
      FailureOr<Value> subview = memref::SubViewOp::rankReduceIfNeeded(
          rewriter, loc, operand, targetShape);
      assert(succeeded(subview) && "not a unit-extent collapse");
      return *subview;
    }

    assert(rankReductionStrategy == Strategy::ReassociativeReshape &&
           "unknown rank reduction strategy");
    // The collapsed type is built with a default-constructed layout
    // attribute and the memory space of the original memref.
    MemRefLayoutAttrInterface layout;
    auto collapsedType =
        MemRefType::get(targetShape, memrefType.getElementType(), layout,
                        memrefType.getMemorySpace());
    return memref::CollapseShapeOp::create(rewriter, loc, collapsedType,
                                           operand, reassociation);
  }

  if (auto tensorType = dyn_cast<RankedTensorType>(operandType)) {
    if (viaSlice) {
      FailureOr<Value> slice = tensor::ExtractSliceOp::rankReduceIfNeeded(
          rewriter, loc, operand, targetShape);
      assert(succeeded(slice) && "not a unit-extent collapse");
      return *slice;
    }

    assert(rankReductionStrategy == Strategy::ReassociativeReshape &&
           "unknown rank reduction strategy");
    auto collapsedType =
        RankedTensorType::get(targetShape, tensorType.getElementType());
    return tensor::CollapseShapeOp::create(rewriter, loc, collapsedType,
                                           operand, reassociation);
  }

  llvm_unreachable("unsupported operand type");
}


/// Modified metadata computed for one operand of an operation whose unit

/// dims are being dropped: the new indexing map to use, the shape of the

/// operand in the replacement op, and the `reassociation` to use to go from

/// the original operand shape to the modified operand shape.


struct UnitExtentReplacementInfo {

  /// Indexing map to use for the operand in the replacement op.
  AffineMap indexMap;

  /// Groups of original operand dimensions; each group corresponds to one
  /// dimension of `targetShape`.
  SmallVector<ReassociationIndices> reassociation;

  /// Shape of the operand in the replacement op.
  SmallVector<int64_t> targetShape;

};


/// Compute the `UnitExtentReplacementInfo` for `opOperand` of `op`.
///
/// A dimension of the operand is treated as droppable when its static extent
/// is 1 and its indexing expression is either
///  - a loop dimension that is absent from `oldDimsToNewDimsMap` (i.e. a loop
///    that is being dropped), or
///  - the constant 0.
/// Every remaining dimension is kept: its indexing expression is rewritten
/// with `dimReplacements` and it starts a reassociation group that also
/// absorbs the droppable dimensions right after it. Droppable dimensions at
/// the front of the operand are absorbed into the first group. When every
/// dimension is droppable, both `reassociation` and `targetShape` are empty.
static UnitExtentReplacementInfo dropUnitExtentFromOperandMetadata(
    MLIRContext *context, IndexingMapOpInterface op, OpOperand *opOperand,
    llvm::SmallDenseMap<unsigned, unsigned> &oldDimsToNewDimsMap,
    ArrayRef<AffineExpr> dimReplacements) {
  AffineMap origMap = op.getMatchingIndexingMap(opOperand);
  SmallVector<int64_t> shape = op.getStaticOperandShape(opOperand);
  ArrayRef<AffineExpr> origExprs = origMap.getResults();
  const unsigned rank = shape.size();

  auto isUnitDim = [&](unsigned pos) {
    if (shape[pos] != 1)
      return false;
    // Unit extent indexed by a loop dimension: droppable only if that loop
    // is itself being dropped.
    if (auto dimExpr = dyn_cast<AffineDimExpr>(origExprs[pos]))
      return oldDimsToNewDimsMap.count(dimExpr.getPosition()) == 0;
    // Unit extent accessed with the constant 0.
    auto constExpr = dyn_cast<AffineConstantExpr>(origExprs[pos]);
    return constExpr && constExpr.getValue() == 0;
  };

  UnitExtentReplacementInfo info;
  SmallVector<AffineExpr> keptExprs;
  ReassociationIndices group;
  unsigned pos = 0;

  // Appends the run of droppable dimensions starting at `pos` to `group`.
  auto absorbUnitDims = [&] {
    for (; pos < rank && isUnitDim(pos); ++pos)
      group.push_back(pos);
  };

  // Leading droppable dimensions join the group of the first kept dimension.
  absorbUnitDims();
  while (pos < rank) {
    assert(!isUnitDim(pos) && "expected non unit-extent");
    group.push_back(pos);
    keptExprs.push_back(origExprs[pos].replaceDims(dimReplacements));
    info.targetShape.push_back(shape[pos]);
    ++pos;

    // Fold all following dimensions that are unit-extent.
    absorbUnitDims();
    info.reassociation.push_back(group);
    group.clear();
  }

  info.indexMap = AffineMap::get(oldDimsToNewDimsMap.size(),
                                 origMap.getNumSymbols(), keptExprs, context);
  return info;
}


/// Drop unit-extent loops of `op` together with the operand dimensions that
/// they (or a constant 0) index.
///
/// `op` must also implement `DestinationStyleOpInterface`. The loops that may
/// be dropped are restricted to those returned by `options.controlFn(op)`.
/// Operands are collapsed with reshapes or rank-reducing slices according to
/// `options.rankReductionStrategy`; the replacement op itself is created by
/// `droppedUnitDimsBuilder`, which receives the new operands, the new
/// indexing maps and the set of dropped loops. Results whose init operand was
/// collapsed are expanded back to the type of the original init.
///
/// Returns failure if `op` is not destination-style, has no indexing maps,
/// has non-invertible indexing maps (before or after the rewrite), if the
/// control function allows no dims, or if the indexing maps would not change.
/// On success returns the replacement op and the values to replace the
/// results of `op` with; `op` itself is not erased here.
FailureOr<DropUnitDimsResult>
linalg::dropUnitDims(RewriterBase &rewriter, IndexingMapOpInterface op,
                     const DroppedUnitDimsBuilder &droppedUnitDimsBuilder,
                     const ControlDropUnitDims &options) {
  auto dpsOp = dyn_cast<DestinationStyleOpInterface>(op.getOperation());
  if (!dpsOp) {
    return rewriter.notifyMatchFailure(
        op, "op should implement DestinationStyleOpInterface");
  }

  SmallVector<AffineMap> indexingMaps = op.getIndexingMapsArray();
  if (indexingMaps.empty())
    return failure();

  // 1. Check if any of the iteration dimensions are unit-trip count. They will
  //    end up being unit-trip count if they are used to index into a unit-dim
  //    tensor/memref.
  AffineMap invertedMap =
      inversePermutation(concatAffineMaps(indexingMaps, rewriter.getContext()));
  if (!invertedMap) {
    return rewriter.notifyMatchFailure(op,
                                       "invalid indexing maps for operation");
  }

  // Static shapes of all operands, concatenated in operand order, so that a
  // result position of the concatenated indexing maps indexes into it.
  SmallVector<int64_t> allShapesSizes;
  for (OpOperand &opOperand : op->getOpOperands())
    llvm::append_range(allShapesSizes, op.getStaticOperandShape(&opOperand));

  // 1a. Get the allowed list of dimensions to drop from the `options`.
  SmallVector<unsigned> allowedUnitDims = options.controlFn(op);
  if (allowedUnitDims.empty()) {
    return rewriter.notifyMatchFailure(
        op, "control function returns no allowed unit dims to prune");
  }
  llvm::SmallDenseSet<unsigned> unitDimsFilter(allowedUnitDims.begin(),
                                               allowedUnitDims.end());
  llvm::SmallDenseSet<unsigned> unitDims;
  for (const auto &expr : enumerate(invertedMap.getResults())) {
    if (AffineDimExpr dimExpr = dyn_cast<AffineDimExpr>(expr.value())) {
      if (allShapesSizes[dimExpr.getPosition()] == 1 &&
          unitDimsFilter.count(expr.index()))
        unitDims.insert(expr.index());
    }
  }

  // 2. Compute the new loops of the modified op by dropping the one-trip
  //    count loops. Dropped loops are replaced by the constant 0, the others
  //    are renumbered densely.
  llvm::SmallDenseMap<unsigned, unsigned> oldDimToNewDimMap;
  SmallVector<AffineExpr> dimReplacements;
  unsigned newDims = 0;
  for (auto index : llvm::seq<int64_t>(op.getStaticLoopRanges().size())) {
    if (unitDims.count(index)) {
      dimReplacements.push_back(
          getAffineConstantExpr(0, rewriter.getContext()));
    } else {
      oldDimToNewDimMap[index] = newDims;
      dimReplacements.push_back(
          getAffineDimExpr(newDims, rewriter.getContext()));
      newDims++;
    }
  }

  // 3. For each of the operands, find the
  //    - modified affine map to use.
  //    - shape of the operands after the unit-dims are dropped.
  //    - the reassociation indices used to convert from the original
  //      operand type to modified operand (needed only when using reshapes
  //      for rank reduction strategy)
  // Note that the indexing maps might need changing even if there are no
  // unit dimensions that are dropped to handle cases where `0` is used to
  // access a unit-extent tensor. Consider moving this out of this specific
  // transformation as a stand-alone transformation. Kept here right now due
  // to legacy.
  SmallVector<AffineMap> newIndexingMaps;
  SmallVector<SmallVector<ReassociationIndices>> reassociations;
  SmallVector<SmallVector<int64_t>> targetShapes;
  SmallVector<bool> collapsed;
  // Only memrefs with identity layout and ranked tensors without encoding
  // are collapsed. The type of an operand is never null, so plain `dyn_cast`
  // is used for both checks.
  auto hasCollapsibleType = [](OpOperand &operand) {
    Type operandType = operand.get().getType();
    if (auto memrefOperandType = dyn_cast<MemRefType>(operandType)) {
      return memrefOperandType.getLayout().isIdentity();
    }
    if (auto tensorOperandType = dyn_cast<RankedTensorType>(operandType)) {
      return tensorOperandType.getEncoding() == nullptr;
    }
    return false;
  };
  for (OpOperand &opOperand : op->getOpOperands()) {
    auto indexingMap = op.getMatchingIndexingMap(&opOperand);
    if (!hasCollapsibleType(opOperand)) {
      // Keep the operand as is; only renumber the loops in its indexing map.
      AffineMap newIndexingMap = indexingMap.replaceDimsAndSymbols(
          dimReplacements, ArrayRef<AffineExpr>{}, oldDimToNewDimMap.size(), 0);
      newIndexingMaps.push_back(newIndexingMap);
      targetShapes.push_back(op.getStaticOperandShape(&opOperand));
      collapsed.push_back(false);
      reassociations.push_back({});
      continue;
    }
    auto replacementInfo =
        dropUnitExtentFromOperandMetadata(rewriter.getContext(), op, &opOperand,
                                          oldDimToNewDimMap, dimReplacements);
    reassociations.push_back(replacementInfo.reassociation);
    newIndexingMaps.push_back(replacementInfo.indexMap);
    targetShapes.push_back(replacementInfo.targetShape);
    // The operand is collapsed iff some of its dimensions were dropped.
    collapsed.push_back(replacementInfo.indexMap.getNumResults() !=
                        indexingMap.getNumResults());
  }

  // Abort if the indexing maps of the result operation are not invertible
  // (i.e. not legal) or if no dimension was reduced.
  if (newIndexingMaps == indexingMaps ||
      !inversePermutation(
          concatAffineMaps(newIndexingMaps, rewriter.getContext())))
    return failure();

  Location loc = op.getLoc();
  // 4. For each of the operands, collapse the operand to convert
  //    from original shape to shape in the modified operation if needed,
  //    either through use of reshapes or rank-reducing slices as
  //    specified in `options`.
  SmallVector<Value> newOperands;
  for (OpOperand &opOperand : op->getOpOperands()) {
    int64_t idx = opOperand.getOperandNumber();
    if (!collapsed[idx]) {
      newOperands.push_back(opOperand.get());
      continue;
    }
    newOperands.push_back(collapseValue(rewriter, loc, opOperand.get(),
                                        targetShapes[idx], reassociations[idx],
                                        options.rankReductionStrategy));
  }

  // 5. Create the replacement op through the caller-provided builder.
  IndexingMapOpInterface replacementOp = droppedUnitDimsBuilder(
      loc, rewriter, op, newOperands, newIndexingMaps, unitDims);

  // 6. If any result type changes, insert a reshape/slice to convert from the
  //    original type to the new type.
  SmallVector<Value> resultReplacements;
  for (auto [index, result] : llvm::enumerate(replacementOp->getResults())) {
    // Result `index` is matched with init operand `index`, whose operand
    // number is offset by the number of inputs.
    unsigned opOperandIndex = index + dpsOp.getNumDpsInputs();
    Value origDest = dpsOp.getDpsInitOperand(index)->get();
    if (!collapsed[opOperandIndex]) {
      resultReplacements.push_back(result);
      continue;
    }
    Value expandedValue = expandValue(rewriter, loc, result, origDest,
                                      reassociations[opOperandIndex],
                                      options.rankReductionStrategy);
    resultReplacements.push_back(expandedValue);
  }

  return DropUnitDimsResult{replacementOp, resultReplacements};
}


/// Overload of `dropUnitDims` for `linalg.generic`. It supplies a builder
/// that creates the replacement `linalg.generic` (new operands, new indexing
/// maps, iterator types of the surviving loops, result types taken from the
/// new outputs), clones the original region into it and updates the
/// `linalg.index` ops for the dropped loops.
FailureOr<DropUnitDimsResult>
linalg::dropUnitDims(RewriterBase &rewriter, GenericOp genericOp,
                     const ControlDropUnitDims &options) {
  DroppedUnitDimsBuilder buildReplacement =
      [](Location loc, OpBuilder &b, IndexingMapOpInterface op,
         ArrayRef<Value> newOperands, ArrayRef<AffineMap> newIndexingMaps,
         const llvm::SmallDenseSet<unsigned> &droppedDims)
      -> IndexingMapOpInterface {
    auto origOp = cast<GenericOp>(op);

    // Keep the iterator types of the loops that are not dropped.
    SmallVector<utils::IteratorType> keptIteratorTypes;
    unsigned loopIdx = 0;
    for (utils::IteratorType iteratorType : origOp.getIteratorTypesArray()) {
      if (droppedDims.count(loopIdx) == 0)
        keptIteratorTypes.push_back(iteratorType);
      ++loopIdx;
    }

    // Split the new operands into inputs (front) and outputs (back); the
    // result types are the types of the leading new outputs.
    ArrayRef<Value> newInputs =
        newOperands.take_front(origOp.getNumDpsInputs());
    ArrayRef<Value> newOutputs =
        newOperands.take_back(origOp.getNumDpsInits());
    const unsigned numResults = origOp.getNumResults();
    SmallVector<Type> resultTypes;
    resultTypes.reserve(numResults);
    for (unsigned i = 0; i != numResults; ++i)
      resultTypes.push_back(newOutputs[i].getType());

    GenericOp newGeneric =
        GenericOp::create(b, loc, resultTypes, newInputs, newOutputs,
                          newIndexingMaps, keptIteratorTypes);
    b.cloneRegionBefore(origOp.getRegion(), newGeneric.getRegion(),
                        newGeneric.getRegion().begin());

    // 5a. Replace `linalg.index` operations that refer to the dropped unit
    //     dimensions.
    IRRewriter indexRewriter(b);
    replaceUnitDimIndexOps(newGeneric, droppedDims, indexRewriter);

    return newGeneric;
  };

  return dropUnitDims(rewriter, genericOp, buildReplacement, options);
}


namespace {

/// Pattern that applies `dropUnitDims` to a `linalg.generic` and, when that
/// succeeds, replaces the op with the returned replacement values.
struct DropUnitDims : public OpRewritePattern<GenericOp> {
  DropUnitDims(MLIRContext *context, ControlDropUnitDims options = {},
               PatternBenefit benefit = 1)
      : OpRewritePattern(context, benefit), options(std::move(options)) {}

  LogicalResult matchAndRewrite(GenericOp genericOp,
                                PatternRewriter &rewriter) const override {
    FailureOr<DropUnitDimsResult> dropped =
        dropUnitDims(rewriter, genericOp, options);
    if (succeeded(dropped)) {
      rewriter.replaceOp(genericOp, dropped->replacements);
      return success();
    }
    return failure();
  }

private:
  /// Controls which dims may be dropped and how ranks are reduced.
  ControlDropUnitDims options;
};

} // namespace


//===---------------------------------------------------------------------===//

// Drop dimensions that are unit-extents within tensor operations.

//===---------------------------------------------------------------------===//


namespace {

/// Pattern that drops unit dimensions of a `tensor.pad` with a constant
/// padding value. A dimension is dropped when it is allowed by
/// `options.controlFn`, has source extent 1, and has static zero low and high
/// padding. The source is collapsed, a lower-rank pad is created, and its
/// result is expanded back to the original result type.
struct DropPadUnitDims : public OpRewritePattern<tensor::PadOp> {
  DropPadUnitDims(MLIRContext *context, ControlDropUnitDims options = {},
                  PatternBenefit benefit = 1)
      : OpRewritePattern(context, benefit), options(std::move(options)) {}

  LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                PatternRewriter &rewriter) const override {
    // 1a. Get the allowed list of dimensions to drop from the `options`.
    SmallVector<unsigned> allowedUnitDims = options.controlFn(padOp);
    if (allowedUnitDims.empty())
      return rewriter.notifyMatchFailure(
          padOp, "control function returns no allowed unit dims to prune");

    if (padOp.getSourceType().getEncoding())
      return rewriter.notifyMatchFailure(
          padOp, "cannot collapse dims of tensor with encoding");

    // Fail for non-constant padding values. The body of the pad could
    // depend on the padding indices and/or properties of the padded
    // tensor so for now we fail.
    // TODO: Support non-constant padding values.
    Value paddingVal = padOp.getConstantPaddingValue();
    if (!paddingVal)
      return rewriter.notifyMatchFailure(
          padOp, "unimplemented: non-constant padding value");

    Location loc = padOp.getLoc();
    ArrayRef<int64_t> sourceShape = padOp.getSourceType().getShape();
    ArrayRef<int64_t> resultShape = padOp.getResultType().getShape();
    const int64_t padRank = sourceShape.size();

    auto isStaticZero = [](OpFoldResult f) {
      return getConstantIntValue(f) == 0;
    };

    // Partition the dimensions into dropped unit dims and kept dims; for the
    // kept dims record source extent, result extent and low/high padding.
    llvm::SmallDenseSet<unsigned> unitDimsFilter(allowedUnitDims.begin(),
                                                 allowedUnitDims.end());
    llvm::SmallDenseSet<unsigned> unitDims;
    SmallVector<int64_t> keptSourceShape;
    SmallVector<int64_t> keptResultShape;
    SmallVector<OpFoldResult> keptLowPad;
    SmallVector<OpFoldResult> keptHighPad;
    for (const auto [idx, srcSize, dstSize, lowPad, highPad] : llvm::zip_equal(
             llvm::seq(static_cast<int64_t>(0), padRank), sourceShape,
             resultShape, padOp.getMixedLowPad(), padOp.getMixedHighPad())) {
      const bool droppable = unitDimsFilter.contains(idx) && srcSize == 1 &&
                             isStaticZero(lowPad) && isStaticZero(highPad);
      if (droppable) {
        unitDims.insert(idx);
        continue;
      }
      keptSourceShape.push_back(srcSize);
      keptResultShape.push_back(dstSize);
      keptLowPad.push_back(lowPad);
      keptHighPad.push_back(highPad);
    }

    if (unitDims.empty())
      return rewriter.notifyMatchFailure(padOp, "no unit dims to collapse");

    // Build the reassociation: each kept dim starts a group that absorbs the
    // dropped dims right after it; leading dropped dims join the first group.
    ReassociationIndices group;
    SmallVector<ReassociationIndices> reassociationMap;
    int64_t pos = 0;
    auto absorbUnitDims = [&] {
      while (pos < padRank && unitDims.contains(pos))
        group.push_back(pos++);
    };
    absorbUnitDims();
    while (pos < padRank) {
      assert(!unitDims.contains(pos) && "expected non unit-extent");
      group.push_back(pos);
      pos++;
      // Fold all following dimensions that are unit-extent.
      absorbUnitDims();
      reassociationMap.push_back(group);
      group.clear();
    }

    Value collapsedSource =
        collapseValue(rewriter, loc, padOp.getSource(), keptSourceShape,
                      reassociationMap, options.rankReductionStrategy);

    auto collapsedResultType = RankedTensorType::get(
        keptResultShape, padOp.getResultType().getElementType());
    auto newPadOp = tensor::PadOp::create(
        rewriter, loc, /*result=*/collapsedResultType, collapsedSource,
        keptLowPad, keptHighPad, paddingVal, padOp.getNofold());

    // With reshapes only the type of `dest` is needed, so the original
    // result is used. With slices, the new pad result is inserted into a
    // `tensor.empty` that has the full-rank sizes.
    Value dest = padOp.getResult();
    if (options.rankReductionStrategy ==
        ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice) {
      SmallVector<OpFoldResult> expandedSizes;
      int64_t numSkipped = 0;
      for (int64_t d = 0; d < padRank; ++d) {
        if (unitDims.contains(d)) {
          expandedSizes.push_back(rewriter.getIndexAttr(1));
          numSkipped++;
          continue;
        }
        // `d - numSkipped` is the position of this dim in the new pad op.
        expandedSizes.push_back(
            tensor::getMixedSize(rewriter, loc, newPadOp, d - numSkipped));
      }
      dest = tensor::EmptyOp::create(rewriter, loc, expandedSizes,
                                     padOp.getResultType().getElementType());
    }

    Value expandedValue =
        expandValue(rewriter, loc, newPadOp.getResult(), dest,
                    reassociationMap, options.rankReductionStrategy);
    rewriter.replaceOp(padOp, expandedValue);
    return success();
  }

private:
  /// Controls which dims may be dropped and how ranks are reduced.
  ControlDropUnitDims options;
};

} // namespace


namespace {

/// Convert `extract_slice` operations to rank-reduced versions: the slice is
/// re-created with a rank-reduced result type (same source, offsets, sizes
/// and strides) and expanded back to the original result type with a
/// `tensor.expand_shape`. The pattern fails when no reassociation for folding
/// unit dims exists or when it would not reduce the rank.
struct RankReducedExtractSliceOp
    : public OpRewritePattern<tensor::ExtractSliceOp> {
  using OpRewritePattern<tensor::ExtractSliceOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(tensor::ExtractSliceOp sliceOp,
                                PatternRewriter &rewriter) const override {
    RankedTensorType resultType = sliceOp.getType();

    // Express the result shape as index attributes to query the
    // reassociation that folds its unit dims.
    SmallVector<OpFoldResult> resultShape;
    for (int64_t extent : resultType.getShape())
      resultShape.push_back(rewriter.getIndexAttr(extent));
    auto reassociation = getReassociationMapForFoldingUnitDims(resultShape);

    // As many groups as dimensions means that there is nothing to fold.
    const auto resultRank = static_cast<size_t>(resultType.getRank());
    if (!reassociation || reassociation->size() == resultRank)
      return failure();

    SmallVector<OpFoldResult> offsets = sliceOp.getMixedOffsets();
    SmallVector<OpFoldResult> strides = sliceOp.getMixedStrides();
    SmallVector<OpFoldResult> sizes = sliceOp.getMixedSizes();
    auto rankReducedType = cast<RankedTensorType>(
        tensor::ExtractSliceOp::inferCanonicalRankReducedResultType(
            reassociation->size(), sliceOp.getSourceType(), offsets, sizes,
            strides));

    Location loc = sliceOp.getLoc();
    Value rankReducedSlice = tensor::ExtractSliceOp::create(
        rewriter, loc, rankReducedType, sliceOp.getSource(), offsets, sizes,
        strides);
    rewriter.replaceOpWithNewOp<tensor::ExpandShapeOp>(
        sliceOp, resultType, rankReducedSlice, *reassociation);
    return success();
  }
};


/// Convert `insert_slice` operations to rank-reduced versions.
/// This pattern works with both InsertSliceOp and ParallelInsertSliceOp.
///
/// The source is first collapsed with a `tensor.collapse_shape` using the
/// reassociation computed from its static shape; the insert op is then
/// recreated with the collapsed source and the original destination, offsets,
/// sizes and strides.
template <typename InsertOpTy>
struct RankReducedInsertSliceOp : public OpRewritePattern<InsertOpTy> {
  using OpRewritePattern<InsertOpTy>::OpRewritePattern;

  LogicalResult matchAndRewrite(InsertOpTy insertSliceOp,
                                PatternRewriter &rewriter) const override {
    RankedTensorType sourceType = insertSliceOp.getSourceType();

    // Express every source extent as an index attribute, which is the form
    // the reassociation helper consumes.
    SmallVector<OpFoldResult> targetShape;
    for (auto size : sourceType.getShape())
      targetShape.push_back(rewriter.getIndexAttr(size));

    auto reassociation = getReassociationMapForFoldingUnitDims(targetShape);

    // Bail out when no reassociation was produced, or when it has as many
    // groups as the source has dimensions (the rank would not shrink).
    if (!reassociation ||
        reassociation->size() == static_cast<size_t>(sourceType.getRank()))
      return failure();

    Location loc = insertSliceOp.getLoc();
    tensor::CollapseShapeOp reshapedSource;

    {
      // Restore the rewriter's insertion point once the reshape is created.
      OpBuilder::InsertionGuard g(rewriter);
      // The only difference between InsertSliceOp and ParallelInsertSliceOp
      // is the insertion point is just before the ParallelCombiningOp in the
      // parallel case. The op type is known at compile time, so the choice is
      // made with `if constexpr`.
      if constexpr (std::is_same_v<InsertOpTy, tensor::ParallelInsertSliceOp>)
        rewriter.setInsertionPoint(insertSliceOp->getParentOp());

      reshapedSource = tensor::CollapseShapeOp::create(
          rewriter, loc, insertSliceOp.getSource(), *reassociation);
    }

    rewriter.replaceOpWithNewOp<InsertOpTy>(
        insertSliceOp, reshapedSource, insertSliceOp.getDest(),
        insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(),
        insertSliceOp.getMixedStrides());
    return success();
  }
};

} // namespace


/// Fills `patterns` with the set used to canonicalize unit-extent dims used
/// for broadcasting when rank reduction goes through reshapes: the unit-dim
/// dropping patterns, the rank-reducing extract/insert slice patterns, and a
/// collection of canonicalization and cleanup patterns.
static void
populateFoldUnitExtentDimsViaReshapesPatterns(RewritePatternSet &patterns,
                                              ControlDropUnitDims &options) {
  MLIRContext *ctx = patterns.getContext();

  // Unit-dim dropping patterns; both are configured with `options`.
  patterns.add<DropUnitDims, DropPadUnitDims>(ctx, options);

  // TODO: Patterns unrelated to unit dim folding should be factored out.
  patterns.add<RankReducedExtractSliceOp>(ctx);
  patterns.add<RankReducedInsertSliceOp<tensor::InsertSliceOp>>(ctx);
  patterns.add<RankReducedInsertSliceOp<tensor::ParallelInsertSliceOp>>(ctx);

  // Canonicalization patterns of the ops involved in the rewrites above.
  linalg::FillOp::getCanonicalizationPatterns(patterns, ctx);
  tensor::CollapseShapeOp::getCanonicalizationPatterns(patterns, ctx);
  tensor::EmptyOp::getCanonicalizationPatterns(patterns, ctx);
  tensor::ExpandShapeOp::getCanonicalizationPatterns(patterns, ctx);

  // Additional cleanup patterns.
  tensor::populateFoldTensorEmptyPatterns(patterns);
  memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns);
  memref::populateResolveShapedTypeResultDimsPatterns(patterns);
}


/// Fills `patterns` with the set used when rank reduction goes through
/// extract/insert slices: the unit-dim dropping patterns plus
/// canonicalization and cleanup patterns for `linalg.fill` and
/// `tensor.empty`, and result-dim resolution patterns.
static void
populateFoldUnitExtentDimsViaSlicesPatterns(RewritePatternSet &patterns,
                                            ControlDropUnitDims &options) {
  MLIRContext *ctx = patterns.getContext();

  // Unit-dim dropping patterns; both are configured with `options`.
  patterns.add<DropUnitDims, DropPadUnitDims>(ctx, options);

  // TODO: Patterns unrelated to unit dim folding should be factored out.
  linalg::FillOp::getCanonicalizationPatterns(patterns, ctx);
  tensor::EmptyOp::getCanonicalizationPatterns(patterns, ctx);
  tensor::populateFoldTensorEmptyPatterns(patterns);
  memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns);
  memref::populateResolveShapedTypeResultDimsPatterns(patterns);
}


/// Populates `patterns` according to `options.rankReductionStrategy`:
/// `ExtractInsertSlice` selects the slice-based pattern set and
/// `ReassociativeReshape` selects the reshape-based one. Any other value
/// leaves `patterns` untouched.
void mlir::linalg::populateFoldUnitExtentDimsPatterns(
    RewritePatternSet &patterns, linalg::ControlDropUnitDims &options) {
  using Strategy = linalg::ControlDropUnitDims::RankReductionStrategy;

  switch (options.rankReductionStrategy) {
  case Strategy::ExtractInsertSlice:
    populateFoldUnitExtentDimsViaSlicesPatterns(patterns, options);
    return;
  case Strategy::ReassociativeReshape:
    populateFoldUnitExtentDimsViaReshapesPatterns(patterns, options);
    return;
  }
}


/// Adds the `MoveInitOperandsToInput` pattern to `patterns`.
void mlir::linalg::populateMoveInitOperandsToInputPattern(
    RewritePatternSet &patterns) {
  MLIRContext *ctx = patterns.getContext();
  patterns.add<MoveInitOperandsToInput>(ctx);
}


namespace {

/// Pass that removes unit-extent dims within generic ops.
///
/// It greedily applies the unit-extent-dim folding patterns together with the
/// pattern that moves init operands to inputs. The `useRankReducingSlices`
/// pass option switches the rank reduction strategy to `ExtractInsertSlice`;
/// otherwise the default of `ControlDropUnitDims` is used.
struct LinalgFoldUnitExtentDimsPass
    : public impl::LinalgFoldUnitExtentDimsPassBase<
          LinalgFoldUnitExtentDimsPass> {
  using impl::LinalgFoldUnitExtentDimsPassBase<
      LinalgFoldUnitExtentDimsPass>::LinalgFoldUnitExtentDimsPassBase;

  void runOnOperation() override {
    Operation *root = getOperation();

    // Translate the pass option into the pattern configuration.
    ControlDropUnitDims control;
    if (useRankReducingSlices)
      control.rankReductionStrategy =
          linalg::ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice;

    RewritePatternSet patternSet(root->getContext());
    linalg::populateFoldUnitExtentDimsPatterns(patternSet, control);
    populateMoveInitOperandsToInputPattern(patternSet);

    // The driver's result is discarded, so it never makes the pass fail.
    (void)applyPatternsGreedily(root, std::move(patternSet));
  }
};


} // namespace


namespace {


/// Returns reassociation indices for collapsing/expanding a
/// tensor of rank `rank` at position `pos`.
///
/// The result has `rank - 1` groups. For `rank > 2`, dimension `pos` is
/// grouped with the following dimension, or with the preceding one when `pos`
/// is the last dimension; every other dimension forms its own group. E.g.
/// `rank = 3` gives `[[0, 1], [2]]` for `pos = 0` and `[[0], [1, 2]]` for
/// `pos = 1` or `pos = 2`. For `rank <= 2` every group is `[0, 1]`.
static SmallVector<ReassociationIndices>
getReassociationForReshapeAtDim(int64_t rank, int64_t pos) {
  SmallVector<ReassociationIndices> groups(rank - 1, {0, 1});
  if (rank <= 2)
    return groups;

  // Index of the group that holds two dimensions. The last dimension has no
  // successor, so it is folded into the group before it.
  const bool posIsLastDim = (pos == rank - 1);
  const int64_t mergedGroup = posIsLastDim ? pos - 1 : pos;

  for (int64_t group = 0, numGroups = rank - 1; group < numGroups; ++group) {
    if (group == mergedGroup) {
      groups[group] = ReassociationIndices{group, group + 1};
      continue;
    }
    // Groups before `pos` map to the same dimension index; groups after it
    // are shifted by one because of the merged pair.
    const int64_t dim = (group < pos) ? group : group + 1;
    groups[group] = ReassociationIndices{dim};
  }
  return groups;
}


/// Returns `val` with dimension `pos` collapsed away through a reassociative
/// reshape. A negative `pos` means "do not collapse" and returns `val` as is.
static Value collapseSingletonDimAt(PatternRewriter &rewriter, Value val,
                                    int64_t pos) {
  if (pos < 0)
    return val;

  auto shapedTy = cast<ShapedType>(val.getType());

  // Target shape: the original shape without the extent at `pos`.
  ArrayRef<int64_t> fullShape = shapedTy.getShape();
  SmallVector<int64_t> collapsedShape(fullShape.take_front(pos));
  ArrayRef<int64_t> trailing = fullShape.drop_front(pos + 1);
  collapsedShape.append(trailing.begin(), trailing.end());

  SmallVector<ReassociationIndices> reassociation =
      getReassociationForReshapeAtDim(shapedTy.getRank(), pos);
  return collapseValue(
      rewriter, val.getLoc(), val, collapsedShape, reassociation,
      ControlDropUnitDims::RankReductionStrategy::ReassociativeReshape);
}


/// Common base for the patterns that reduce the rank of contraction ops with
/// unit dimensions. Each pattern converts one named op (`FromOpTy`) into
/// another named op (`ToOpTy`) and is meant to remove a single iteration
/// space dim per application; removing several dims happens through
/// recursive application of the pattern rewrites.
template <typename FromOpTy, typename ToOpTy>
struct RankReduceContractionOps : OpRewritePattern<FromOpTy> {
  using OpRewritePattern<FromOpTy>::OpRewritePattern;

  /// Collapses each of the three operands at its matching entry of
  /// `operandCollapseDims`; a negative entry leaves the operand unchanged.
  SmallVector<Value>
  collapseOperands(PatternRewriter &rewriter, ArrayRef<Value> operands,
                   ArrayRef<int64_t> operandCollapseDims) const {
    assert(operandCollapseDims.size() == 3 && operands.size() == 3 &&
           "expected 3 operands and dims");
    SmallVector<Value> collapsed;
    collapsed.reserve(operands.size());
    for (auto [operand, unitDim] : llvm::zip(operands, operandCollapseDims))
      collapsed.push_back(collapseSingletonDimAt(rewriter, operand, unitDim));
    return collapsed;
  }

  /// Expands `result` back to `expandedType` by re-inserting dimension `dim`.
  Value expandResult(PatternRewriter &rewriter, Value result,
                     RankedTensorType expandedType, int64_t dim) const {
    SmallVector<ReassociationIndices> reassociation =
        getReassociationForReshapeAtDim(expandedType.getRank(), dim);
    return tensor::ExpandShapeOp::create(rewriter, result.getLoc(),
                                         expandedType, result, reassociation);
  }

  LogicalResult matchAndRewrite(FromOpTy contractionOp,
                                PatternRewriter &rewriter) const override {
    if (contractionOp.hasUserDefinedMaps())
      return rewriter.notifyMatchFailure(
          contractionOp, "ops with user-defined maps are not supported");

    auto dpsInputs = contractionOp.getDpsInputs();
    auto dpsInits = contractionOp.getDpsInits();
    if (dpsInputs.size() != 2 || dpsInits.size() != 1)
      return rewriter.notifyMatchFailure(contractionOp,
                                         "expected 2 inputs and 1 init");

    // Operand order used throughout: lhs, rhs, init.
    SmallVector<Value> operands{dpsInputs[0], dpsInputs[1], dpsInits[0]};

    // Ask the concrete pattern which dim of each operand to collapse.
    SmallVector<int64_t> unitDims;
    if (failed(getOperandUnitDims(contractionOp, unitDims)))
      return rewriter.notifyMatchFailure(contractionOp,
                                         "no reducable dims found");

    SmallVector<Value> collapsed =
        collapseOperands(rewriter, operands, unitDims);
    Value newLhs = collapsed[0];
    Value newRhs = collapsed[1];
    Value newInit = collapsed[2];

    // The new op only has a result when the collapsed init is a ranked tensor.
    SmallVector<Type, 1> newResultTypes;
    if (isa<RankedTensorType>(newInit.getType()))
      newResultTypes.push_back(newInit.getType());

    auto collapsedOp =
        ToOpTy::create(rewriter, contractionOp.getLoc(), newResultTypes,
                       ValueRange{newLhs, newRhs}, ValueRange{newInit});

    // Carry over every attribute except the indexing-map ones.
    for (auto attr : contractionOp->getAttrs()) {
      const auto attrName = attr.getName();
      const bool isIndexingMapAttr =
          attrName == LinalgDialect::kMemoizedIndexingMapsAttrName ||
          attrName == "indexing_maps";
      if (!isIndexingMapAttr)
        collapsedOp->setAttr(attrName, attr.getValue());
    }

    auto results = contractionOp.getResults();
    assert(results.size() < 2 && "expected at most one result");
    if (results.empty()) {
      rewriter.replaceOp(contractionOp, collapsedOp);
      return success();
    }

    // Re-expand the result at the init's collapsed dim so that users still
    // see the original result type.
    Value expanded = expandResult(rewriter, collapsedOp.getResultTensors()[0],
                                  cast<RankedTensorType>(results[0].getType()),
                                  unitDims[2]);
    rewriter.replaceOp(contractionOp, expanded);
    return success();
  }

  /// Populate `operandUnitDims` with 3 indices indicating the unit dim
  /// for each operand that should be collapsed in this pattern.  If an
  /// operand shouldn't be collapsed, the index should be negative.
  virtual LogicalResult
  getOperandUnitDims(LinalgOp op,
                     SmallVectorImpl<int64_t> &operandUnitDims) const = 0;
};


/// Patterns for unbatching batched contraction ops: when the single batch
/// dimension has extent 1 in all three operands, `FromOpTy` is rewritten to
/// the unbatched `ToOpTy` by the base class.
template <typename FromOpTy, typename ToOpTy>
struct RankReduceToUnBatched : RankReduceContractionOps<FromOpTy, ToOpTy> {
  using RankReduceContractionOps<FromOpTy, ToOpTy>::RankReduceContractionOps;

  /// Look for unit batch dims to collapse.
  ///
  /// On success `operandUnitDims` holds, in the order reported by
  /// `mapIterationSpaceDimToAllOperandDims`, the operand dim that the batch
  /// iteration dim maps to in each of the three operands. Fails when the
  /// contraction dims cannot be inferred, when there is not exactly one batch
  /// dim, or when the batch dim does not map to a unit extent in exactly
  /// three operands.
  LogicalResult
  getOperandUnitDims(LinalgOp op,
                     SmallVectorImpl<int64_t> &operandUnitDims) const override {
    FailureOr<ContractionDimensions> maybeContractionDims =
        inferContractionDims(op);
    if (failed(maybeContractionDims)) {
      LLVM_DEBUG(llvm::dbgs() << "could not infer contraction dims\n");
      return failure();
    }
    // Bind by reference: no need to copy the dimension lists.
    const ContractionDimensions &contractionDims = *maybeContractionDims;

    if (contractionDims.batch.size() != 1)
      return failure();
    const unsigned batchDim = contractionDims.batch[0];

    // Each entry pairs an operand with the operand dim indexed by `batchDim`.
    SmallVector<std::pair<Value, unsigned>, 3> bOperands;
    op.mapIterationSpaceDimToAllOperandDims(batchDim, bOperands);

    const bool allUnitBatch =
        bOperands.size() == 3 &&
        llvm::all_of(bOperands, [](const auto &operandAndDim) {
          return cast<ShapedType>(operandAndDim.first.getType())
                     .getShape()[operandAndDim.second] == 1;
        });
    if (!allUnitBatch) {
      LLVM_DEBUG(llvm::dbgs() << "specified unit dims not found\n");
      return failure();
    }

    operandUnitDims = SmallVector<int64_t>{bOperands[0].second,
                                           bOperands[1].second,
                                           bOperands[2].second};
    return success();
  }
};


/// Patterns for reducing non-batch dimensions: when the first `m` (or `n`)
/// contraction dim has extent 1 in the two operands that use it, `FromOpTy`
/// is rewritten to the lower-rank `ToOpTy` by the base class.
template <typename FromOpTy, typename ToOpTy>
struct RankReduceMatmul : RankReduceContractionOps<FromOpTy, ToOpTy> {
  using RankReduceContractionOps<FromOpTy, ToOpTy>::RankReduceContractionOps;

  /// Helper for determining whether the lhs/init or rhs/init are reduced.
  static bool constexpr reduceLeft =
      (std::is_same_v<FromOpTy, BatchMatmulOp> &&
       std::is_same_v<ToOpTy, BatchVecmatOp>) ||
      (std::is_same_v<FromOpTy, MatmulOp> &&
       std::is_same_v<ToOpTy, VecmatOp>) ||
      (std::is_same_v<FromOpTy, MatvecOp> && std::is_same_v<ToOpTy, DotOp>);

  /// Look for non-batch spatial dims to collapse.
  ///
  /// Uses the first `m` dim when `reduceLeft` is set and the first `n` dim
  /// otherwise. On success `operandUnitDims` is `{d0, -1, d1}` (reduce left)
  /// or `{-1, d0, d1}` (reduce right), where `d0`/`d1` are the operand dims
  /// of the first and second operand reported by
  /// `mapIterationSpaceDimToAllOperandDims`, and `-1` marks the operand that
  /// is left untouched.
  LogicalResult
  getOperandUnitDims(LinalgOp op,
                     SmallVectorImpl<int64_t> &operandUnitDims) const override {
    FailureOr<ContractionDimensions> maybeContractionDims =
        inferContractionDims(op);
    if (failed(maybeContractionDims)) {
      LLVM_DEBUG(llvm::dbgs() << "could not infer contraction dims\n");
      return failure();
    }
    // Bind by reference: no need to copy the dimension lists.
    const ContractionDimensions &contractionDims = *maybeContractionDims;

    // Both directions run the same check, only on a different dim list.
    const auto &candidateDims =
        reduceLeft ? contractionDims.m : contractionDims.n;
    // Guard the `[0]` access below against an empty dim list.
    if (candidateDims.empty()) {
      LLVM_DEBUG(llvm::dbgs() << "specified unit dims not found\n");
      return failure();
    }

    // Each entry pairs an operand with the operand dim indexed by the
    // candidate iteration dim.
    SmallVector<std::pair<Value, unsigned>, 2> dimOperands;
    op.mapIterationSpaceDimToAllOperandDims(candidateDims[0], dimOperands);
    if (dimOperands.size() != 2)
      return failure();

    const bool allUnit =
        llvm::all_of(dimOperands, [](const auto &operandAndDim) {
          return cast<ShapedType>(operandAndDim.first.getType())
                     .getShape()[operandAndDim.second] == 1;
        });
    if (!allUnit) {
      LLVM_DEBUG(llvm::dbgs() << "specified unit dims not found\n");
      return failure();
    }

    const int64_t firstUnitDim = dimOperands[0].second;
    const int64_t secondUnitDim = dimOperands[1].second;
    if constexpr (reduceLeft)
      operandUnitDims = SmallVector<int64_t>{firstUnitDim, -1, secondUnitDim};
    else
      operandUnitDims = SmallVector<int64_t>{-1, firstUnitDim, secondUnitDim};
    return success();
  }
};


} // namespace


/// Adds the rank-reducing patterns for named contraction ops to `patterns`:
/// the unbatching patterns and the matmul-family patterns that drop a unit
/// non-batch dimension.
void mlir::linalg::populateContractionOpRankReducingPatterns(
    RewritePatternSet &patterns) {
  MLIRContext *ctx = patterns.getContext();

  // Unbatching patterns for unit batch size
  patterns.add<RankReduceToUnBatched<BatchMatmulOp, MatmulOp>,
               RankReduceToUnBatched<BatchMatvecOp, MatvecOp>,
               RankReduceToUnBatched<BatchVecmatOp, VecmatOp>>(ctx);

  // Non-batch rank 1 reducing patterns
  patterns.add<RankReduceMatmul<MatmulOp, VecmatOp>,
               RankReduceMatmul<MatmulOp, MatvecOp>>(ctx);

  // Batch rank 1 reducing patterns
  patterns.add<RankReduceMatmul<BatchMatmulOp, BatchVecmatOp>,
               RankReduceMatmul<BatchMatmulOp, BatchMatvecOp>>(ctx);

  // Non-batch rank 0 reducing patterns
  patterns.add<RankReduceMatmul<MatvecOp, DotOp>,
               RankReduceMatmul<VecmatOp, DotOp>>(ctx);
}


success
return success()

AffineOps.h

Passes.h

Utils.h

expandValue
static Value expandValue(RewriterBase &rewriter, Location loc, Value result, Value origDest, ArrayRef< ReassociationIndices > reassociation, ControlDropUnitDims::RankReductionStrategy rankReductionStrategy)
Expand the given value so that the type matches the type of origDest.
Definition DropUnitDims.cpp:251

replaceUnitDimIndexOps
static void replaceUnitDimIndexOps(GenericOp genericOp, const llvm::SmallDenseSet< unsigned > &unitDims, RewriterBase &rewriter)
Implements a pass that canonicalizes the uses of unit-extent dimensions for broadcasting.
Definition DropUnitDims.cpp:227

dropUnitExtentFromOperandMetadata
static UnitExtentReplacementInfo dropUnitExtentFromOperandMetadata(MLIRContext *context, IndexingMapOpInterface op, OpOperand *opOperand, llvm::SmallDenseMap< unsigned, unsigned > &oldDimsToNewDimsMap, ArrayRef< AffineExpr > dimReplacements)
Definition DropUnitDims.cpp:334

populateFoldUnitExtentDimsViaReshapesPatterns
static void populateFoldUnitExtentDimsViaReshapesPatterns(RewritePatternSet &patterns, ControlDropUnitDims &options)
Patterns that are used to canonicalize the use of unit-extent dims for broadcasting.
Definition DropUnitDims.cpp:804

populateFoldUnitExtentDimsViaSlicesPatterns
static void populateFoldUnitExtentDimsViaSlicesPatterns(RewritePatternSet &patterns, ControlDropUnitDims &options)
Definition DropUnitDims.cpp:824

collapseValue
static Value collapseValue(RewriterBase &rewriter, Location loc, Value operand, ArrayRef< int64_t > targetShape, ArrayRef< ReassociationIndices > reassociation, ControlDropUnitDims::RankReductionStrategy rankReductionStrategy)
Collapse the given value so that the type matches the type of origOutput.
Definition DropUnitDims.cpp:278

FoldUtils.h

GreedyPatternRewriteDriver.h

lhs
lhs
Definition AffineExpr.cpp:832

b
b
Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...
Definition LinalgTransformOps.cpp:2096

ValueRange
b ValueRange
Definition LinalgTransformOps.cpp:2102

result
result
Definition LinalgTransformOps.cpp:2097

options
static llvm::ManagedStatic< PassManagerOptions > options
Definition PassManagerOptions.cpp:89

ReshapeOpsUtils.h

rhs
*B rhs
Definition VectorTransforms.cpp:2247

int64_t

llvm::ArrayRef
Definition LLVM.h:48

llvm::SmallVector
Definition LLVM.h:72

mlir::AffineDimExpr
A dimensional identifier appearing in an affine expression.
Definition AffineExpr.h:223

mlir::AffineExpr
Base type for affine expression.
Definition AffineExpr.h:68

mlir::AffineMap
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
Definition AffineMap.h:46

mlir::AffineMap::get
static AffineMap get(MLIRContext *context)
Returns a zero result affine map with no dimensions or symbols: () -> ().
Definition MLIRContext.cpp:1224

mlir::AffineMap::getNumSymbols
unsigned getNumSymbols() const
Definition AffineMap.cpp:394

mlir::AffineMap::getResults
ArrayRef< AffineExpr > getResults() const
Definition AffineMap.cpp:403

mlir::AffineMap::replaceDimsAndSymbols
AffineMap replaceDimsAndSymbols(ArrayRef< AffineExpr > dimReplacements, ArrayRef< AffineExpr > symReplacements, unsigned numResultDims, unsigned numResultSyms) const
This method substitutes any uses of dimensions and symbols (e.g.
Definition AffineMap.cpp:496

mlir::BlockArgument
This class represents an argument of a Block.
Definition Value.h:309

mlir::Block
Block represents an ordered list of Operations.
Definition Block.h:33

mlir::Block::addArgument
BlockArgument addArgument(Type type, Location loc)
Add one value to the argument list.
Definition Block.cpp:153

mlir::Builder::getIndexAttr
IntegerAttr getIndexAttr(int64_t value)
Definition Builders.cpp:108

mlir::Builder::getContext
MLIRContext * getContext() const
Definition Builders.h:56

mlir::IRMapping
This is a utility class for mapping one set of IR entities to another.
Definition IRMapping.h:26

mlir::IRMapping::map
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
Definition IRMapping.h:30

mlir::IRRewriter
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
Definition PatternMatch.h:774

mlir::Location
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
Definition Location.h:76

mlir::MLIRContext
MLIRContext is the top-level object for a collection of MLIR operations.
Definition MLIRContext.h:63

mlir::OpBuilder::InsertionGuard
RAII guard to reset the insertion point of the builder when destroyed.
Definition Builders.h:348

mlir::OpBuilder
This class helps build Operations.
Definition Builders.h:207

mlir::OpBuilder::createBlock
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes={}, ArrayRef< Location > locs={})
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
Definition Builders.cpp:430

mlir::OpBuilder::clone
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
Definition Builders.cpp:562

mlir::OpBuilder::setInsertionPoint
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Definition Builders.h:398

mlir::OpBuilder::createOrFold
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Definition Builders.h:526

mlir::OpBuilder::setInsertionPointAfterValue
void setInsertionPointAfterValue(Value val)
Sets the insertion point to the node after the specified value.
Definition Builders.h:421

mlir::OpOperand
This class represents an operand of an operation.
Definition Value.h:257

mlir::Operation
Operation is the basic unit of execution within MLIR.
Definition Operation.h:88

mlir::Operation::getContext
MLIRContext * getContext()
Return the context this operation is associated with.
Definition Operation.h:216

mlir::PatternBenefit
This class represents the benefit of a pattern match in a unitless scheme that ranges from 0 (very li...
Definition PatternMatch.h:34

mlir::PatternRewriter
A special type of RewriterBase that coordinates the application of a rewrite pattern on the current I...
Definition PatternMatch.h:793

mlir::Region
This class contains a list of basic blocks and a link to the parent operation it is attached to.
Definition Region.h:26

mlir::RewritePatternSet
Definition PatternMatch.h:816

mlir::RewriterBase
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
Definition PatternMatch.h:368

mlir::RewriterBase::replaceOp
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
Definition PatternMatch.cpp:127

mlir::RewriterBase::notifyMatchFailure
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
Definition PatternMatch.h:726

mlir::RewriterBase::replaceOpWithNewOp
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
Definition PatternMatch.h:529

mlir::Type
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
Definition Types.h:74

mlir::Value
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Definition Value.h:96

mlir::Value::getType
Type getType() const
Return the type of this value.
Definition Value.h:105

mlir::Value::getLoc
Location getLoc() const
Return the location of this value.
Definition Value.cpp:24

mlir::arith::ConstantIndexOp
Specialization of arith.constant op that returns an integer of index type.
Definition Arith.h:113

mlir::impl::LinalgFoldUnitExtentDimsPassBase
Definition DropUnitDims.cpp:718

void

Arith.h

Linalg.h

Transforms.h

Transforms.h

Tensor.h

Transforms.h

AffineExpr.h

AffineMap.h

BuiltinTypes.h

mlir::index
Definition IndexToLLVM.h:23

mlir::linalg
Definition LinalgToStandard.h:24

mlir::linalg::populateMoveInitOperandsToInputPattern
void populateMoveInitOperandsToInputPattern(RewritePatternSet &patterns)
A pattern that converts init operands to input operands.
Definition DropUnitDims.cpp:849

mlir::linalg::DroppedUnitDimsBuilder
std::function< IndexingMapOpInterface( Location loc, OpBuilder &, IndexingMapOpInterface, ArrayRef< Value > newOperands, ArrayRef< AffineMap > newIndexingMaps, const llvm::SmallDenseSet< unsigned > &droppedDims)> DroppedUnitDimsBuilder
Definition Transforms.h:544

mlir::linalg::populateContractionOpRankReducingPatterns
void populateContractionOpRankReducingPatterns(RewritePatternSet &patterns)
Adds patterns that reduce the rank of named contraction ops that have unit dimensions in the operand(...
Definition DropUnitDims.cpp:1107

mlir::linalg::getReassociationMapForFoldingUnitDims
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of an extract_slice (or source of an insert_slice) operat...
Definition Utils.cpp:1790

mlir::linalg::populateFoldUnitExtentDimsPatterns
void populateFoldUnitExtentDimsPatterns(RewritePatternSet &patterns, ControlDropUnitDims &options)
Patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors via reassociativ...
Definition DropUnitDims.cpp:837

mlir::linalg::inferContractionDims
FailureOr< ContractionDimensions > inferContractionDims(LinalgOp linalgOp)
Find at least 2 parallel (m and n) and 1 reduction (k) dimension candidates that form a matmul subcom...
Definition LinalgInterfaces.cpp:484

mlir::linalg::dropUnitDims
FailureOr< DropUnitDimsResult > dropUnitDims(RewriterBase &rewriter, IndexingMapOpInterface op, const DroppedUnitDimsBuilder &droppedUnitDimsBuilder, const ControlDropUnitDims &options)
Definition DropUnitDims.cpp:384

mlir::linalg::getPrunedAttributeList
SmallVector< NamedAttribute > getPrunedAttributeList(OpTy op)
Returns an attribute list that excludes pre-defined attributes.
Definition Utils.h:388

mlir::memref::populateResolveRankedShapedTypeResultDimsPatterns
void populateResolveRankedShapedTypeResultDimsPatterns(RewritePatternSet &patterns)
Appends patterns that resolve memref.dim operations with values that are defined by operations that i...
Definition ResolveShapedTypeResultDims.cpp:181

mlir::memref::populateResolveShapedTypeResultDimsPatterns
void populateResolveShapedTypeResultDimsPatterns(RewritePatternSet &patterns)
Appends patterns that resolve memref.dim operations with values that are defined by operations that i...
Definition ResolveShapedTypeResultDims.cpp:188

mlir::remark::failed
detail::InFlightRemark failed(Location loc, RemarkOpts opts)
Report an optimization remark that failed.
Definition Remarks.h:561

mlir::shape
Definition ShapeMappingAnalysis.h:20

mlir::tensor::populateFoldTensorEmptyPatterns
void populateFoldTensorEmptyPatterns(RewritePatternSet &patterns, bool foldSingleUseOnly=false)
Populates patterns with patterns that fold tensor.empty with its consumers.
Definition EmptyOpPatterns.cpp:130

mlir::tensor::getMixedSize
OpFoldResult getMixedSize(OpBuilder &builder, Location loc, Value value, int64_t dim)
Return the dimension of the given tensor value.
Definition TensorOps.cpp:57

mlir::tensor::getMixedSizes
SmallVector< OpFoldResult > getMixedSizes(OpBuilder &builder, Location loc, Value value)
Return the dimensions of the given tensor value.
Definition TensorOps.cpp:66

mlir
Include the generated interface declarations.
Definition AliasAnalysis.h:19

mlir::concatAffineMaps
AffineMap concatAffineMaps(ArrayRef< AffineMap > maps, MLIRContext *context)
Concatenates a list of maps into a single AffineMap, stepping over potentially empty maps.
Definition AffineMap.cpp:829

mlir::getConstantIntValue
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Definition StaticValueUtils.cpp:134

mlir::getType
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
Definition Utils.cpp:304

mlir::applyPatternsGreedily
LogicalResult applyPatternsGreedily(Region &region, const FrozenRewritePatternSet &patterns, GreedyRewriteConfig config=GreedyRewriteConfig(), bool *changed=nullptr)
Rewrite ops in the given region, which must be isolated from above, by repeatedly applying the highes...
Definition GreedyPatternRewriteDriver.cpp:913

mlir::inversePermutation
AffineMap inversePermutation(AffineMap map)
Returns a map of codomain to domain dimensions such that the first codomain dimension for a particula...
Definition AffineMap.cpp:784

mlir::SetVector
llvm::SetVector< T, Vector, Set, N > SetVector
Definition LLVM.h:131

mlir::patterns
const FrozenRewritePatternSet & patterns
Definition GreedyPatternRewriteDriver.h:283

mlir::getAffineConstantExpr
AffineExpr getAffineConstantExpr(int64_t constant, MLIRContext *context)

mlir::ReassociationIndices
SmallVector< int64_t, 2 > ReassociationIndices
Definition Utils.h:27

mlir::getAffineDimExpr
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.

UnitExtentReplacementInfo
Compute the modified metadata for an operand of an operation whose unit dims are being dropped.
Definition DropUnitDims.cpp:329

UnitExtentReplacementInfo::reassociation
SmallVector< ReassociationIndices > reassociation
Definition DropUnitDims.cpp:331

UnitExtentReplacementInfo::targetShape
SmallVector< int64_t > targetShape
Definition DropUnitDims.cpp:332

UnitExtentReplacementInfo::indexMap
AffineMap indexMap
Definition DropUnitDims.cpp:330

mlir::OpRewritePattern
OpRewritePattern is a wrapper around RewritePattern that allows for matching and rewriting against an...
Definition PatternMatch.h:314

mlir::linalg::ContractionDimensions::batch
SmallVector< unsigned, 2 > batch
Definition LinalgInterfaces.h:45

mlir::linalg::ContractionDimensions::m
SmallVector< unsigned, 2 > m
Definition LinalgInterfaces.h:46

mlir::linalg::ContractionDimensions::n
SmallVector< unsigned, 2 > n
Definition LinalgInterfaces.h:47

mlir::linalg::ControlDropUnitDims
Transformation to drop unit-extent dimensions from linalg.generic operations.
Definition Transforms.h:521

mlir::linalg::ControlDropUnitDims::RankReductionStrategy
RankReductionStrategy
Definition Transforms.h:522

mlir::linalg::ControlDropUnitDims::RankReductionStrategy::ReassociativeReshape
@ ReassociativeReshape
Definition Transforms.h:522

mlir::linalg::ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice
@ ExtractInsertSlice
Definition Transforms.h:522

mlir::linalg::DropUnitDimsResult
Definition Transforms.h:540