mlir.dialects._structured_transform_ops_gen =========================================== .. py:module:: mlir.dialects._structured_transform_ops_gen Attributes ---------- .. autoapisummary:: mlir.dialects._structured_transform_ops_gen._ods_ir Classes ------- .. autoapisummary:: mlir.dialects._structured_transform_ops_gen.ApplyDecomposeTensorPackUnpackPatternsOp mlir.dialects._structured_transform_ops_gen.ApplyDecomposeTensorPadPatternsOp mlir.dialects._structured_transform_ops_gen.ApplyEraseUnnecessaryInputsPatternsOp mlir.dialects._structured_transform_ops_gen.ApplyFoldAddIntoDestPatternsOp mlir.dialects._structured_transform_ops_gen.ApplyFoldIntoPackAndUnpackPatternsOp mlir.dialects._structured_transform_ops_gen.ApplyFoldPackUnpackIntoEmptyPatternsOp mlir.dialects._structured_transform_ops_gen.ApplyFoldUnitExtentDimsViaReshapesPatternsOp mlir.dialects._structured_transform_ops_gen.ApplyFoldUnitExtentDimsViaSlicesPatternsOp mlir.dialects._structured_transform_ops_gen.ApplyPadVectorizationPatternsOp mlir.dialects._structured_transform_ops_gen.ApplyTilingCanonicalizationPatternsOp mlir.dialects._structured_transform_ops_gen.BufferizeToAllocationOp mlir.dialects._structured_transform_ops_gen.ContinuousTileSizesOp mlir.dialects._structured_transform_ops_gen.ConvertConv2DToImg2ColOp mlir.dialects._structured_transform_ops_gen.ConvertToLoopsOp mlir.dialects._structured_transform_ops_gen.DecomposeInterfaceOp mlir.dialects._structured_transform_ops_gen.DecomposeOp mlir.dialects._structured_transform_ops_gen.DecomposeWinogradOp mlir.dialects._structured_transform_ops_gen.EliminateLinalgOpAnchoredEmptyTensorsOp mlir.dialects._structured_transform_ops_gen.FlattenElementwiseLinalgOp mlir.dialects._structured_transform_ops_gen.FuseIntoContainingOp mlir.dialects._structured_transform_ops_gen.FuseOp mlir.dialects._structured_transform_ops_gen.GeneralizeOp mlir.dialects._structured_transform_ops_gen.HoistPadBuildPackingLoopNestOp mlir.dialects._structured_transform_ops_gen.HoistPadOp mlir.dialects._structured_transform_ops_gen.HoistRedundantVectorBroadcastsOp mlir.dialects._structured_transform_ops_gen.HoistRedundantVectorTransfersOp mlir.dialects._structured_transform_ops_gen.InsertSliceToCopyOp mlir.dialects._structured_transform_ops_gen.InterchangeOp mlir.dialects._structured_transform_ops_gen.LinalgCopyToMemrefOp mlir.dialects._structured_transform_ops_gen.LowerPackOp mlir.dialects._structured_transform_ops_gen.LowerUnPackOp mlir.dialects._structured_transform_ops_gen.MapCopyToThreadsOp mlir.dialects._structured_transform_ops_gen.MatchOp mlir.dialects._structured_transform_ops_gen.MultiTileSizesOp mlir.dialects._structured_transform_ops_gen.PackGreedilyOp mlir.dialects._structured_transform_ops_gen.PackOp mlir.dialects._structured_transform_ops_gen.PackTransposeOp mlir.dialects._structured_transform_ops_gen.PadOp mlir.dialects._structured_transform_ops_gen.PadTilingInterfaceOp mlir.dialects._structured_transform_ops_gen.PromoteOp mlir.dialects._structured_transform_ops_gen.PromoteTensorOp mlir.dialects._structured_transform_ops_gen.ReplaceOp mlir.dialects._structured_transform_ops_gen.RewriteInDestinationPassingStyleOp mlir.dialects._structured_transform_ops_gen.ScalarizeOp mlir.dialects._structured_transform_ops_gen.SpecializeOp mlir.dialects._structured_transform_ops_gen.SplitOp mlir.dialects._structured_transform_ops_gen.SplitReductionOp mlir.dialects._structured_transform_ops_gen.TileReductionUsingForOp mlir.dialects._structured_transform_ops_gen.TileReductionUsingForallOp 
mlir.dialects._structured_transform_ops_gen.TileUsingForOp mlir.dialects._structured_transform_ops_gen.TileUsingForallOp mlir.dialects._structured_transform_ops_gen.TransposeConv2DOp mlir.dialects._structured_transform_ops_gen.TransposeMatmulOp mlir.dialects._structured_transform_ops_gen.VectorizeChildrenAndApplyPatternsOp mlir.dialects._structured_transform_ops_gen.VectorizeOp mlir.dialects._structured_transform_ops_gen.WinogradConv2DOp Functions --------- .. autoapisummary:: mlir.dialects._structured_transform_ops_gen.apply_patterns_linalg_decompose_pack_unpack mlir.dialects._structured_transform_ops_gen.apply_patterns_linalg_decompose_pad mlir.dialects._structured_transform_ops_gen.apply_patterns_linalg_erase_unnecessary_inputs mlir.dialects._structured_transform_ops_gen.apply_patterns_linalg_fold_add_into_dest mlir.dialects._structured_transform_ops_gen.apply_patterns_tensor_fold_into_pack_and_unpack mlir.dialects._structured_transform_ops_gen.apply_patterns_linalg_fold_pack_unpack_into_empty mlir.dialects._structured_transform_ops_gen.apply_patterns_linalg_fold_unit_extent_dims_via_reshapes mlir.dialects._structured_transform_ops_gen.apply_patterns_linalg_fold_unit_extent_dims_via_slices mlir.dialects._structured_transform_ops_gen.apply_patterns_linalg_pad_vectorization mlir.dialects._structured_transform_ops_gen.apply_patterns_linalg_tiling_canonicalization mlir.dialects._structured_transform_ops_gen.structured_bufferize_to_allocation mlir.dialects._structured_transform_ops_gen.structured_continuous_tile_sizes mlir.dialects._structured_transform_ops_gen.structured_convert_conv2d_to_img2col mlir.dialects._structured_transform_ops_gen.structured_convert_to_loops mlir.dialects._structured_transform_ops_gen.structured_decompose_interface mlir.dialects._structured_transform_ops_gen.structured_decompose mlir.dialects._structured_transform_ops_gen.structured_decompose_winograd_op mlir.dialects._structured_transform_ops_gen.structured_eliminate_empty_tensors mlir.dialects._structured_transform_ops_gen.structured_flatten_elementwise mlir.dialects._structured_transform_ops_gen.structured_fuse_into_containing_op mlir.dialects._structured_transform_ops_gen.structured_fuse mlir.dialects._structured_transform_ops_gen.structured_generalize mlir.dialects._structured_transform_ops_gen.structured_hoist_pad_build_packing_loop_nest mlir.dialects._structured_transform_ops_gen.structured_hoist_pad mlir.dialects._structured_transform_ops_gen.structured_hoist_redundant_vector_broadcasts mlir.dialects._structured_transform_ops_gen.structured_hoist_redundant_vector_transfers mlir.dialects._structured_transform_ops_gen.structured_insert_slice_to_copy mlir.dialects._structured_transform_ops_gen.structured_interchange mlir.dialects._structured_transform_ops_gen.structured_linalg_copy_to_memref mlir.dialects._structured_transform_ops_gen.structured_lower_pack mlir.dialects._structured_transform_ops_gen.structured_lower_unpack mlir.dialects._structured_transform_ops_gen.structured_gpu_map_copy_to_threads mlir.dialects._structured_transform_ops_gen.structured_match mlir.dialects._structured_transform_ops_gen.structured_multitile_sizes mlir.dialects._structured_transform_ops_gen.structured_pack_greedily mlir.dialects._structured_transform_ops_gen.structured_pack mlir.dialects._structured_transform_ops_gen.structured_pack_transpose mlir.dialects._structured_transform_ops_gen.structured_pad mlir.dialects._structured_transform_ops_gen.structured_pad_tiling_interface 
mlir.dialects._structured_transform_ops_gen.structured_promote mlir.dialects._structured_transform_ops_gen.structured_promote_tensor mlir.dialects._structured_transform_ops_gen.structured_replace mlir.dialects._structured_transform_ops_gen.structured_rewrite_in_destination_passing_style mlir.dialects._structured_transform_ops_gen.structured_scalarize mlir.dialects._structured_transform_ops_gen.structured_specialize mlir.dialects._structured_transform_ops_gen.structured_split mlir.dialects._structured_transform_ops_gen.structured_split_reduction mlir.dialects._structured_transform_ops_gen.structured_tile_reduction_using_for mlir.dialects._structured_transform_ops_gen.structured_tile_reduction_using_forall mlir.dialects._structured_transform_ops_gen.structured_tile_using_for mlir.dialects._structured_transform_ops_gen.structured_tile_using_forall mlir.dialects._structured_transform_ops_gen.structured_transpose_conv2d mlir.dialects._structured_transform_ops_gen.structured_transpose_matmul mlir.dialects._structured_transform_ops_gen.structured_vectorize_children_and_apply_patterns mlir.dialects._structured_transform_ops_gen.structured_vectorize mlir.dialects._structured_transform_ops_gen.structured_winograd_conv2d Module Contents --------------- .. py:data:: _ods_ir .. py:class:: ApplyDecomposeTensorPackUnpackPatternsOp(*, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Collect patterns to decompose linalg.pack and linalg.unpack into e.g. tensor::PadOp, linalg::TransposeOp Ops. Requires all outer dims to be unit. .. py:attribute:: OPERATION_NAME :value: 'transform.apply_patterns.linalg.decompose_pack_unpack' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:function:: apply_patterns_linalg_decompose_pack_unpack(*, loc=None, ip=None) -> ApplyDecomposeTensorPackUnpackPatternsOp .. py:class:: ApplyDecomposeTensorPadPatternsOp(*, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Collect patterns to decompose tensor.pad into e.g. tensor::EmptyOp, linalg::FillOp and tensor::InsertSliceOp. .. py:attribute:: OPERATION_NAME :value: 'transform.apply_patterns.linalg.decompose_pad' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:function:: apply_patterns_linalg_decompose_pad(*, loc=None, ip=None) -> ApplyDecomposeTensorPadPatternsOp .. py:class:: ApplyEraseUnnecessaryInputsPatternsOp(*, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Collects patterns that promote inputs to outputs and remove unused inputs of ``linalg.generic`` ops. .. py:attribute:: OPERATION_NAME :value: 'transform.apply_patterns.linalg.erase_unnecessary_inputs' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:function:: apply_patterns_linalg_erase_unnecessary_inputs(*, loc=None, ip=None) -> ApplyEraseUnnecessaryInputsPatternsOp .. py:class:: ApplyFoldAddIntoDestPatternsOp(*, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Collects patterns to replace linalg.add when destination passing suffices for achieving the sum. .. py:attribute:: OPERATION_NAME :value: 'transform.apply_patterns.linalg.fold_add_into_dest' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:function:: apply_patterns_linalg_fold_add_into_dest(*, loc=None, ip=None) -> ApplyFoldAddIntoDestPatternsOp
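A minimal sketch of driving one of the pattern-collection ops above from Python. It assumes the higher-level ``mlir.dialects.transform`` bindings, where ``ApplyPatternsOp`` exposes its patterns region, and an existing handle ``func`` of type ``!transform.any_op`` inside a transform sequence body; the helper name is illustrative.

.. code:: python

   from mlir import ir
   from mlir.dialects import transform
   from mlir.dialects._structured_transform_ops_gen import (
       apply_patterns_linalg_fold_add_into_dest,
   )

   def add_linalg_patterns(func):
       # Build: transform.apply_patterns { apply_patterns.linalg.fold_add_into_dest }
       apply = transform.ApplyPatternsOp(func)
       with ir.InsertionPoint(apply.patterns):
           apply_patterns_linalg_fold_add_into_dest()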
.. py:class:: ApplyFoldIntoPackAndUnpackPatternsOp(*, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Indicates that operations like tensor.pad and tensor.extract_slice should be folded into linalg.pack and linalg.unpack operations, respectively. .. py:attribute:: OPERATION_NAME :value: 'transform.apply_patterns.tensor.fold_into_pack_and_unpack' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:function:: apply_patterns_tensor_fold_into_pack_and_unpack(*, loc=None, ip=None) -> ApplyFoldIntoPackAndUnpackPatternsOp .. py:class:: ApplyFoldPackUnpackIntoEmptyPatternsOp(*, fold_single_use_only=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` // TODO: .. py:attribute:: OPERATION_NAME :value: 'transform.apply_patterns.linalg.fold_pack_unpack_into_empty' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: fold_single_use_only() -> _ods_ir .. py:function:: apply_patterns_linalg_fold_pack_unpack_into_empty(*, fold_single_use_only=None, loc=None, ip=None) -> ApplyFoldPackUnpackIntoEmptyPatternsOp .. py:class:: ApplyFoldUnitExtentDimsViaReshapesPatternsOp(*, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Collects patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors via reassociative reshape ops. .. py:attribute:: OPERATION_NAME :value: 'transform.apply_patterns.linalg.fold_unit_extent_dims_via_reshapes' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:function:: apply_patterns_linalg_fold_unit_extent_dims_via_reshapes(*, loc=None, ip=None) -> ApplyFoldUnitExtentDimsViaReshapesPatternsOp .. py:class:: ApplyFoldUnitExtentDimsViaSlicesPatternsOp(*, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Collects patterns to fold unit-extent dimensions in operands/results of linalg ops on tensors via rank-reducing slices. .. py:attribute:: OPERATION_NAME :value: 'transform.apply_patterns.linalg.fold_unit_extent_dims_via_slices' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:function:: apply_patterns_linalg_fold_unit_extent_dims_via_slices(*, loc=None, ip=None) -> ApplyFoldUnitExtentDimsViaSlicesPatternsOp .. py:class:: ApplyPadVectorizationPatternsOp(*, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Apply patterns that vectorize tensor.pad. These patterns rewrite tensor.pad Ops using vector.transfer_read and vector.transfer_write operations. This is done either by: #. Folding tensor.pad with an existing vector.transfer_read / vector.transfer_write Op (generated prior to running these patterns). #. Rewriting it (when matched together with a tensor.insert_slice consumer Op) as a vector.transfer_read + vector.transfer_write pair. In both cases, these patterns look at producers and consumers for the matched tensor.pad Op to find opportunities for vectorization. .. py:attribute:: OPERATION_NAME :value: 'transform.apply_patterns.linalg.pad_vectorization' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:function:: apply_patterns_linalg_pad_vectorization(*, loc=None, ip=None) -> ApplyPadVectorizationPatternsOp .. py:class:: ApplyTilingCanonicalizationPatternsOp(*, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Collects canonicalization patterns relevant to apply after tiling patterns. .. py:attribute:: OPERATION_NAME :value: 'transform.apply_patterns.linalg.tiling_canonicalization' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:function:: apply_patterns_linalg_tiling_canonicalization(*, loc=None, ip=None) -> ApplyTilingCanonicalizationPatternsOp .. py:class:: BufferizeToAllocationOp(target, *, memory_space=None, memcpy_op=None, alloc_op=None, bufferize_destination_only=None, emit_dealloc=None, results=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` This transform bufferizes the targeted operation and materializes the result in a new allocation. It replaces all original uses of the target result with the newly allocated buffer, wrapped in a ``bufferization.to_tensor`` op.
It returns a handle to the newly allocated buffer. Furthermore, it returns a handle that is mapped to all newly created ops. Only bufferizable ops that bufferize to a memory write or have an aliasing OpOperand (and do not themselves bufferize to an allocation) are supported. They are bufferized using their BufferizableOpInterface implementation. E.g.: .. code:: %0 = tensor.insert %f into %dest[%pos] : tensor<10xf32> Is bufferized to: .. code:: %alloc = memref.alloc() : memref<10xf32> bufferization.materialize_in_destination %dest in %alloc memref.store %f, %alloc[%pos] : memref<10xf32> %0 = bufferization.to_tensor %alloc restrict writable : memref<10xf32> Selected ops that bufferize to an allocation (or need special handling) are also supported: * ``tensor.pad`` is lowered to an allocation, followed by a ``linalg.fill`` and a buffer copy (all on memrefs). * ``vector.mask`` is bufferized together with its region. The allocation is placed in front of the ``vector.mask`` op. An optional memory space attribute can be specified for the materialized buffer allocation. If a memory copy is needed, a "bufferization.materialize_in_destination" is used when possible. This is an op with tensor semantics that will bufferize to a memory copy later. Which concrete op will be used for the memory copy is up to the bufferization framework. Alternatively, a custom memcpy op can be specified via ``memcpy_op``. Currently supported are "memref.copy" and "linalg.copy". In that case, the source of each memcpy must not have a custom memory space. Furthermore, because the future buffer layout is unknown for a given tensor, a fully dynamic layout is assumed for best compatibility. Users should use "bufferization.materialize_in_destination" when possible. "memref.alloc" is used for new buffer allocations. The buffer is deallocated at the end of the block if the "emit_dealloc" attribute is present. If this attribute is not present, the allocated memory will be leaked. However, running the ``-buffer-deallocation-pipeline`` after all bufferization is done will properly insert the corresponding deallocation(s). Custom allocation ops can be specified via ``alloc_op``. Currently supported are "memref.alloc" and "memref.alloca". In case of a "memref.alloca", the buffer is not deallocated. If ``bufferize_destination_only`` is set, only the destination operands of the op are bufferized to a new memory allocation, but not the op itself. Return modes ------------ This operation consumes the ``target`` handle and produces the ``allocated_buffer`` and ``new_ops`` handles. It always succeeds. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.bufferize_to_allocation' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: memory_space() -> Optional[_ods_ir] .. py:method:: memcpy_op() -> _ods_ir .. py:method:: alloc_op() -> _ods_ir .. py:method:: bufferize_destination_only() -> bool .. py:method:: emit_dealloc() -> bool .. py:method:: allocated_buffer() -> _ods_ir .. py:method:: new_ops() -> _ods_ir .. py:function:: structured_bufferize_to_allocation(target, *, memory_space=None, memcpy_op=None, alloc_op=None, bufferize_destination_only=None, emit_dealloc=None, results=None, loc=None, ip=None) -> _ods_ir
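A minimal sketch of calling the generated builder above, assuming an existing handle ``pad`` (e.g. obtained with ``structured_match``) that points at a ``tensor.pad`` op inside a transform sequence body; the memory space value is an illustrative choice.

.. code:: python

   from mlir import ir
   from mlir.dialects._structured_transform_ops_gen import (
       structured_bufferize_to_allocation,
   )

   def bufferize_pad(pad):
       # Materialize the result in a fresh allocation in memory space 3 and
       # emit a dealloc at the end of the block.
       i64 = ir.IntegerType.get_signless(64)
       return structured_bufferize_to_allocation(
           pad,
           memory_space=ir.IntegerAttr.get(i64, 3),
           emit_dealloc=True,
       )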
.. py:class:: ContinuousTileSizesOp(tile_sizes, chunk_sizes, target, dimension, target_size, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` This transform emits the IR computing the list of (1) exponentially diminishing tile sizes that are powers of 2; and (2) the corresponding chunk-sizes the target op should be split into along the given dimension. For example, for ``target_size`` 9, and ``dimension`` 0 for the following linalg op as target .. code:: %0 = linalg.matmul ins(%arg0, %arg1: tensor<25x34xf32>, tensor<34x25xf32>) outs(%arg2: tensor<25x25xf32>) the first result ``tile_sizes`` will be a list of diminishing tile sizes 9, 4, 2, 1; and the second result will be a list of chunk sizes 18, 4, 2, 1 that the corresponding dimension should be split into. After the target op has been split along the given dimension (for example using multiway split), each chunk can be tiled with the corresponding tile size in the ``tile_sizes`` list generated as a result of this op. Specifying the output type as !transform.param will cause ``tile_sizes`` and ``chunk_sizes`` to be computed statically and not dynamically. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.continuous_tile_sizes' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: dimension() -> _ods_ir .. py:method:: target_size() -> _ods_ir .. py:method:: tile_sizes() -> _ods_ir .. py:method:: chunk_sizes() -> _ods_ir .. py:function:: structured_continuous_tile_sizes(tile_sizes, chunk_sizes, target, dimension, target_size, *, loc=None, ip=None) -> _ods_ir
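A minimal sketch of the generated builder above. Result types come first in generated builders; here both results are requested as ``!transform.param<i64>`` so the sizes are computed statically, and the handle ``matmul`` is assumed to point at a ``linalg.matmul``.

.. code:: python

   from mlir import ir
   from mlir.dialects._structured_transform_ops_gen import (
       structured_continuous_tile_sizes,
   )

   def continuous_sizes(matmul):
       # Static results, per the note above about !transform.param outputs.
       param = ir.Type.parse("!transform.param<i64>")
       return structured_continuous_tile_sizes(
           param, param, matmul, dimension=0, target_size=9
       )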
.. py:class:: ConvertConv2DToImg2ColOp(img2col_tensor, transformed, target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Convert linalg.conv_2d_xxx into linalg.generic (for img2col packing) and linalg.matmul. A convolution operation can be written as a matrix-matrix multiplication by unfolding the cross-correlation between input and filter and explicitly copying the overlapped sliding window inputs. Consider 2D input X with single channel input and output and 2x2 filter W: .. code:: [x(0, 0) , x(0, 1) , ..., x(0, n) ] [x(1, 0) , x(1, 1) , ..., x(1, n) ] [. , . ,. , . ] [w(0, 0), w(0, 1)] [. , . , . , . ] (conv) [w(1, 0), w(1, 1)] [. , . , ., . ] [x(n-1, 0), x(n-1, 1), ..., x(n-1, n-1)] The packed input data (img2col) is a matrix with |rows| = output spatial size, |columns| = filter spatial size. To compute the output Y(i, j) we need to calculate the dot product between filter window at input X(x, y) and the filter which will look like the following where r.h.s is the img2col matrix and l.h.s is the flattened filter: .. code:: [x(0,0), x(0,1), x(1,0), x(1,1)] [x(0,1), x(1,1), x(0,2), x(1,2)] (matmul) [w(0,0), w(0,1), w(1,0), w(1,1)] [x(0,1), x(1,1), x(0,2), x(1,2)] [ . , . , . , . ] In general for 2D case with (N, H, W, C) input and (Kh, Kw, C, D) filter and output (N, Ho, Wo, D) the convolution is the following matrix-matrix multiplication (Ho x Wo, Kh x Kw x C) * (Kh x Kw x C, D) for each input in the N input. For the case where N > 1 it is a batched matrix-matrix multiplication. Returns two handles: * One on the operation that produces the img2col tensor. * One on the final operation of the sequence that replaces the original convolution. Return modes: ------------- Returns a definite failure if target is not isolated from above. Returns a silenceable failure if the pattern application failed. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.convert_conv2d_to_img2col' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: img2col_tensor() -> _ods_ir .. py:method:: transformed() -> _ods_ir .. py:function:: structured_convert_conv2d_to_img2col(img2col_tensor, transformed, target, *, loc=None, ip=None) -> _ods_ir .. py:class:: ConvertToLoopsOp(result, target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` For operations that implement the ``TilingInterface``, and implement the ``generateScalarImplementation`` method, lowers the operation to loops. The return handle points to all generated loops. Fails if the payload ops cannot be lowered to loops. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.convert_to_loops' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: result() -> _ods_ir Shortcut to get an op result if it has only one (throws an error otherwise). .. py:function:: structured_convert_to_loops(result, target, *, loc=None, ip=None) -> _ods_ir .. py:class:: DecomposeInterfaceOp(transformed, target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` TODO .. py:attribute:: OPERATION_NAME :value: 'transform.structured.decompose_interface' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: transformed() -> _ods_ir .. py:function:: structured_decompose_interface(transformed, target, *, loc=None, ip=None) -> _ods_ir .. py:class:: DecomposeOp(transformed, target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Decomposes named complex operations, such as higher-dimensional (depthwise) convolutions, into combinations of lower-dimensional equivalents when possible. Return modes ------------ This operation ignores non-Linalg ops and drops them in the return. If all the operations referred to by the ``target`` handle decompose properly, the transform succeeds. Otherwise the transform produces a silenceable failure. The return handle points to only the subset of successfully produced computational operations, which can be empty. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.decompose' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: transformed() -> _ods_ir .. py:function:: structured_decompose(transformed, target, *, loc=None, ip=None) -> _ods_ir .. py:class:: DecomposeWinogradOp(transformed, target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Decompose winograd operations. It will convert filter, input and output transform operations into a combination of scf, tensor, and linalg equivalent operations. Before applying this transform operation, users need to tile winograd transform operations into supported sizes. Return modes: ------------- This operation fails if ``target`` is unsupported. Otherwise, the operation succeeds and returns a handle of the sequence that replaces the original operations. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.decompose_winograd_op' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: transformed() -> _ods_ir .. py:function:: structured_decompose_winograd_op(transformed, target, *, loc=None, ip=None) -> _ods_ir .. py:class:: EliminateLinalgOpAnchoredEmptyTensorsOp(target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Try to eliminate all ``tensor.empty`` op uses that are anchored on a LinalgOp within the targeted op. This op is similar to ``bufferization.eliminate_empty_tensors``, but specific to LinalgOps. ``tensor.empty`` ops cannot be bufferized.
They can either be converted to ``bufferization.alloc_tensor`` or replaced with another tensor (via this transform). ``tensor.empty`` does not specify the contents of the returned tensor so their results can be replaced with arbitrary tensor values as long as the dimensions match. This transform looks for ``tensor.empty`` ops where the SSA use-def chain of the result ends in a supported LinalgOp (always following the aliasing OpOperand/OpResult chain). The following LinalgOps are supported: * Only parallel iterator types. * The use-def chain ends in an input operand of the LinalgOp. * The LinalgOp has an unused output operand with the same shape and indexing map. Example: .. code:: %0 = tensor.empty() %1 = linalg.matmul ins(...) outs(%0) %2 = linalg.generic ins(%1) outs(%dest) { ^bb0(%in: f32, %out: f32): // out not used } Is rewritten with: .. code:: %0 = tensor.empty() %1 = linalg.matmul ins(...) outs(%dest) %2 = linalg.generic ins(%0) outs(%1) { ^bb0(%in: f32, %out: f32): // Use %out instead of %in } After this transformation, the "ins" operand has no uses inside the body of the LinalgOp and can be folded away with existing cleanup patterns. Afterwards, the tensor::EmptyOp can also fold away, so that the example can bufferize without an allocation (in the absence of other conflicts). Return modes ------------ This transform reads the target handle and modifies the payload. It does not produce any handle. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.eliminate_empty_tensors' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:function:: structured_eliminate_empty_tensors(target, *, loc=None, ip=None) -> EliminateLinalgOpAnchoredEmptyTensorsOp .. py:class:: FlattenElementwiseLinalgOp(transformed, target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Flattens the iteration space and (applicable) operands of elementwise linalg ops to a single dimension. Returns one handle: * Flattened linalg operation. Return modes: ------------- Returns a definite failure if target is not isolated from above. Returns a silenceable failure if the pattern application failed. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.flatten_elementwise' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: transformed() -> _ods_ir .. py:function:: structured_flatten_elementwise(transformed, target, *, loc=None, ip=None) -> _ods_ir .. py:class:: FuseIntoContainingOp(fused_op, new_containing_op, producer_op, containing_op, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Fuses the ``producer_op`` into the ``containing_op``. Returns a handle to the fused ops and the ``new_containing_op``. The producer is typically a slice of a tileable op (i.e., implements TilingInterface). In that case, this transform computes the accessed producer slice inside of the containing op ("tile and fuse") and if required, creates a new containing op with outputs from the fused producer. Otherwise, the entire producer is cloned inside the containing op ("clone and fuse"). The containing op handle must be associated with exactly one payload op. The producer op handle may be associated with multiple payload ops. This transform fuses producers one-by-one, always picking an unspecified producer that has at least one use inside the containing op among the producers. A producer can be listed multiple times in the handle. 
Note: If a producer has multiple uses inside the containing op, it is currently tiled and/or cloned multiple times into the containing op. TODO: Reuse already fused OpResults instead of tiling/cloning a second time when possible. Fuse producers according to a topological sorting to achieve the largest amount of reuse. Return modes ------------ If at least one producer could not be fused, this operation produces a silenceable failure. This is the case when tiling fails or when no producer op could be found among the remaining producers that has at least one use within the containing op. I.e., "producers" that are not consumed within the containing op are rejected by this operation. This operation consumes the producer handle. This operation only reads the containing op handle. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.fuse_into_containing_op' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: producer_op() -> _ods_ir .. py:method:: containing_op() -> _ods_ir .. py:method:: fused_op() -> _ods_ir .. py:method:: new_containing_op() -> _ods_ir .. py:function:: structured_fuse_into_containing_op(fused_op, new_containing_op, producer_op, containing_op, *, loc=None, ip=None) -> _ods_ir
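A minimal sketch of the generated builder above, assuming handles ``producer`` (e.g. a ``linalg.fill``) and ``containing`` (e.g. an ``scf.forall`` holding its consumer) obtained earlier in the same transform sequence; the two result handle types come first.

.. code:: python

   from mlir.dialects import transform
   from mlir.dialects._structured_transform_ops_gen import (
       structured_fuse_into_containing_op,
   )

   def fuse_producer(producer, containing):
       any_op = transform.AnyOpType.get()
       # Returns (fused_op, new_containing_op).
       return structured_fuse_into_containing_op(
           any_op, any_op, producer, containing
       )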
.. py:class:: FuseOp(transformed, loops, target, tile_sizes, tile_interchange, *, static_tile_sizes=None, static_tile_interchange=None, apply_cleanup=None, use_forall=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Tiles the operations pointed to by the target handle and fuses their producers greedily using the options provided as attributes. Tile sizes and loop interchange permutation can be provided as either static attributes or dynamic values (transform parameters or payload handles). If ``apply_cleanup`` is true then slice canonicalization is applied between fusion steps. If ``use_forall`` is true then the tiling method generates a ``scf.forall`` loop instead of ``scf.for`` loops. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.fuse' .. py:attribute:: _ODS_OPERAND_SEGMENTS .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: tile_sizes() -> _ods_ir .. py:method:: tile_interchange() -> _ods_ir .. py:method:: static_tile_sizes() -> Optional[_ods_ir] .. py:method:: static_tile_interchange() -> Optional[_ods_ir] .. py:method:: apply_cleanup() -> bool .. py:method:: use_forall() -> bool .. py:method:: transformed() -> _ods_ir .. py:method:: loops() -> _ods_ir .. py:function:: structured_fuse(transformed, loops, target, tile_sizes, tile_interchange, *, static_tile_sizes=None, static_tile_interchange=None, apply_cleanup=None, use_forall=None, loc=None, ip=None) -> Union[_ods_ir, _ods_ir, FuseOp] .. py:class:: GeneralizeOp(transformed, target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Transforms a named structured operation into the generic form with the explicit attached region. Return modes ------------ This operation ignores non-Linalg ops and drops them in the return. If all the operations referred to by the ``target`` handle generalize properly, the transform succeeds. Otherwise the transform produces a silenceable failure. The return handle points to only the subset of successfully produced equivalent generic operations, which can be empty or contain the original ops if they were already in generic form. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.generalize' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: transformed() -> _ods_ir .. py:function:: structured_generalize(transformed, target, *, loc=None, ip=None) -> _ods_ir .. py:class:: HoistPadBuildPackingLoopNestOp(packing_loop, target, loop, *, transpose=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Helper transform used to hoist a tensor.pad target operation. This operation creates the packing loop nest required by the hoist_pad operation and makes that functionality available independently. TODO: In the future, we should consider rewriting as a linalg.pack after hoisting since this abstraction is now available. Return modes ------------ This operation ignores non-tensor.pad ops and drops them in the result. If any non-tensor.pad is passed, the transform emits a silenceable failure. The return handle points to only the subset of successfully created packing loop nests, which can be empty. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.hoist_pad.build_packing_loop_nest' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: loop() -> _ods_ir .. py:method:: transpose() -> _ods_ir .. py:method:: packing_loop() -> _ods_ir .. py:function:: structured_hoist_pad_build_packing_loop_nest(packing_loop, target, loop, *, transpose=None, loc=None, ip=None) -> _ods_ir .. py:class:: HoistPadOp(transformed, target, num_loops, *, transpose=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Hoist the tensor.pad target operation by at most the given number of loops. Optionally apply the transpose attribute to the inner dimensions. TODO: In the future, we should consider rewriting as a linalg.pack after hoisting since this abstraction is now available. TODO: Maybe also return the linalg.generic transpose created at some point. Return modes ------------ This operation ignores non-tensor.pad ops and drops them in the result. If any non-tensor.pad is passed, the transform emits a silenceable failure. If all the operations referred to by the ``target`` handle pad properly, the transform succeeds. Otherwise the transform produces a silenceable failure. The return handle points to only the subset of successfully hoisted tensor.pad operations, which can be empty. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.hoist_pad' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: num_loops() -> _ods_ir .. py:method:: transpose() -> _ods_ir .. py:method:: transformed() -> _ods_ir .. py:function:: structured_hoist_pad(transformed, target, num_loops, *, transpose=None, loc=None, ip=None) -> _ods_ir
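A minimal sketch of the generated builder above, assuming a handle ``pad`` to a ``tensor.pad`` op produced by an earlier padding transform; it hoists the pad out of at most one enclosing loop.

.. code:: python

   from mlir.dialects import transform
   from mlir.dialects._structured_transform_ops_gen import structured_hoist_pad

   def hoist_pad_once(pad):
       any_op = transform.AnyOpType.get()
       return structured_hoist_pad(any_op, pad, num_loops=1)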
.. py:class:: HoistRedundantVectorBroadcastsOp(transformed, target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Hoist vector.extract / vector.broadcasts pairs out of immediately enclosing scf::ForOp iteratively. Return modes: ------------- The operation always succeeds and returns a handle to the transformed function op. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.hoist_redundant_vector_broadcasts' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: transformed() -> _ods_ir .. py:function:: structured_hoist_redundant_vector_broadcasts(transformed, target, *, loc=None, ip=None) -> _ods_ir .. py:class:: HoistRedundantVectorTransfersOp(transformed, target, *, verify_non_zero_trip=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Hoist vector.transfer_read / vector.transfer_write pairs out of immediately enclosing scf::ForOp iteratively, if the following conditions are true: #. The 2 ops access the same memref with the same indices. #. All operands are invariant under the enclosing scf::ForOp. #. No uses of the memref either dominate the transfer_read or are dominated by the transfer_write (i.e. no aliasing between the write and the read across the loop) WARNING: This hoisting does not model parallelism and is generally incorrect when used on distributed loops with memref semantics! TODO: obsolete and should be retired. Return modes: ------------- The operation always succeeds and returns a handle to the transformed function op. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.hoist_redundant_vector_transfers' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: verify_non_zero_trip() -> bool .. py:method:: transformed() -> _ods_ir .. py:function:: structured_hoist_redundant_vector_transfers(transformed, target, *, verify_non_zero_trip=None, loc=None, ip=None) -> _ods_ir .. py:class:: InsertSliceToCopyOp(transformed, target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Targeted rewrite of a tensor.insert_slice to linalg.copy. This is useful to materialize copies explicitly before bufferization and transform them, avoiding the need to rediscover them after bufferization. If the insert_slice source is already a linalg.copy, only return the source op (i.e. do not create an additional linalg.copy op). Return modes: ------------- The operation always succeeds and returns a handle to the relevant linalg.copy op. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.insert_slice_to_copy' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: transformed() -> _ods_ir .. py:function:: structured_insert_slice_to_copy(transformed, target, *, loc=None, ip=None) -> _ods_ir .. py:class:: InterchangeOp(transformed, target, *, iterator_interchange=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Interchanges the iterators of the operations pointed to by the target handle using the iterator interchange attribute. Return modes ------------ This operation ignores non-linalg::Generic ops and drops them in the return. This operation fails if the interchange attribute is invalid. If all the operations referred to by the ``target`` handle interchange properly, the transform succeeds. If any interchange fails, the transform produces a definite failure. The return handle points to only the subset of successfully produced interchanged operations, which can be empty. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.interchange' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: iterator_interchange() -> Optional[_ods_ir] .. py:method:: transformed() -> _ods_ir .. py:function:: structured_interchange(transformed, target, *, iterator_interchange=None, loc=None, ip=None) -> _ods_ir .. py:class:: LinalgCopyToMemrefOp(transformed, target, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Targeted rewrite of a linalg.copy on memrefs to a memref.copy. This is useful when bufferizing copies to a linalg.copy, later applying some transformations, and then rewriting the copy into a memref.copy. If the element types of the source and destination differ, or if the source is a scalar, the transform produces a silenceable failure. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.linalg_copy_to_memref' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir ..
py:method:: transformed() -> _ods_ir .. py:function:: structured_linalg_copy_to_memref(transformed, target, *, loc=None, ip=None) -> _ods_ir .. py:class:: LowerPackOp(pad_op, expand_shape_op, transpose_op, target, *, lowerPadLikeWithInsertSlice=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Rewrite a linalg.pack into tensor.pad + tensor.expand_shape + linalg.transpose. Return modes ------------ This operation ignores non-pack ops and drops them in the return. This operation produces a silenceable failure if the rewrite fails for any reason. If all the operations referred to by the ``target`` are rewritten, the transform succeeds. Return handles to the newly produced pad, expand_shape and transpose ops. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.lower_pack' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: lowerPadLikeWithInsertSlice() -> _ods_ir .. py:method:: pad_op() -> _ods_ir .. py:method:: expand_shape_op() -> _ods_ir .. py:method:: transpose_op() -> _ods_ir .. py:function:: structured_lower_pack(pad_op, expand_shape_op, transpose_op, target, *, lower_pad_like_with_insert_slice=None, loc=None, ip=None) -> _ods_ir .. py:class:: LowerUnPackOp(empty_op, transpose_op, collapse_shape_op, extract_slice_op, target, *, lowerUnpadLikeWithExtractSlice=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Lower a linalg.unpack into empty + linalg.transpose + tensor.collapse_shape + tensor.extract_slice. Return modes ------------ This operation ignores non-unpack ops and drops them in the return. This operation produces a silenceable failure if the rewrite fails for any reason. If all the operations referred to by the ``target`` are rewritten, the transform succeeds. Return handles to the newly produced empty, transpose, collapse_shape and extract_slice ops. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.lower_unpack' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: lowerUnpadLikeWithExtractSlice() -> _ods_ir .. py:method:: empty_op() -> _ods_ir .. py:method:: transpose_op() -> _ods_ir .. py:method:: collapse_shape_op() -> _ods_ir .. py:method:: extract_slice_op() -> _ods_ir .. py:function:: structured_lower_unpack(empty_op, transpose_op, collapse_shape_op, extract_slice_op, target, *, lower_unpad_like_with_extract_slice=None, loc=None, ip=None) -> _ods_ir .. py:class:: MapCopyToThreadsOp(forall_op, tiled_op, target, total_num_threads, desired_bit_alignment, *, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Targeted mapping of a linalg.copy / tensor.pad operation on tensors to a GPU thread mapping. This operation implements a greedy heuristic that determines a good distribution of threads to break down the copy/pad operation into. The heuristic is driven by considerations related to the underlying architecture for which good high-level decisions are needed assuming certain hardware features. Relevant features are exposed via first-class attributes to control the behavior of the transformation at a high level. For now, a single heuristic is implemented and can be extended on a per-need basis. Return modes ------------ This operation fails definitely if there is an unsupported op (i.e., not linalg.copy / tensor.pad) among the targeted op. Otherwise, the operation always succeeds and returns a handle to the relevant tiled linalg.copy / tensor.pad op and the enclosing scf.forall op. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.gpu.map_copy_to_threads' .. 
py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: total_num_threads() -> _ods_ir .. py:method:: desired_bit_alignment() -> _ods_ir .. py:method:: forall_op() -> _ods_ir .. py:method:: tiled_op() -> _ods_ir .. py:function:: structured_gpu_map_copy_to_threads(forall_op, tiled_op, target, total_num_threads, desired_bit_alignment, *, loc=None, ip=None) -> _ods_ir .. py:class:: MatchOp(results_, target, *, ops=None, interface=None, op_attrs=None, filter_result_type=None, filter_operand_types=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Match op with the specified constraints, within the target op. The following constraints are supported: * interface: an optional MatchInterfaceEnum specifying an enum representation for an interface to target. * ops: an optional StrArrayAttr specifying the concrete name of an op. Multiple names can be specified. Matched ops must have one of specified names. * attribute: the matched op must have all specified attributes (with their specified values). * filter_result_type: the matched op must return exactly this one type. * filter_operand_types: all the operands of the matched op must be of this type. If more than one type is specified, then the length of the list must be equal to the number of operands in the matched op, and the match will succeed only if the operand types match all the types in the list in the order in which they are specified. Note: Only ops that satisfy all specified constraints are matched. TODO: Extend with regions to allow a limited form of constraints. Return modes ------------ This op traverses the ops nested under ``target`` and returns the handles to all the operations that match the requirements. This op fails if the target is not a handle to exactly one operation. Otherwise it succeeds. This operation does not consume the target handle and produces new handles: it is a navigation op. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.match' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: ops() -> Optional[_ods_ir] .. py:method:: interface() -> Optional[_ods_ir] .. py:method:: op_attrs() -> Optional[_ods_ir] .. py:method:: filter_result_type() -> Optional[_ods_ir] .. py:method:: filter_operand_types() -> Optional[_ods_ir] .. py:method:: results_() -> _ods_ir .. py:function:: structured_match(results_, target, *, ops=None, interface=None, op_attrs=None, filter_result_type=None, filter_operand_types=None, loc=None, ip=None) -> _ods_ir
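A minimal sketch of the generated builder above, assuming it runs inside the body of a transform sequence whose block argument ``root`` is the root payload handle.

.. code:: python

   from mlir.dialects import transform
   from mlir.dialects._structured_transform_ops_gen import structured_match

   def match_matmuls(root):
       any_op = transform.AnyOpType.get()
       # `results_` (the result handle type) comes first; `ops` is converted
       # to a StrArrayAttr of op names.
       return structured_match(any_op, root, ops=["linalg.matmul"])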
.. py:class:: MultiTileSizesOp(low_size, high_size, split_point, target, dimension, target_size, *, divisor=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Emits the IR computing the tile sizes ``s1`` and ``s2`` such that: * there exists a combination of ``n`` tiles of size ``s1`` and ``m`` tiles of size ``s2`` that covers the entirety of the iteration space ``dimension`` of the target structured op; * ``s1`` and ``s2`` are less than or equal to ``target_size``; * ``s1`` and ``s2`` are divisible by ``divisor``. For example, for a dimension of size 54 with target size 12 and divisor 2, this can emit the IR computing the tile size 10, used for 3 tiles, and 12, used for 2 tiles, in total 10*3 + 12*2 = 54. Note that when the divisor does not divide the original dimension size, it is impossible to compute such tile sizes. An assertion is emitted to guard against this in the dynamic case. Expects the target size and the divisor to be strictly positive. Folds the IR as much as possible, normally obtaining constant sizes and numbers of tiles for a statically known dimension. This does *not* consume the target handle and produces three handles each pointing to single-result index-typed operations (which may be arithmetic constant operations) defining the two respective tile sizes and the product of the first tile size with the number of tiles of that size (useful for splitting the iteration space). This operation composes with the regular tiling when applied per-dimension: .. code:: mlir %sz1, %sz2, %split = structured.multitile_sizes %target { target_size = 10, dimension = 1 } : !transform.any_op, !transform.param, !transform.param, !transform.param %handles = structured.split %target after %split { dimension = 1 } : !transform.any_op, !transform.param %low, %high = transform.split_handle %handles : (!transform.any_op) -> (!transform.any_op, !transform.any_op) %tiled_low, %loop1 = structured.tile_using_for %low [0, %sz1] : (!transform.any_op, !transform.param) -> (!transform.any_op, !transform.any_op) %tiled_high, %loop2 = structured.tile_using_for %high [0, %sz2] : (!transform.any_op, !transform.param) -> (!transform.any_op, !transform.any_op) %common = merge_handles %tiled_low, %tiled_high : !transform.any_op %sz3, %sz4, %split = structured.multitile_sizes %target { target_size = 42, dimension = 0 } : !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op %sz3r, %sz4r, %splitr = replicate num(%common) %sz3, %sz4, %splitr : !transform.any_op, !transform.any_op, !transform.any_op structured.split %common after %splitr { dimension = 0 } : !transform.any_op, !transform.any_op // ... .. py:attribute:: OPERATION_NAME :value: 'transform.structured.multitile_sizes' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: dimension() -> _ods_ir .. py:method:: target_size() -> _ods_ir .. py:method:: divisor() -> _ods_ir .. py:method:: low_size() -> _ods_ir .. py:method:: high_size() -> _ods_ir .. py:method:: split_point() -> _ods_ir .. py:function:: structured_multitile_sizes(low_size, high_size, split_point, target, dimension, target_size, *, divisor=None, loc=None, ip=None) -> _ods_ir
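A minimal sketch of the generated builder above, mirroring the first op of the MLIR example: the two tile sizes and the split point are requested as static ``!transform.param<i64>`` values, assuming a handle ``target`` to the structured op being tiled.

.. code:: python

   from mlir import ir
   from mlir.dialects._structured_transform_ops_gen import (
       structured_multitile_sizes,
   )

   def multitile(target):
       param = ir.Type.parse("!transform.param<i64>")
       # Returns (low_size, high_size, split_point).
       return structured_multitile_sizes(
           param, param, param, target, dimension=1, target_size=10
       )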
.. py:class:: PackGreedilyOp(packed_op, target, matmul_packed_sizes, *, static_matmul_packed_sizes=None, matmul_padded_sizes_next_multiple_of=None, matmul_inner_dims_order=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Target a Linalg op and rewrite it into packed LinalgOp form by trying to infer whether a known suboperation is embedded. Different packing strategies are applied in order; when one applies successfully, the transform returns: #. Matmul packing: Try to infer a matmul operation embedded in the target op. Specifically, this looks for 2 parallel dimensions that participate in an outer-product and 1 reduction dimension. These dimensions are referred to as (m, n, k) to match canonical matmul terminology. The packed sizes for (m, n, k) are specified by ``matmul_packed_sizes`` and the optional ``matmul_padded_sizes_next_multiple_of``. When an entry ``matmul_packed_sizes[i]`` is non-0, the corresponding dimension is packed by ``matmul_packed_sizes[i]``. Otherwise, the dimension is merely padded to the next multiple of ``matmul_padded_sizes_next_multiple_of[i]``. ``matmul_padded_sizes_next_multiple_of`` is optional and is expected to either be empty or of size ``3``, matching the size of ``matmul_packed_sizes``. For each individual element of ``matmul_packed_sizes`` and ``matmul_padded_sizes_next_multiple_of``, only one of them is allowed to be non-zero. The ordering of the packed dimensions (mm, nn, kk) is specified by the ``matmul_inner_dims_order`` attribute. Packing occurs as follows: #. Find the dimensions to pack according to the strategy. #. The target is converted to linalg.generic form. #. An interchange transform is applied to isolate the dimensions to pack as the most minor indexing dimensions of the linalg.generic. The most minor dimensions are themselves ordered according to ``inner_dims_order``. #. An elementwise traversal of ``matmul_packed_sizes`` and ``matmul_padded_sizes_next_multiple_of`` is performed and for each dimension ``d``, either pack to ``matmul_packed_sizes[d]`` or pad to the ``matmul_padded_sizes_next_multiple_of[d]``. #. Packing/padding is performed by the amounts determined in step 4. and following ``inner_dims_order``. By normalizing the most minor dimensions to ``inner_dims_order``, the transform guarantees that packing immediately generates inner dimensions in a desirable layout. Outer dimension layout permutations are not controlled by this transform op at the moment and can be obtained by composing with the pack_transpose transformation. Return modes ------------ This operation ignores non-Linalg ops and drops them in the return. It returns the list of packed Linalg ops or the original op when all available packing strategies failed to apply. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.pack_greedily' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: matmul_packed_sizes() -> _ods_ir .. py:method:: static_matmul_packed_sizes() -> _ods_ir .. py:method:: matmul_padded_sizes_next_multiple_of() -> _ods_ir .. py:method:: matmul_inner_dims_order() -> _ods_ir .. py:method:: packed_op() -> _ods_ir .. py:function:: structured_pack_greedily(packed_op, target, matmul_packed_sizes, *, static_matmul_packed_sizes=None, matmul_padded_sizes_next_multiple_of=None, matmul_inner_dims_order=None, loc=None, ip=None) -> _ods_ir .. py:class:: PackOp(packed_op, target, packed_sizes, *, static_packed_sizes=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Pack a LinalgOp by applying a data tiling transformation on the op and packing the operands according to the ``packed_sizes`` specification. Iterator dimensions are tiled in their canonical order in the op spec. Operands are packed according to the same canonical order of the op iterator dimensions. Specifying a packed size of 0 for an iterator removes it from consideration for packing. ``linalg.pack`` (resp. ``linalg.unpack``) operations are inserted for the operands (resp. results) that need to be packed (resp. unpacked) according to the ``packed_sizes`` specification. Example ------- Consider a ``linalg.matmul`` with indexing maps: .. code:: // M N K M K // affine_map<(d0, d1, d2) -> (d0, d2)> // K N // affine_map<(d0, d1, d2) -> (d2, d1)> // M N // affine_map<(d0, d1, d2) -> (d0, d1)> %0 = linalg.matmul ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>) outs(%C: tensor<?x?xf32>) Specifying packed_sizes [2, 3, 4] results in tiling the iterator dimensions M, N and K, in this order, in both the op and its operands. ..
code:: // M N K m n k M K m k // affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)> // K N n k // affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d1, d4, d5)> // M N m n // affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)> %0 = linalg.generic_representing_some_higher_d_matmul ins(%A, %B: tensor<?x?x2x4xf32>, tensor<?x?x3x4xf32>) outs(%C: tensor<?x?x2x3xf32>) In particular, note that the second operand ``B`` has shape ``KxNxnxk`` (and not ``KxNxkxn`` as one could expect by looking **only** at the operand). Other layouts can be obtained unsurprisingly from this canonical transformation by composing the resulting operation with a ``transform.structured.pack_transpose`` op. This composition allows separating concerns and composes better compared to adding additional permutation attributes to this transform op. Return modes ------------ This operation applies to a single Linalg op, otherwise it fails. This operation may produce a definite failure if the packing fails for any reason. The returned handle points to the packed LinalgOp. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.pack' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: packed_sizes() -> _ods_ir .. py:method:: static_packed_sizes() -> _ods_ir .. py:method:: packed_op() -> _ods_ir .. py:function:: structured_pack(packed_op, target, packed_sizes, *, static_packed_sizes=None, loc=None, ip=None) -> _ods_ir
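A minimal sketch of the generated builder above, assuming a handle ``matmul`` to a ``linalg.matmul``. Static sizes go in ``static_packed_sizes``; the dynamic ``packed_sizes`` operand list is left empty.

.. code:: python

   from mlir.dialects import transform
   from mlir.dialects._structured_transform_ops_gen import structured_pack

   def pack_matmul(matmul):
       any_op = transform.AnyOpType.get()
       # Pack (M, N, K) by (2, 3, 4), as in the example above.
       return structured_pack(
           any_op, matmul, packed_sizes=[], static_packed_sizes=[2, 3, 4]
       )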
.. py:class:: PackTransposeOp(packed_op, pack_op, un_pack_op, target_pack_or_un_pack_op, target_linalg_op, *, outer_perm=None, inner_perm=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Apply a transposition to a single ``linalg.pack`` (resp. ``linalg.unpack``) and update the ``linalg.generic`` op that consumes (resp. produces) the operation. This transform allows composing a simple ``structured.pack`` with additional transpositions to e.g. match the data format required by a specific library call or ISA instruction. The transpose spec must specify at least one of ``outer_perm`` or ``inner_perm`` attributes, which will act upon the ``outer_dims_perm`` or ``inner_dims_pos`` of the specified ``linalg.pack`` or ``linalg.unpack`` op. If the ``target`` of this op is a ``linalg.pack`` then a new ``tensor.empty`` will be created along with transposed versions of the ``linalg.pack`` and the consuming ``linalg.generic``, which is expected to be the sole consumer. If the ``target`` of this op is a ``linalg.unpack`` then the whole pack / compute / unpack chain will be transposed and transposed clones of ``linalg.pack``, the consuming ``linalg.generic`` and the tail ``linalg.pack`` will be created. Return modes ------------ This operation targets a single ``linalg.pack`` / ``linalg.unpack`` op and a single matching ``linalg.generic`` that consumes / produces the op. Otherwise, it produces a silenceable failure. This operation may produce a silenceable failure if the transpose spec is ill-formed (i.e. ``outer_perm`` or ``inner_perm`` are not permutations of the proper rank) or if the transposition of all involved operations fails for any reason. This operation returns 3 handles, one to the transformed LinalgOp, one to the transformed ``linalg.pack`` and one to the transformed ``linalg.unpack``. The last handle for ``linalg.unpack`` is empty if ``target_pack_or_unpack_op`` was not itself a ``linalg.unpack``. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.pack_transpose' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target_pack_or_un_pack_op() -> _ods_ir .. py:method:: target_linalg_op() -> _ods_ir .. py:method:: outer_perm() -> Optional[_ods_ir] .. py:method:: inner_perm() -> Optional[_ods_ir] .. py:method:: packed_op() -> _ods_ir .. py:method:: pack_op() -> _ods_ir .. py:method:: un_pack_op() -> _ods_ir .. py:function:: structured_pack_transpose(packed_op, pack_op, un_pack_op, target_pack_or_un_pack_op, target_linalg_op, *, outer_perm=None, inner_perm=None, loc=None, ip=None) -> _ods_ir .. py:class:: PadOp(padded, pad, copy, target, pad_to_multiple_of, *, padding_values=None, padding_dimensions=None, static_pad_to_multiple_of=None, nofold_flags=None, transpose_paddings=None, copy_back_op=None, use_prescribed_tensor_shapes=None, loc=None, ip=None) Bases: :py:obj:`_ods_ir` Pads the operations pointed to by the target handle using the options provided as operation attributes. The operation returns a handle to the padded operation and to the padding operation ("tensor.pad"). To preserve tensor SSA use-def chains, the unpadded result is copied back to the original destination tensor of the targeted op. The op that copies back the result can be customized with ``copy_back_op``: * "bufferization.materialize_in_destination" (default) * "linalg.copy" * "none" (no copy back) Return modes ------------ This operation ignores non-Linalg ops and drops them in the return. This operation may produce a definite failure if the padding fails for any reason. If all the operations referred to by the ``target`` handle pad properly, the transform succeeds. Otherwise the transform produces a silenceable failure. The return handle points to only the subset of successfully produced padded operations, which can be empty. .. py:attribute:: OPERATION_NAME :value: 'transform.structured.pad' .. py:attribute:: _ODS_REGIONS :value: (0, True) .. py:method:: target() -> _ods_ir .. py:method:: pad_to_multiple_of() -> _ods_ir .. py:method:: padding_values() -> _ods_ir .. py:method:: padding_dimensions() -> _ods_ir .. py:method:: static_pad_to_multiple_of() -> Optional[_ods_ir] .. py:method:: nofold_flags() -> _ods_ir .. py:method:: transpose_paddings() -> _ods_ir .. py:method:: copy_back_op() -> _ods_ir .. py:method:: use_prescribed_tensor_shapes() -> bool .. py:method:: padded() -> _ods_ir .. py:method:: pad() -> _ods_ir .. py:method:: copy() -> _ods_ir .. py:function:: structured_pad(padded, pad, copy, target, pad_to_multiple_of, *, padding_values=None, padding_dimensions=None, static_pad_to_multiple_of=None, nofold_flags=None, transpose_paddings=None, copy_back_op=None, use_prescribed_tensor_shapes=None, loc=None, ip=None) -> _ods_ir
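A minimal sketch of the generated builder above, assuming a handle ``matmul`` to a ``linalg.matmul``; the three result handles (padded op, ``tensor.pad`` ops, copy-back ops) are all requested as ``!transform.any_op``, and the chosen dimensions are illustrative.

.. code:: python

   from mlir.dialects import transform
   from mlir.dialects._structured_transform_ops_gen import structured_pad

   def pad_matmul(matmul):
       any_op = transform.AnyOpType.get()
       return structured_pad(
           any_op, any_op, any_op, matmul,
           pad_to_multiple_of=[],         # no dynamic multiple-of values
           padding_dimensions=[0, 1, 2],  # pad M, N and K
       )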
.. py:class:: PadTilingInterfaceOp(padded, pad, target, padding_sizes, *, padding_values=None, static_padding_sizes=None, pad_to_multiple_of=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Pads the **iteration domain** of the operations pointed to by the target
   handle using the options provided as operation attributes. Padding the
   iteration domain induces a padding of the operands that is consistent
   across the op semantics and, unlike for simple elementwise ops, may not be
   trivially deducible or specifiable on operands only (e.g. convolutions).

   Currently, only a limited set of projected permutation maps are supported.

   The specification of ``padding_sizes`` follows that of ``tile_sizes``
   during tiling: the value "0" on a particular iterator encodes "no padding".
   Like in the case of tiling, an automatic completion by 0 to the operation
   rank occurs.

   This transformation returns a handle to the padded operation and to the
   padding operation ("tensor.pad").

   TODO: in the future this should be moved out of a specific Linalg
   implementation file and into a more general "Structured" file.

   Return modes
   ------------

   This operation ignores non-IndexingMapOpInterface ops and drops them in the
   return. In the future, this operation will support all TilingInterfaceOps
   for which the contract between iteration domain and operands can be
   reified. This operation may produce a definite failure if the padding fails
   for any reason.

   If all the operations referred to by the ``target`` handle pad properly,
   the transform succeeds. Otherwise the transform produces a silenceable
   failure. The return handle points to only the subset of successfully
   produced padded operations, which can be empty.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.pad_tiling_interface'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: padding_sizes() -> _ods_ir

   .. py:method:: padding_values() -> _ods_ir

   .. py:method:: static_padding_sizes() -> Optional[_ods_ir]

   .. py:method:: pad_to_multiple_of() -> bool

   .. py:method:: padded() -> _ods_ir

   .. py:method:: pad() -> _ods_ir

.. py:function:: structured_pad_tiling_interface(padded, pad, target, padding_sizes, *, padding_values=None, static_padding_sizes=None, pad_to_multiple_of=None, loc=None, ip=None) -> _ods_ir

.. py:class:: PromoteOp(transformed, target, *, operands_to_promote=None, use_full_tile_buffers=None, use_full_tiles_by_default=None, use_original_subview_size=None, use_alloca=None, memory_space=None, mapping=None, alignment=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Promotes the specified operands of the target into a separate memory
   buffer.

   At this point, this transform does not allow customizing alloc/dealloc
   functions nor the behavior on copy in/out operations.

   Return modes
   ------------

   This operation applies to a single Linalg op that satisfies the
   ``promoteSubviewsPrecondition``, otherwise it fails.

   If the operations referred to by the ``target`` handle promote properly,
   the transform succeeds.

   When successful, the return handle points to the ``target`` operation that
   was modified inplace.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.promote'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: operands_to_promote() -> _ods_ir

   .. py:method:: use_full_tile_buffers() -> _ods_ir

   .. py:method:: use_full_tiles_by_default() -> bool

   .. py:method:: use_original_subview_size() -> bool

   .. py:method:: use_alloca() -> bool

   .. py:method:: memory_space() -> Optional[_ods_ir]

   .. py:method:: mapping() -> Optional[_ods_ir]

   .. py:method:: alignment() -> Optional[_ods_ir]

   .. py:method:: transformed() -> _ods_ir

.. py:function:: structured_promote(transformed, target, *, operands_to_promote=None, use_full_tile_buffers=None, use_full_tiles_by_default=None, use_original_subview_size=None, use_alloca=None, memory_space=None, mapping=None, alignment=None, loc=None, ip=None) -> _ods_ir
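A minimal usage sketch in the transform dialect's textual format. The handle
name and attribute choices are hypothetical; verify the assembly syntax
against the transform dialect documentation:

.. code:: mlir

   // Promote the two input operands of a previously tiled matmul into
   // separate memory buffers.
   %promoted = transform.structured.promote %tiled_matmul
     { operands_to_promote = [0, 1], use_full_tiles_by_default }
     : (!transform.any_op) -> !transform.any_op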
.. py:class:: PromoteTensorOp(tensor, *, memory_space=None, results=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Requests that a tensor value lives in a specific memory space for its
   lifetime. This is achieved by allocating a new tensor in the desired memory
   space with ``bufferization.alloc_tensor`` and optionally materializing the
   source value into that allocation with
   ``bufferization.materialize_in_destination``. All uses of the original
   value are then redirected to the promoted value.

   The generated code for promoting tensor value ``%0`` resembles the
   following:

   .. code::

      %1 = bufferization.alloc_tensor() { memory_space = memory_space }
      // Note: the materialization is omitted if %0 is never read and is only
      // written into (i.e., it behaves as a result tensor).
      %2 = bufferization.materialize_in_destination %0 in %1
      // ...

   Deallocation is not handled by this transform.

   Return modes:

   * Produces a silenceable failure if the given handle does not point to
     tensor-typed values.
   * Succeeds otherwise and returns a handle to the promoted value(s), i.e.,
     the result of materialization if present and the allocation otherwise.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.promote_tensor'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: tensor() -> _ods_ir

   .. py:method:: memory_space() -> Optional[_ods_ir]

   .. py:method:: promoted() -> _ods_ir

.. py:function:: structured_promote_tensor(tensor, *, memory_space=None, results=None, loc=None, ip=None) -> _ods_ir

.. py:class:: ReplaceOp(replacement, target, *, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Replace all ``target`` payload ops with the single op that is contained in
   this op's region. All targets must have zero arguments and must be isolated
   from above. This op is for debugging/experiments only.

   Return modes
   ------------

   This operation consumes the ``target`` handle.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.replace'

   .. py:attribute:: _ODS_REGIONS
      :value: (1, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: replacement() -> _ods_ir

   .. py:method:: bodyRegion() -> _ods_ir

.. py:function:: structured_replace(replacement, target, *, loc=None, ip=None) -> _ods_ir

.. py:class:: RewriteInDestinationPassingStyleOp(transformed, target, *, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Rewrite a supported tensor operation that is not in destination-passing
   style into a form that is in destination-passing style. Currently supported
   operations are:

   * tensor.pad
   * tensor.generate
   * tensor.from_elements

   This dichotomy hints at a future interface; for now the implementation just
   switches between different implementations.

   Return modes
   ------------

   This operation ignores unsupported ops and drops them from the return. If
   all the operations referred to by the ``target`` handle generalize
   properly, the transform succeeds. Otherwise the transform produces a
   silenceable failure. The return handle points to a subset of successfully
   produced operations:

   * ``tensor.pad`` case, the returned handle points to the
     tensor.insert_slice.
   * ``tensor.generate`` case, the returned handle points to the
     linalg.generic.
   * ``tensor.from_elements`` case, the returned handle points to the last
     ``tensor.insert``.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.rewrite_in_destination_passing_style'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: transformed() -> _ods_ir

.. py:function:: structured_rewrite_in_destination_passing_style(transformed, target, *, loc=None, ip=None) -> _ods_ir
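For illustration, a sketch in the transform dialect's textual format (the
``%arg1`` root handle is hypothetical):

.. code:: mlir

   // Match all tensor.pad ops and rewrite them into destination-passing
   // style; the returned handle points to the resulting tensor.insert_slice.
   %pads = transform.structured.match ops{["tensor.pad"]} in %arg1
     : (!transform.any_op) -> !transform.any_op
   %dps = transform.structured.rewrite_in_destination_passing_style %pads
     : (!transform.any_op) -> !transform.any_op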
.. py:class:: ScalarizeOp(result, target, *, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Indicates that ops of a specific kind in the given function should be
   scalarized (i.e. their dynamic dimensions tiled by 1).

   Return modes
   ------------

   This operation ignores non-Linalg ops and drops them in the return. This
   operation produces a definite failure if the scalarization fails for any
   reason.

   If all the operations referred to by the ``target`` handle scalarize
   properly, the transform succeeds. Otherwise the transform produces a
   silenceable failure. The return handle points to only the subset of
   successfully produced tiled-by-1 operations, which can be empty.

   This operation does not return handles to the tiled loop. We make this
   design choice because it is hard to know ahead of time the number of loops
   that will be produced (it depends on the number of dynamic dimensions after
   multiple transformations have been applied). Loops can always be recovered
   by navigating from the tiled operations if needed.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.scalarize'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: result() -> _ods_ir

      Shortcut to get an op result if it has only one (throws an error
      otherwise).

.. py:function:: structured_scalarize(result, target, *, loc=None, ip=None) -> _ods_ir

.. py:class:: SpecializeOp(transformed, target, *, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Transforms a generic operation into the equivalent named form.

   Return modes
   ------------

   This operation ignores non-Linalg ops and drops them in the return. If all
   the operations referred to by the ``target`` handle specialize, the
   transform succeeds; otherwise, the operation produces a silenceable
   failure. The return handle points to only the subset of successfully
   produced equivalent named operations, which can be empty or contain the
   original ops if they were already in named form. The supported
   specializations to named Linalg operations are:

   * linalg.copy of any rank.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.specialize'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: transformed() -> _ods_ir

.. py:function:: structured_specialize(transformed, target, *, loc=None, ip=None) -> _ods_ir
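A brief usage sketch in the transform dialect's textual format (the ``%arg1``
root handle is hypothetical):

.. code:: mlir

   // Raise copy-like linalg.generic ops back to their named form.
   %generics = transform.structured.match ops{["linalg.generic"]} in %arg1
     : (!transform.any_op) -> !transform.any_op
   %named = transform.structured.specialize %generics
     : (!transform.any_op) -> !transform.any_op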
.. py:class:: SplitOp(split_list, target, dimension, static_chunk_sizes, *, dynamic_chunk_sizes=None, multiway=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Splits the given ``target`` op into two or more complementary parts, which
   combined cover the entire iteration domain of the original op. The split is
   performed along the iteration space dimension provided as an attribute; the
   chunk size specifies the size of the lower part, and the remaining range in
   the iteration space is assigned to the upper part. In case of dimension
   overflow, the transformation fails. The split is performed at the dimension
   iterator value specified as either the static chunk size attribute, when it
   is known at transform IR construction time, or as the handle to an
   operation producing a single index-typed value, when it is computed by the
   payload IR. In the latter case, the static chunk size must be set to
   ``ShapedType::kDynamic`` and the dynamic size handle must point to as many
   value-producing operations as there are structured operations pointed to by
   the target handle.

   The operation consumes the target handle, but preserves the chunk size
   handle if provided. Without the ``multiway`` attribute, it produces a new
   handle that is a list of the two parts of the structured op after
   splitting, whose lower index part corresponds to the part with lower
   iteration space indices.

   Multiway split mode is enabled by specifying the ``multiway`` attribute. In
   this mode a single ``target`` op is split into multiple parts covering the
   iteration space of the specified dimension. ``static_chunk_sizes`` and
   ``dynamic_chunk_sizes`` are in this case a list of chunk sizes that the
   given dimension should be split into. With ``multiway`` it also produces a
   handle; the result handle is a list of the multiple parts of the structured
   op after splitting, where the target dimension for each linalg op in the
   list corresponds to the chunk sizes specified in the input split list. If
   the chunk sizes do not cover the entire iteration space, the leftover chunk
   is the last payload in the result handle. As the result handle is most of
   the time a list, a ``transform.split_handle`` is needed to access the
   individual handles, as shown in the sketch after this section.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.split'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: dynamic_chunk_sizes() -> Optional[_ods_ir]

   .. py:method:: dimension() -> _ods_ir

   .. py:method:: static_chunk_sizes() -> _ods_ir

   .. py:method:: multiway() -> bool

   .. py:method:: split_list() -> _ods_ir

.. py:function:: structured_split(split_list, target, dimension, static_chunk_sizes, *, dynamic_chunk_sizes=None, multiway=None, loc=None, ip=None) -> _ods_ir
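As referenced above, a hedged sketch of splitting and then recovering the
individual parts with ``transform.split_handle``. Handle names are
hypothetical and the assembly syntax should be double-checked against the
transform dialect documentation:

.. code:: mlir

   // Split dimension 0 at iteration 16: the lower part covers [0, 16) and
   // the upper part covers the remainder.
   %parts = transform.structured.split %target after 16 { dimension = 0 }
     : !transform.any_op
   %low, %high = transform.split_handle %parts
     : (!transform.any_op) -> (!transform.any_op, !transform.any_op)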
.. py:class:: SplitReductionOp(init_or_alloc_op, fill_op, split_linalg_op, combining_linalg_op, target, *, split_factor=None, insert_split_dimension=None, inner_parallel=None, use_scaling_algorithm=None, use_alloc=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Indicates that the given ``target`` op should be transformed with the
   ``splitReduction`` transformation and split factor provided as attribute.

   The ``splitReduction`` transformation splits the first single linalg op
   reduction into a parallel and reduction dimension. A new ``linalg.generic``
   op is created to perform the rest of the reduction.

   The transformation supports different configuration attributes:

   * split_factor: the factor by which to split (i.e. the size of the
     remaining reduction after splitting).
   * insert_split_dimension: the dimension in the temporary tensor into which
     the new parallel dimension is inserted.
   * inner_parallel: specifies whether the parallel dimension is before or
     after the reduction dimension in the splitting op.
   * use_scaling_algorithm: whether to use a scaling based formulation that
     does not create an ExpandShapeOp (default: do not use scaling).
   * use_alloc: whether to use an alloc op to allocate the temporary tensor
     (default: do not use alloc op).

   Return modes
   ------------

   This operation ignores non-Linalg ops and drops them in the return. This
   operation produces a definite failure if the splitting fails for any
   reason.

   If all the operations referred to by the ``target`` handle split properly,
   the transform succeeds. Otherwise the transform produces a silenceable
   failure. The 4 returned handles point to only the subset of successfully
   produced computational operations, which can all be empty. These 4 returned
   handles point to:

   * the init op (or tensor_alloc op if use_alloc = true),
   * the fill op used to initialize the neutral element,
   * the split op and
   * the result-combining op.

   Example (default: ``use_scaling_algorithm = false, use_alloc = false``)
   -----------------------------------------------------------------------

   .. code::

      %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
                                            affine_map<(d0) -> ()>],
            iterator_types = ["reduction"]}
        ins(%in : tensor<32xf32>)
        outs(%out : tensor<f32>) {
        ^bb0(%arg1: f32, %arg2: f32):
          %y = arith.addf %arg1, %arg2 : f32
          linalg.yield %y : f32
      } -> tensor<f32>

   is split into:

   .. code::

      %cst = arith.constant 0.000000e+00 : f32
      %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
      %1 = tensor.empty() : tensor<4xf32>
      %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
      %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                            affine_map<(d0, d1) -> (d0)>],
            iterator_types = ["parallel", "reduction"]}
        ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) {
        ^bb0(%arg3: f32, %arg5: f32):
          %5 = arith.addf %arg3, %arg5 : f32
          linalg.yield %5 : f32
      } -> tensor<4xf32>
      %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
                                            affine_map<(d0) -> ()>],
            iterator_types = ["reduction"]}
        ins(%3 : tensor<4xf32>) outs(%out : tensor<f32>) {
        ^bb0(%arg3: f32, %arg4: f32):
          %5 = arith.addf %arg3, %arg4 : f32
          linalg.yield %5 : f32
      } -> tensor<f32>

   Example (``use_scaling_algorithm = true, use_alloc = true``)
   ------------------------------------------------------------

   Instead of introducing an ExpandShapeOp, this scaling-based implementation
   rewrites a reduction dimension ``k`` into ``k * split_factor + kk``. The
   dimension ``kk`` is added as an extra parallel dimension to the
   intermediate output tensor at position ``insert_split_dimension``.

   Consider a minimal example where ``k`` is reduced:
   ``O(i, j) += I(i, j, k)``. Assume i=3, j=5, k=128, split_factor=16 and
   insert_split_dimension=0. The compute is rewritten as:

   a. ``O_i(kk, i, j) += I(i, j, 16 * k + kk)``
   b. ``O(i, j) += O_i(kk, i, j)``

   The intermediate tensor O_i is of shape (128/16)x3x5 == 8x3x5.

   Example:
   --------

   .. code::

      %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
             outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>

   Is transformed to:

   .. code::

      #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d2 * 4 + d3)>
      #map1 = affine_map<(d0, d1, d2, d3) -> (d2 * 4 + d3, d1)>
      #map2 = affine_map<(d0, d1, d2, d3) -> (d2, d3)>
      #map3 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
      #map4 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
      #map5 = affine_map<(d0, d1, d2) -> (d0, d1)>
      %0 = tensor.empty() : tensor<16x32x64xf32>
      %cst = arith.constant 0.000000e+00 : f32
      %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>)
             -> tensor<16x32x64xf32>
      %2 = tensor.empty() : tensor<64x4xi1>
      %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
            iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
        ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>)
        outs(%1 : tensor<16x32x64xf32>) {
        ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32):
          %5 = arith.mulf %arg3, %arg4 : f32
          %6 = arith.addf %arg6, %5 : f32
          linalg.yield %6 : f32
      } -> tensor<16x32x64xf32>
      %4 = linalg.generic {indexing_maps = [#map4, #map5],
            iterator_types = ["parallel", "parallel", "reduction"]}
        ins(%3 : tensor<16x32x64xf32>) outs(%C : tensor<16x32xf32>) {
        ^bb0(%arg3: f32, %arg4: f32):
          %5 = arith.addf %arg3, %arg4 : f32
          linalg.yield %5 : f32
      } -> tensor<16x32xf32>
      return %4 : tensor<16x32xf32>

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.split_reduction'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: split_factor() -> _ods_ir

   .. py:method:: insert_split_dimension() -> _ods_ir

   .. py:method:: inner_parallel() -> bool

   .. py:method:: use_scaling_algorithm() -> bool

   .. py:method:: use_alloc() -> bool

   .. py:method:: init_or_alloc_op() -> _ods_ir

   .. py:method:: fill_op() -> _ods_ir

   .. py:method:: split_linalg_op() -> _ods_ir

   .. py:method:: combining_linalg_op() -> _ods_ir

.. py:function:: structured_split_reduction(init_or_alloc_op, fill_op, split_linalg_op, combining_linalg_op, target, *, split_factor=None, insert_split_dimension=None, inner_parallel=None, use_scaling_algorithm=None, use_alloc=None, loc=None, ip=None) -> _ods_ir
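For reference, a hedged usage sketch in the transform dialect's textual format.
The handle name is hypothetical; the result order follows the op definition
(init/alloc, fill, split, combining):

.. code:: mlir

   %init, %fill, %split, %combine = transform.structured.split_reduction %red
     { split_factor = 4, insert_split_dimension = 0 }
     : (!transform.any_op)
     -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)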
.. py:class:: TileReductionUsingForOp(fill_op, split_op, combining_op, for_op, target, *, reduction_dims=None, tile_sizes=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Indicates that the given ``target`` op should be transformed with the
   ``tileReduction`` transformation with the tile size provided as attribute.

   This transformation tiles the ``target`` along the reduction dimensions. It
   creates a tensor initialized with the identity value. Then it creates
   nested loops with a parallel version of the ``target`` op inside. The
   parallel op dimensions are less than or equal to the tile size passed by
   the user. After the loop a merge operation is created to do a final
   reduction with the partial reductions. The initial tensor always uses the
   tile size dimension. This may overallocate if the tile size is greater than
   the reduction dimension.

   Return modes
   ------------

   Returns 4 handles associated with (in order):

   * the fill op used to initialize the neutral element,
   * the parallel tiled op,
   * the result-combining op,
   * the parent ``for`` op.

   The ``reduction_dims`` can be used to specify the subset of reduction
   dimensions of the operation to tile. If left unspecified, all reduction
   dimensions are tiled.

   Example:
   --------

   .. code::

      %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                              affine_map<(d0, d1) -> (d0)>],
            iterator_types = ["parallel", "reduction"]}
        ins(%arg0 : tensor<?x?xf32>)
        outs(%out : tensor<?xf32>) {
        ^bb0(%arg7: f32, %arg9: f32):
          %1 = arith.addf %arg7, %arg9 : f32
          linalg.yield %1 : f32
      } -> tensor<?xf32>
      return %red : tensor<?xf32>

   is transformed into:

   .. code::

      %0 = tensor.empty(%dim_1) : tensor<?x5xf32>
      %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x5xf32>) -> tensor<?x5xf32>
      %2 = scf.for %arg2 = %c0 to %dim_0 step %c5 iter_args(%arg3 = %1) -> (tensor<?x5xf32>) {
        %extracted_slice = tensor.extract_slice %1[0, 0] [%dim, 5] [1, 1]
          : tensor<?x5xf32> to tensor<?x5xf32>
        %extracted_slice_2 = tensor.extract_slice %arg0[0, %arg2] [%dim, 5] [1, 1]
          : tensor<?x?xf32> to tensor<?x5xf32>
        %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                              affine_map<(d0, d1) -> (d0, d1)>],
              iterator_types = ["parallel", "parallel"]}
          ins(%extracted_slice_2 : tensor<?x5xf32>)
          outs(%extracted_slice : tensor<?x5xf32>) {
          ^bb0(%in: f32, %out: f32):
            %5 = arith.addf %in, %out : f32
            linalg.yield %5 : f32
        } -> tensor<?x5xf32>
        %dim_3 = tensor.dim %1, %c0 : tensor<?x5xf32>
        %inserted_slice = tensor.insert_slice %4 into %arg3[0, 0] [%dim_3, 5] [1, 1]
          : tensor<?x5xf32> into tensor<?x5xf32>
        scf.yield %inserted_slice : tensor<?x5xf32>
      }
      %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                            affine_map<(d0, d1) -> (d0)>],
            iterator_types = ["parallel", "reduction"]}
        ins(%2 : tensor<?x5xf32>)
        outs(%arg1 : tensor<?xf32>) {
        ^bb0(%in: f32, %out: f32):
          %4 = arith.addf %in, %out : f32
          linalg.yield %4 : f32
      } -> tensor<?xf32>

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.tile_reduction_using_for'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: reduction_dims() -> _ods_ir

   .. py:method:: tile_sizes() -> _ods_ir

   .. py:method:: fill_op() -> _ods_ir

   .. py:method:: split_op() -> _ods_ir

   .. py:method:: combining_op() -> _ods_ir

   .. py:method:: for_op() -> _ods_ir

.. py:function:: structured_tile_reduction_using_for(fill_op, split_op, combining_op, for_op, target, *, reduction_dims=None, tile_sizes=None, loc=None, ip=None) -> Union[_ods_ir, _ods_ir, TileReductionUsingForOp]
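A hedged usage sketch in the transform dialect's textual format. The handle
name is hypothetical, and the ``by tile_sizes`` clause follows the op's custom
assembly, which should be verified against the transform dialect
documentation:

.. code:: mlir

   %fill, %split, %combine, %for =
     transform.structured.tile_reduction_using_for %red by tile_sizes = [0, 5]
     : (!transform.any_op)
     -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)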
.. py:class:: TileReductionUsingForallOp(fill_op, split_op, combining_op, forall_op, target, *, reduction_dims=None, num_threads=None, tile_sizes=None, mapping=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Tile a PartialReductionOpInterface op to a tiled ``scf.forall`` doing
   partial reduction.

   This transformation tiles the ``target`` along the reduction dimensions. It
   creates a tensor initialized with the identity value. Then it creates a
   ``scf.forall`` loop with the number of threads given by ``num_threads``.
   The op is tiled with a size equal to ``floordiv(size, num_threads)``. All
   the partial reduction values are inserted in parallel to create a new
   tensor. After the loop a merge operation is created to do a final reduction
   with the partial reductions tensor. If an extra ``tile_sizes`` parameter is
   passed, the tiles are cyclically distributed on the threads of the
   ``scf.forall`` loop.

   Return modes
   ------------

   Returns 4 handles associated with (in order):

   * the fill op used to initialize the neutral element,
   * the parallel tiled op,
   * the result-combining op,
   * the parent ``forall`` op.

   Example:
   --------

   .. code::

      %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                              affine_map<(d0, d1) -> (d0)>],
            iterator_types = ["parallel", "reduction"]}
        ins(%arg0 : tensor<?x?xf32>)
        outs(%out : tensor<?xf32>) {
        ^bb0(%arg7: f32, %arg9: f32):
          %1 = arith.addf %arg7, %arg9 : f32
          linalg.yield %1 : f32
      } -> tensor<?xf32>
      return %red : tensor<?xf32>

   is transformed into:

   .. code::

      %0 = tensor.empty(%dim_1) : tensor<?x5xf32>
      %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x5xf32>) -> tensor<?x5xf32>
      %2 = scf.forall (%arg2) in (%c5) shared_outs(%arg3 = %1) -> (tensor<?x5xf32>) {
        %4 = affine.min #map(%arg2)[%dim_0]
        %5 = affine.max #map1(%4)
        %extracted_slice = tensor.extract_slice %arg3[0, %arg2] [%dim, 1] [1, 1]
          : tensor<?x5xf32> to tensor<?xf32>
        %6 = affine.apply #map2(%arg2)[%dim_0]
        %extracted_slice_2 = tensor.extract_slice %arg0[0, %6] [%dim, %5] [1, 1]
          : tensor<?x?xf32> to tensor<?x?xf32>
        %extracted_slice_3 = tensor.extract_slice %extracted_slice[0] [%dim] [1]
          : tensor<?xf32> to tensor<?xf32>
        %7 = linalg.generic {indexing_maps = [#map3, #map4],
              iterator_types = ["parallel", "reduction"]}
          ins(%extracted_slice_2 : tensor<?x?xf32>)
          outs(%extracted_slice_3 : tensor<?xf32>) {
          ^bb0(%in: f32, %out: f32):
            %9 = arith.addf %in, %out : f32
            linalg.yield %9 : f32
        } -> tensor<?xf32>
        scf.forall.in_parallel {
          tensor.parallel_insert_slice %7 into %arg3[0, %arg2] [%dim, 1] [1, 1]
            : tensor<?xf32> into tensor<?x5xf32>
        }
      } {mapping = []}
      %3 = linalg.generic {indexing_maps = [#map3, #map4],
            iterator_types = ["parallel", "reduction"]}
        ins(%2 : tensor<?x5xf32>)
        outs(%arg1 : tensor<?xf32>) {
        ^bb0(%in: f32, %out: f32):
          %4 = arith.addf %in, %out : f32
          linalg.yield %4 : f32
      } -> tensor<?xf32>

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.tile_reduction_using_forall'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: reduction_dims() -> _ods_ir

   .. py:method:: num_threads() -> _ods_ir

   .. py:method:: tile_sizes() -> _ods_ir

   .. py:method:: mapping() -> Optional[_ods_ir]

   .. py:method:: fill_op() -> _ods_ir

   .. py:method:: split_op() -> _ods_ir

   .. py:method:: combining_op() -> _ods_ir

   .. py:method:: forall_op() -> _ods_ir

.. py:function:: structured_tile_reduction_using_forall(fill_op, split_op, combining_op, forall_op, target, *, reduction_dims=None, num_threads=None, tile_sizes=None, mapping=None, loc=None, ip=None) -> Union[_ods_ir, _ods_ir, TileReductionUsingForallOp]
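A hedged usage sketch in the transform dialect's textual format. The handle
name is hypothetical; verify the assembly syntax against the transform
dialect documentation:

.. code:: mlir

   %fill, %split, %combine, %forall =
     transform.structured.tile_reduction_using_forall %red
     by num_threads = [0, 5], tile_sizes = []
     : (!transform.any_op)
     -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)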
.. py:class:: TileUsingForOp(tiled_linalg_op, loops, target, dynamic_sizes, *, static_sizes=None, interchange=None, scalable_sizes=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Indicates that the given ``target`` op should be tiled with the given
   sizes. This transform generates a loop nest with a smaller ("tiled") target
   operation in its body. Currently limited to LinalgOps.

   Tile sizes may be known at transformation time, in which case they are
   expected to be provided in the ``static_sizes`` attribute, or not, in which
   case the tile value must be computed by the payload IR and the handle to
   the operation computing it must be provided through ``dynamic_sizes``. When
   the sizes are not known statically, the corresponding entry in the
   ``static_sizes`` attribute must be set to ``ShapedType::kDynamic``. Only
   the dynamic sizes must be provided in ``dynamic_sizes``, i.e., there should
   be as many handles as ``ShapedType::kDynamic`` values in the
   ``static_sizes`` attribute.

   A static size of ``0`` indicates that the dimension should not be tiled. No
   loop will be generated for such dimensions. If all tile sizes are ``0``,
   this transform is effectively a no-op.

   This op returns handles to the tiled op (in the generated loop nest) and
   the generated loops. The number of loops is the number of tile sizes that
   are statically known to be non-zero.

   Return modes
   ------------

   On success, the resulting handles are associated with co-indexed lists of
   tiled operations and loops around them.

   This operation only supports Linalg ops and produces a silenceable failure
   if the input contains any non-Linalg ops. The ops preceding it in the list
   associated with the ``target`` handle will have been tiled.

   This operation produces a silenceable failure if the ``dynamic_sizes``
   handles are associated with lists of payload operations of a size different
   than that of the list associated with the ``target`` handle.

   If the internal implementation of tiling for any of the operations fails,
   produces a definite failure.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.tile_using_for'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: dynamic_sizes() -> _ods_ir

   .. py:method:: static_sizes() -> Optional[_ods_ir]

   .. py:method:: interchange() -> Optional[_ods_ir]

   .. py:method:: scalable_sizes() -> Optional[_ods_ir]

   .. py:method:: tiled_linalg_op() -> _ods_ir

   .. py:method:: loops() -> _ods_ir

.. py:function:: structured_tile_using_for(tiled_linalg_op, loops, target, dynamic_sizes, *, static_sizes=None, interchange=None, scalable_sizes=None, loc=None, ip=None) -> Union[_ods_ir, _ods_ir, TileUsingForOp]
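A hedged usage sketch in the transform dialect's textual format. The handle
name is hypothetical; two statically non-zero tile sizes yield two loop
handles:

.. code:: mlir

   %tiled, %loop_i, %loop_j = transform.structured.tile_using_for %matmul
     tile_sizes [4, 8]
     : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)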
.. py:class:: TileUsingForallOp(tiled_op, forall_op, target, num_threads, tile_sizes, *, packed_num_threads=None, packed_tile_sizes=None, static_num_threads=None, static_tile_sizes=None, mapping=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Tile a TilingInterface op to a tiled ``scf.forall``.

   Tiling is applied by either specifying ``num_threads`` or ``tile_sizes``.
   If ``num_threads`` is specified, then the tile size for each dimension
   ``i`` is calculated dynamically via
   ``ceilDiv(dimSize[i], num_threads[i])``. ``num_threads`` and ``tile_sizes``
   can be either static index attributes or operation handles (or a mix
   thereof). Operation handles must be mapped to exactly one op that has
   exactly one result of index type.

   Static zero tile sizes indicate that the dimension is not tiled and can be
   thought of as tiling by the full size of data.

   It is the user's responsibility to ensure that ``num_threads/tile_sizes``
   is a valid tiling specification (i.e. that only tiles parallel dimensions,
   e.g. in the Linalg case). If the dimension is not parallelizable, a warning
   is issued to notify the user that the generated code is not safe to
   parallelize.

   If non-empty, the ``mapping`` is added as an attribute to the resulting
   ``scf.forall``.

   Note: ``tile_sizes`` and ``num_threads`` are variadic. Each tile
   size/number of threads can be an index attribute or a transform handle
   that is mapped to exactly one payload op with exactly one index result.

   Return modes
   ------------

   This operation ignores ops that do not implement the TilingInterface and
   drops them in the return.

   If all the operations referred to by the ``target`` handle tile
   successfully, the transform succeeds. Otherwise the transform produces a
   silenceable failure.

   The two returned handles point to only the subset of successfully produced
   tiled operations, which can all be empty. These two returned handles point
   to:

   * the tiled op that implements TilingInterface,
   * the new scf.forall op.

   Example using ``num_threads``
   -----------------------------

   .. code::

      %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
        : (!transform.any_op) -> !transform.any_op
      %3:2 = transform.structured.tile_using_forall %0 num_threads [10, 20]
        : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

   Example using ``tile_sizes``
   ----------------------------

   .. code::

      %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1
        : (!transform.any_op) -> !transform.any_op
      %sz = transform.structured.match ...
      %3:2 = transform.structured.tile_using_forall %0 tile_sizes [0, %sz, 20]
        : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.tile_using_forall'

   .. py:attribute:: _ODS_OPERAND_SEGMENTS

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: num_threads() -> _ods_ir

   .. py:method:: tile_sizes() -> _ods_ir

   .. py:method:: packed_num_threads() -> Optional[_ods_ir]

   .. py:method:: packed_tile_sizes() -> Optional[_ods_ir]

   .. py:method:: static_num_threads() -> Optional[_ods_ir]

   .. py:method:: static_tile_sizes() -> Optional[_ods_ir]

   .. py:method:: mapping() -> Optional[_ods_ir]

   .. py:method:: tiled_op() -> _ods_ir

   .. py:method:: forall_op() -> _ods_ir

.. py:function:: structured_tile_using_forall(tiled_op, forall_op, target, num_threads, tile_sizes, *, packed_num_threads=None, packed_tile_sizes=None, static_num_threads=None, static_tile_sizes=None, mapping=None, loc=None, ip=None) -> _ods_ir

.. py:class:: TransposeConv2DOp(transformed, target, *, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Convert linalg.conv_2d_nhwc_fhwc into linalg.conv_2d_nhwc_hwcf by
   introducing a linalg.transpose on the filter tensor/memref.

   Whilst the fhwc filter channel ordering can be desirable for certain
   targets and is a more direct mapping to higher level dialects such as TOSA
   (which only supports this ordering), hwcf is better suited for
   transformations such as img2col, which can make use of optimized BLAS
   routines such as GEMM.

   Returns one handle:

   * The final operation of the sequence that replaces the original
     convolution.
   Return modes
   ------------

   Returns a definite failure if target is not isolated from above. Returns a
   silenceable failure if the pattern application failed.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.transpose_conv2d'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: transformed() -> _ods_ir

.. py:function:: structured_transpose_conv2d(transformed, target, *, loc=None, ip=None) -> _ods_ir

.. py:class:: TransposeMatmulOp(transformed, target, *, inputToTranspose=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Convert Linalg matmul ops to transposed variants.

   By default the LHS matrix is transposed. Set the ``inputToTranspose``
   attribute to instead transpose the RHS matrix.

   Return modes
   ------------

   This operation fails if ``target`` is unsupported, i.e., not a
   ``linalg.matmul`` or ``linalg.batch_matmul``. Otherwise, the operation
   succeeds and returns a handle to the transposed matmul op.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.transpose_matmul'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: inputToTranspose() -> _ods_ir

   .. py:method:: transformed() -> _ods_ir

.. py:function:: structured_transpose_matmul(transformed, target, *, input_to_transpose=None, loc=None, ip=None) -> _ods_ir

.. py:class:: VectorizeChildrenAndApplyPatternsOp(transformed, target, *, fold_type_extensions_into_contract=None, vectorize_padding=None, vectorize_nd_extract=None, flatten_1d_depthwise_conv=None, disable_multi_reduction_to_contract_patterns=None, disable_transfer_permutation_map_lowering_patterns=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Vectorizes all children contained in the given ``target`` using the
   configuration specified by the attributes of this op. This only vectorizes
   structured ops that operate on shaped types and does not vectorize loops or
   straight-line code. Internally, it applies a set of rewrite patterns, some
   of which enable vectorization and some of which clean up the results.
   Therefore, it can only be applied to an op with the "isolated from above"
   property. This transformation only fails if the entire pattern rewriting
   failed, i.e., it does **not** fail when no ops were vectorized.

   Finer granularity can be achieved either with the ``VectorizeOp`` for
   individual ops or by outlining the target part of the payload IR into,
   e.g., a function, performing this transformation, and inlining it back.

   Note that this transformation invalidates the handles to any payload IR
   operation that is contained inside the vectorization target.

   This transformation supports the following attributes:

   * ``fold_type_extensions_into_contract``: a ``UnitAttr`` to enable the
     folding of type extension operations into ``vector.contract`` to create a
     mixed precision operation.
   * ``vectorize_padding``: a ``UnitAttr`` to activate the vectorization of
     ``tensor.pad`` ops. Different pipelines may prefer to lower such ops to
     loops.
   * ``disable_multi_reduction_to_contract_patterns``: a ``UnitAttr`` to
     deactivate the rewrite of ``vector.multi_reduction`` to
     ``vector.contract``. This is intended to be used in tests only.
   * ``disable_transfer_permutation_map_lowering_patterns``: a ``UnitAttr`` to
     deactivate the rewrite of ``vector.transfer`` with permutation maps into
     explicit ``vector.transpose`` operations. This is intended to be used in
     tests only but may be promoted to a first class attribute in the future.

   Return modes
   ------------

   This operation produces a definite failure if vectorization fails for any
   reason. The operation always returns the handle to the target op that is
   expected to be isolated from above.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.vectorize_children_and_apply_patterns'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: fold_type_extensions_into_contract() -> bool

   .. py:method:: vectorize_padding() -> bool

   .. py:method:: vectorize_nd_extract() -> bool

   .. py:method:: flatten_1d_depthwise_conv() -> bool

   .. py:method:: disable_multi_reduction_to_contract_patterns() -> bool

   .. py:method:: disable_transfer_permutation_map_lowering_patterns() -> bool

   .. py:method:: transformed() -> _ods_ir

.. py:function:: structured_vectorize_children_and_apply_patterns(transformed, target, *, fold_type_extensions_into_contract=None, vectorize_padding=None, vectorize_nd_extract=None, flatten_1d_depthwise_conv=None, disable_multi_reduction_to_contract_patterns=None, disable_transfer_permutation_map_lowering_patterns=None, loc=None, ip=None) -> _ods_ir
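A hedged usage sketch in the transform dialect's textual format (the ``%func``
handle is hypothetical and must point to an op that is isolated from above):

.. code:: mlir

   %vectorized = transform.structured.vectorize_children_and_apply_patterns
     %func { vectorize_padding }
     : (!transform.any_op) -> !transform.any_op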
.. py:class:: VectorizeOp(target, vector_sizes, *, static_vector_sizes=None, vectorize_nd_extract=None, assume_dynamic_dims_match_vec_sizes=None, create_named_contraction=None, scalable_sizes=None, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   Vectorize the target ops, which must be Linalg ops.

   Use the optional vector sizes to specify exactly what configuration the
   vectorizer should use. It will then use masked vectors of the specified
   size to enforce this configuration ("masked vectorization"). If no vector
   sizes are specified, the vectorizer will infer the shapes to use from the
   target Linalg ops ("regular vectorization"). More specifically:

   .. code:: mlir

      # Masked vectorization - vector sizes are specified explicitly
      transform.structured.vectorize %target vector_sizes [1, 4] : !transform.any_op
      # Regular vectorization - vector sizes are inferred from the target Op
      transform.structured.vectorize %target : !transform.any_op

   The vector sizes can be either static or dynamic (SSA values). In case of
   SSA values, the handle must be mapped to exactly one payload op with
   exactly one index-typed result.

   Note: The input vector sizes must be bigger than or equal to their
   counterpart iteration space sizes.

   Typically this operator should be applied to linalg operations that have
   already been tiled to the appropriate sizes.

   Return modes
   ------------

   This operation produces a silenceable failure if at least one target op is
   not a Linalg op or fails to vectorize. It produces a definite failure if
   the dynamic vector sizes (SSA values) do not satisfy the constraints
   mentioned above.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.vectorize'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: vector_sizes() -> _ods_ir

   .. py:method:: static_vector_sizes() -> Optional[_ods_ir]

   .. py:method:: vectorize_nd_extract() -> bool

   .. py:method:: assume_dynamic_dims_match_vec_sizes() -> bool

   .. py:method:: create_named_contraction() -> bool

   .. py:method:: scalable_sizes() -> Optional[_ods_ir]

.. py:function:: structured_vectorize(target, vector_sizes, *, static_vector_sizes=None, vectorize_nd_extract=None, assume_dynamic_dims_match_vec_sizes=None, create_named_contraction=None, scalable_sizes=None, loc=None, ip=None) -> VectorizeOp
.. py:class:: WinogradConv2DOp(transformed, target, fmr, *, loc=None, ip=None)

   Bases: :py:obj:`_ods_ir`

   The Winograd Conv2D algorithm converts a linalg Conv2D operation into a
   batched matrix multiply. Before the matrix multiply, it converts the filter
   and input into a format suitable for batched matrix multiply. After the
   matrix multiply, it converts the output to the final result tensor.

   The algorithm F(m x m, r x r) is

   .. code::

      Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A

   The size of the output Y is m x m. The size of the filter g is r x r. The
   size of the input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and
   B are transformation matrices.

   Return modes
   ------------

   This operation produces a silenceable failure if ``target`` is unsupported.
   Otherwise, the operation succeeds and returns a handle of the sequence that
   replaces the original convolution.

   .. py:attribute:: OPERATION_NAME
      :value: 'transform.structured.winograd_conv2d'

   .. py:attribute:: _ODS_REGIONS
      :value: (0, True)

   .. py:method:: target() -> _ods_ir

   .. py:method:: fmr() -> _ods_ir

   .. py:method:: transformed() -> _ods_ir

.. py:function:: structured_winograd_conv2d(transformed, target, fmr, *, loc=None, ip=None) -> _ods_ir