![]() |
CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <tensor_op_multiplicand_sm75.h>
Public Types | |
| using | Index = int32_t |
| Index type used for coordinates. More... | |
| using | LongIndex = int64_t |
| Long index type used for offsets. More... | |
| using | TensorCoord = PitchLinearCoord |
| Logical coordinate. More... | |
| using | Stride = Coord< kStrideRank, Index, LongIndex > |
| Stride vector. More... | |
| using | TileShape = PitchLinearShape< 8, 4 > |
| Fundamental tile shape in units of vectors. More... | |
| using | PartitionShape = PitchLinearShape< 8, 4 > |
| PartitionShape is the same as TileShape for this layout. More... | |
| using | PartitionCount = PitchLinearShape< TileShape::kContiguous/PartitionShape::kContiguous, TileShape::kStrided/PartitionShape::kStrided > |
| using | AccessCount = PitchLinearShape< PartitionShape::kContiguous, PartitionShape::kStrided > |
Public Member Functions | |
| CUTLASS_HOST_DEVICE | TensorOpMultiplicandCongruous (Index ldm=0) |
| Ctor. More... | |
| CUTLASS_HOST_DEVICE | TensorOpMultiplicandCongruous (Stride stride) |
| Ctor. More... | |
| CUTLASS_HOST_DEVICE LongIndex | operator() (TensorCoord const &coord) const |
| CUTLASS_HOST_DEVICE Stride | stride () const |
| Returns the stride of the layout. More... | |
| CUTLASS_HOST_DEVICE Stride & | stride () |
| Returns the stride of the layout. More... | |
| CUTLASS_HOST_DEVICE LongIndex | capacity (TensorCoord const &extent) const |
Static Public Member Functions | |
| static CUTLASS_HOST_DEVICE TensorOpMultiplicandCongruous | packed (TensorCoord const &extent) |
| Helper returns a layout to a tightly packed tensor. More... | |
Static Public Attributes | |
| static int const | kRank = 2 |
| Logical rank of tensor. More... | |
| static int const | kStrideRank = 1 |
| Rank of stride vector. More... | |
| static int const | kAccessSize = 128 |
| This layout is optimized for 128b accesses. More... | |
| static int const | kElementSize = 32 |
| static int const | kElementsPerAccess = kAccessSize / kElementSize |
Template based on element size (in bits) - defined in terms of pitch-linear memory and Crosswise size (in elements).
| using cutlass::layout::TensorOpMultiplicandCongruous< 32, Crosswise >::AccessCount = PitchLinearShape<PartitionShape::kContiguous, PartitionShape::kStrided> |
| using cutlass::layout::TensorOpMultiplicandCongruous< 32, Crosswise >::Index = int32_t |
| using cutlass::layout::TensorOpMultiplicandCongruous< 32, Crosswise >::LongIndex = int64_t |
| using cutlass::layout::TensorOpMultiplicandCongruous< 32, Crosswise >::PartitionCount = PitchLinearShape<TileShape::kContiguous / PartitionShape::kContiguous, TileShape::kStrided / PartitionShape::kStrided> |
| using cutlass::layout::TensorOpMultiplicandCongruous< 32, Crosswise >::PartitionShape = PitchLinearShape<8, 4> |
| using cutlass::layout::TensorOpMultiplicandCongruous< 32, Crosswise >::Stride = Coord<kStrideRank, Index, LongIndex> |
| using cutlass::layout::TensorOpMultiplicandCongruous< 32, Crosswise >::TensorCoord = PitchLinearCoord |
| using cutlass::layout::TensorOpMultiplicandCongruous< 32, Crosswise >::TileShape = PitchLinearShape<8, 4> |
|
inline |
|
inline |
|
inline |
Computes the number of contiguous elements needed to store a tensor with the given size.
|
inline |
Returns the offset of a coordinate in linear memory. Assumes the coordinate follows the (contiguous, strided) convention.
|
inline static |
|
inline |
|
inline |
|
static |
|
static |
|
static |
|
static |
|
static |
1.8.11