|  | CUTLASS
    CUDA Templates for Linear Algebra Subroutines and Solvers | 
#include <default_mma_core_sm70.h>
| Static Public Attributes | |
| static int const | kWarpSize = warp::WarpSize<arch::OpClassTensorOp>::value | 
| Number of threads per warp.  More... | |
| static int const | kThreads = WarpCount::kCount * kWarpSize | 
| Number of threads total.  More... | |
| static int const | kAccessSizeInBits = 128 | 
| Size of a threadblock-scoped access.  More... | |
Partial specialization:
A: column-major B: column-major Operator: tensor op class
This uses the default warp-level operator given tile sizes
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementA = ElementA_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementB = ElementB_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementC = ElementC_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::InstructionShape = GemmShape<8, 8, 4> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::IteratorThreadMapA = transform::PitchLinearWarpRakedThreadMap< layout::PitchLinearShape<Shape::kM, Shape::kK>, kThreads, layout::PitchLinearShape<8, 4>, kAccessSizeInBits / sizeof_bits<ElementA>::value > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::IteratorThreadMapB = transform::PitchLinearWarpRakedThreadMap< layout::PitchLinearShape<Shape::kK, Shape::kN>, kThreads, layout::PitchLinearShape<4, 8>, kAccessSizeInBits / sizeof_bits<ElementB>::value > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutA = layout::ColumnMajor | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutB = layout::ColumnMajor | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutC = LayoutC_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaPolicy = MmaPolicy< MmaTensorOp, MatrixShape<0, 0>, MatrixShape<0, 0>, WarpCount::kK > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaTensorOp = cutlass::gemm::warp::MmaVoltaTensorOp< WarpShape, ElementA, SmemLayoutA, ElementB, SmemLayoutB, ElementC, LayoutC, Policy > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Operator = Operator_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::OperatorClass = arch::OpClassTensorOp | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Policy = cutlass::gemm::warp::MmaTensorOpPolicy< cutlass::arch::Mma< cutlass::gemm::GemmShape<16, 16, 4>, 32, ElementA, LayoutA, ElementB, LayoutB, ElementC, cutlass::layout::RowMajor, cutlass::arch::OpMultiplyAdd >, cutlass::MatrixShape<1, 1> > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Shape = Shape_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::SmemIteratorA = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kM, Shape::kK>, ElementA, SmemLayoutA, 1, IteratorThreadMapA > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::SmemIteratorB = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kK, Shape::kN>, ElementB, SmemLayoutB, 1, IteratorThreadMapB > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::SmemLayoutA = layout::ColumnMajorVoltaTensorOpMultiplicandCongruous< sizeof_bits<ElementA>::value> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::SmemLayoutB = layout::ColumnMajorVoltaTensorOpMultiplicandCrosswise< sizeof_bits<ElementB>::value, Shape::kK> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpCount = GemmShape< Shape::kM / WarpShape::kM, Shape::kN / WarpShape::kN, Shape::kK / WarpShape::kK > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 8, 8, 4 >, ElementA_, layout::ColumnMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpShape = WarpShape_ | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
 1.8.11
 1.8.11