![]() |
CUTLASS
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Reference implementation for GEMM in host-side code. More...
#include "cutlass/coord.h"#include "cutlass/numeric_types.h"#include "cutlass/functional.h"#include "cutlass/numeric_conversion.h"#include "cutlass/matrix_traits.h"#include "cutlass/tensor_view.h"#include "cutlass/gemm/gemm.h"#include "cutlass/arch/mma.h"

Go to the source code of this file.
Classes | |
| struct | cutlass::reference::host::Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, InnerProductOp > |
| struct | cutlass::reference::host::Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpMultiplyAdd > |
| Partial specialization for multiply-add. More... | |
| struct | cutlass::reference::host::Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpMultiplyAddSaturate > |
| Partial specialization for multiply-add-saturate. More... | |
| struct | cutlass::reference::host::Gemm< ElementA, LayoutA, ElementB, LayoutB, ElementC, LayoutC, ScalarType, ComputeType, arch::OpXorPopc > |
| Partial specialization for XOR-popc. More... | |
Namespaces | |
| cutlass | |
| cutlass::reference | |
| cutlass::reference::host | |
Functions | |
| template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename InnerProductOp = multiply_add<ComputeType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>> | |
| void | cutlass::reference::host::compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, TensorRef< ElementC, LayoutC > tensor_d, ComputeType initial_accum) |
| template<typename ElementA , typename LayoutA , typename ElementB , typename LayoutB , typename ElementC , typename LayoutC , typename ScalarType , typename ComputeType , typename InnerProductOp = multiply_add<ComputeType>, typename ConvertOp = NumericConverter<ElementC, ScalarType>> | |
| void | cutlass::reference::host::compute_gemm (gemm::GemmCoord problem_size, ScalarType alpha, TensorRef< ElementA, LayoutA > tensor_a, TensorRef< ElementB, LayoutB > tensor_b, ScalarType beta, TensorRef< ElementC, LayoutC > tensor_c, ComputeType initial_accum) |
| template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType > | |
| void | cutlass::reference::host::BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c, AccumulatorType initial_accum) |
| Computes a batch of GEMMs over a set of matrices of common dimension. More... | |
| template<typename TensorRefCollectionA , typename TensorRefCollectionB , typename TensorRefCollectionC , typename ScalarType , typename AccumulatorType > | |
| void | cutlass::reference::host::BatchedGemm (gemm::GemmCoord problem_size, int batch_count, ScalarType alpha, TensorRefCollectionA const &tensor_a, TensorRefCollectionB const &tensor_b, ScalarType beta, TensorRefCollectionC &tensor_c) |
1.8.11