Loading src/Benchmarks/SpMV/CMakeLists.txt +15 −1 Original line number Diff line number Diff line # CSR5 does not work properly yet: # # https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5/issues/9 # https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5/issues/10 # # We can build it with TNL but it crashes with many CUDA errors. We should first check it # with the original build. # #include( cmake/BuildCSR5.cmake ) if( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cu ) cuda_include_directories( ${CXX_BENCHMARKS_INCLUDE_DIRS} ) message( STATUS ${CXX_BENCHMARKS_FLAGS} ) CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cu OPTIONS ${CXX_BENCHMARKS_FLAGS} ) TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} ) else() ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cpp ) target_compile_options( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_FLAGS} ) target_include_directories( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_INCLUDE_DIRS} ) endif() install( TARGETS tnl-benchmark-spmv RUNTIME DESTINATION bin ) src/Benchmarks/SpMV/ReferenceFormats/CSR5Benchmark.h 0 → 100644 +136 −0 Original line number Diff line number Diff line /*************************************************************************** CSR5Benchmark.h - description ------------------- begin : Apr 23, 2021 copyright : (C) 2021 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ /*** * Wrapper of original CSR5 kernels for TNL benchmarks. */ #include <stdexcept> namespace TNL { ///// // Currently CSR5 for CUDA cannot be built because of conflict of atomicAdd for `double` type: // https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5/issues/9 // The solution is to insert whole benchmark into separate namespace. In this case, however, // CSR5 does not work with `float`. 
So far, this seems to be the best solution. namespace CSR5Benchmark { #ifdef HAVE_CSR5 #include <CSR5_cuda/anonymouslib_cuda.h> #endif #ifdef HAVE_CSR5 /* Primary template: forwards to csr5.spmv() with the scalar 1.0 and the raw data pointer of outVector. */ template< typename CsrMatrix, typename Real = typename CsrMatrix::RealType > struct CSR5SpMVCaller { static_assert( std::is_same< typename CsrMatrix::DeviceType, TNL::Devices::Cuda >::value, "Only CUDA device is allowed for CSR matrix for CSR5 benchmark." ); using RealType = typename CsrMatrix::RealType; using DeviceType = TNL::Devices::Cuda; using IndexType = typename CsrMatrix::IndexType; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; using VectorView = typename VectorType::ViewType; using CSR5Type = anonymouslibHandle< IndexType, typename std::make_unsigned< IndexType >::type, RealType >; static void spmv( CSR5Type& csr5, VectorView& outVector ) { csr5.spmv( ( RealType ) 1.0, outVector.getData() ); }; }; /* Specialization for float: the spmv call below is commented out, so this caller does nothing. */ template< typename CsrMatrix > struct CSR5SpMVCaller< CsrMatrix, float > { static_assert( std::is_same< typename CsrMatrix::DeviceType, TNL::Devices::Cuda >::value, "Only CUDA device is allowed for CSR matrix for CSR5 benchmark." ); using RealType = typename CsrMatrix::RealType; using DeviceType = TNL::Devices::Cuda; using IndexType = typename CsrMatrix::IndexType; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; using VectorView = typename VectorType::ViewType; using CSR5Type = anonymouslibHandle< IndexType, typename std::make_unsigned< IndexType >::type, RealType >; static void spmv( CSR5Type& csr5, VectorView& outVector ) { //csr5.spmv( ( RealType ) 1.0, outVector.getData() ); }; }; #endif /* Benchmark wrapper: sets up the CSR5 handle in the constructor, runs SpMV in vectorProduct(), calls csr5.destroy() in the destructor. */ template< typename CsrMatrix > struct CSR5Benchmark { static_assert( std::is_same< typename CsrMatrix::DeviceType, TNL::Devices::Cuda >::value, "Only CUDA device is allowed for CSR matrix for CSR5 benchmark." 
); using RealType = typename CsrMatrix::RealType; using DeviceType = TNL::Devices::Cuda; using IndexType = typename CsrMatrix::IndexType; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; using VectorView = typename VectorType::ViewType; #ifdef HAVE_CSR5 using CSR5Type = anonymouslibHandle< IndexType, typename std::make_unsigned< IndexType >::type, RealType >; #endif CSR5Benchmark( CsrMatrix& matrix, VectorType& inVector, VectorType& outVector ) : #ifdef HAVE_CSR5 csr5( matrix.getRows(), matrix.getColumns() ), #endif inVectorView( inVector ), outVectorView( outVector ) { #ifdef HAVE_CSR5 // err = A.inputCSR(nnzA, d_csrRowPtrA, d_csrColIdxA, d_csrValA); //cout << "inputCSR err = " << err << endl; this->csr5.inputCSR( matrix.getValues().getSize(), matrix.getRowPointers().getData(), matrix.getColumnIndexes().getData(), matrix.getValues().getData() ); //err = A.setX(d_x); // you only need to do it once! //cout << "setX err = " << err << endl; this->csr5.setX( inVector.getData() ); this->csr5.setSigma(ANONYMOUSLIB_AUTO_TUNED_SIGMA); // warmup device this->csr5.warmup(); // conversion ... 
probably this->csr5.asCSR5(); #endif } void vectorProduct() { #ifdef HAVE_CSR5 CSR5SpMVCaller< CsrMatrix >::spmv( this->csr5, outVectorView ); #endif } const VectorView& getCudaOutVector() { return this->outVectorView; } ~CSR5Benchmark() { #ifdef HAVE_CSR5 this->csr5.destroy(); #endif } protected: #ifdef HAVE_CSR5 CSR5Type csr5; #endif VectorView inVectorView, outVectorView; }; } // namespace CSR5Benchmark } // namespace TNL src/Benchmarks/SpMV/cmake/BuildCSR5.cmake 0 → 100644 +28 −0 Original line number Diff line number Diff line # compatibility with the CSR5 package set( CUDA_SAMPLES_DIR $ENV{CUDA_SAMPLES_DIR} ) if( NOT DEFINED CUDA_SAMPLES_DIR ) message( WARNING "CUDA_SAMPLES_DIR variable was not set and it is required by CSR5 benchmark - CSR5 benchmark is disabled.") else() # Download and unpack CSR5 at configure time message( STATUS "CUDA_SAMPLES_DIR set to ${CUDA_SAMPLES_DIR}") configure_file(cmake/CSR5.cmake.in csr5-download/CMakeLists.txt) execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . RESULT_VARIABLE result WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-download ) if(result) message(WARNING "CMake step for CSR5 failed: ${result}") else() execute_process(COMMAND ${CMAKE_COMMAND} --build . 
RESULT_VARIABLE result WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-download ) if(result) message( ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-download ) message(WARNING "Build step for CSR5 failed: ${result}") else() set( CXX_BENCHMARKS_FLAGS ${CXX_BENCHMARKS_FLAGS} "-DHAVE_CSR5" ) set( CXX_BENCHMARKS_INCLUDE_DIRS ${CXX_BENCHMARKS_INCLUDE_DIRS} ${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-src ${CUDA_SAMPLES_DIR}/common/inc) message( STATUS "CSR5 build was succesfull.") endif() endif() endif() src/Benchmarks/SpMV/cmake/CSR5.cmake.in 0 → 100644 +24 −0 Original line number Diff line number Diff line # vim: ft=cmake # This is a separate template for CMakeLists.txt to build gtest as a separate project cmake_minimum_required(VERSION 2.8.2) project(csr5-download NONE) include(ExternalProject) ExternalProject_Add(csr5 GIT_REPOSITORY https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5.git #GIT_TAG master # build from a stable branch instead of master (which gets broken pretty often) #GIT_TAG v1.10.x SOURCE_DIR "${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-src" BINARY_DIR "${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-build" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" TEST_COMMAND "" # Disable update of the external project in an offline build # reference: https://stackoverflow.com/a/40423683 UPDATE_DISCONNECTED ${OFFLINE_BUILD} ) src/Benchmarks/SpMV/spmv.h +25 −3 Original line number Diff line number Diff line Loading @@ -39,9 +39,9 @@ #include <TNL/Algorithms/Segments/BiEllpack.h> // Comment the following to turn off some groups of SpMV benchmarks and speed-up the compilation #define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES #define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES #define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS //#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES //#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES //#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS // Uncomment the following line to 
enable benchmarking the sandbox sparse matrix. //#define WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX Loading @@ -54,6 +54,7 @@ using namespace TNL::Matrices; #include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrix.h> #include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h> #include <Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h> #include <Benchmarks/SpMV/ReferenceFormats/CSR5Benchmark.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -470,7 +471,28 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults ); #ifdef HAVE_CSR5 //// // Perform benchmark on CUDA device with CSR5 as a reference GPU format // benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "CSR5" ) } } )); CudaVector cudaOutVector2( cudaOutVector ); CSR5Benchmark::CSR5Benchmark< CSRCudaMatrix > csr5Benchmark( csrCudaMatrix, cudaInVector, cudaOutVector ); auto csr5SpMV = [&]() { csr5Benchmark.vectorProduct(); }; benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cusparseBenchmarkResults ); std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl; csrCudaMatrix.reset(); #endif //// // Perform benchmark on CUDA device with LightSpMV as a reference GPU format Loading Loading
# src/Benchmarks/SpMV/CMakeLists.txt
#
# Builds and installs the tnl-benchmark-spmv executable, either as a CUDA target
# (BUILD_CUDA) or as a plain C++ target.

# CSR5 does not work properly yet:
#
# https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5/issues/9
# https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5/issues/10
#
# We can build it with TNL but it crashes with many CUDA errors. We should first check it
# with the original build.
#
#include( cmake/BuildCSR5.cmake )

if( BUILD_CUDA )
   # The target must be defined exactly once; the extra benchmark flags are passed via OPTIONS.
   cuda_include_directories( ${CXX_BENCHMARKS_INCLUDE_DIRS} )
   message( STATUS "SpMV benchmark flags: ${CXX_BENCHMARKS_FLAGS}" )
   CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cu
                        OPTIONS ${CXX_BENCHMARKS_FLAGS} )
   TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} )
else()
   ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cpp )
   target_compile_options( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_FLAGS} )
   target_include_directories( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_INCLUDE_DIRS} )
endif()

install( TARGETS tnl-benchmark-spmv RUNTIME DESTINATION bin )
src/Benchmarks/SpMV/ReferenceFormats/CSR5Benchmark.h 0 → 100644 +136 −0 Original line number Diff line number Diff line /*************************************************************************** CSR5Benchmark.h - description ------------------- begin : Apr 23, 2021 copyright : (C) 2021 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ /* See Copyright Notice in tnl/Copyright */ /*** * Wrapper of original CSR5 kernels for TNL benchmarks. */ #include <stdexcept> namespace TNL { ///// // Currently CSR5 for CUDA cannot be build because of conflict of atomicAdd for `double` type: // https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5/issues/9 // The solution is to insert whole benchmark into separate namespace. In this case, however, // CSR5 does not work with `float`. So far, this seems to be the best solution. namespace CSR5Benchmark { #ifdef HAVE_CSR5 #include <CSR5_cuda/anonymouslib_cuda.h> #endif #ifdef HAVE_CSR5 template< typename CsrMatrix, typename Real = typename CsrMatrix::RealType > struct CSR5SpMVCaller { static_assert( std::is_same< typename CsrMatrix::DeviceType, TNL::Devices::Cuda >::value, "Only CUDA device is allowed for CSR matrix for CSR5 benchmark." 
); using RealType = typename CsrMatrix::RealType; using DeviceType = TNL::Devices::Cuda; using IndexType = typename CsrMatrix::IndexType; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; using VectorView = typename VectorType::ViewType; using CSR5Type = anonymouslibHandle< IndexType, typename std::make_unsigned< IndexType >::type, RealType >; static void spmv( CSR5Type& csr5, VectorView& outVector ) { csr5.spmv( ( RealType ) 1.0, outVector.getData() ); }; }; template< typename CsrMatrix > struct CSR5SpMVCaller< CsrMatrix, float > { static_assert( std::is_same< typename CsrMatrix::DeviceType, TNL::Devices::Cuda >::value, "Only CUDA device is allowed for CSR matrix for CSR5 benchmark." ); using RealType = typename CsrMatrix::RealType; using DeviceType = TNL::Devices::Cuda; using IndexType = typename CsrMatrix::IndexType; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; using VectorView = typename VectorType::ViewType; using CSR5Type = anonymouslibHandle< IndexType, typename std::make_unsigned< IndexType >::type, RealType >; static void spmv( CSR5Type& csr5, VectorView& outVector ) { //csr5.spmv( ( RealType ) 1.0, outVector.getData() ); }; }; #endif template< typename CsrMatrix > struct CSR5Benchmark { static_assert( std::is_same< typename CsrMatrix::DeviceType, TNL::Devices::Cuda >::value, "Only CUDA device is allowed for CSR matrix for CSR5 benchmark." 
); using RealType = typename CsrMatrix::RealType; using DeviceType = TNL::Devices::Cuda; using IndexType = typename CsrMatrix::IndexType; using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >; using VectorView = typename VectorType::ViewType; #ifdef HAVE_CSR5 using CSR5Type = anonymouslibHandle< IndexType, typename std::make_unsigned< IndexType >::type, RealType >; #endif CSR5Benchmark( CsrMatrix& matrix, VectorType& inVector, VectorType& outVector ) : #ifdef HAVE_CSR5 csr5( matrix.getRows(), matrix.getColumns() ), #endif inVectorView( inVector ), outVectorView( outVector ) { #ifdef HAVE_CSR5 // err = A.inputCSR(nnzA, d_csrRowPtrA, d_csrColIdxA, d_csrValA); //cout << "inputCSR err = " << err << endl; this->csr5.inputCSR( matrix.getValues().getSize(), matrix.getRowPointers().getData(), matrix.getColumnIndexes().getData(), matrix.getValues().getData() ); //err = A.setX(d_x); // you only need to do it once! //cout << "setX err = " << err << endl; this->csr5.setX( inVector.getData() ); this->csr5.setSigma(ANONYMOUSLIB_AUTO_TUNED_SIGMA); // warmup device this->csr5.warmup(); // conversion ... probably this->csr5.asCSR5(); #endif } void vectorProduct() { #ifdef HAVE_CSR5 CSR5SpMVCaller< CsrMatrix >::spmv( this->csr5, outVectorView ); #endif } const VectorView& getCudaOutVector() { return this->outVectorView; } ~CSR5Benchmark() { #ifdef HAVE_CSR5 this->csr5.destroy(); #endif } protected: #ifdef HAVE_CSR5 CSR5Type csr5; #endif VectorView inVectorView, outVectorView; }; } // namespace CSR5Benchmark } // namespace TNL
# src/Benchmarks/SpMV/cmake/BuildCSR5.cmake
#
# Downloads the CSR5 sources at configure time. On success, "-DHAVE_CSR5" is appended to
# CXX_BENCHMARKS_FLAGS and the CSR5 and CUDA samples include directories are appended to
# CXX_BENCHMARKS_INCLUDE_DIRS. On any failure only a warning is printed and both
# variables are left untouched.

# compatibility with the CSR5 package
# The environment variable takes precedence; an already defined CUDA_SAMPLES_DIR
# (e.g. given by -D on the command line) is used as a fallback.
if( NOT "$ENV{CUDA_SAMPLES_DIR}" STREQUAL "" )
   set( CUDA_SAMPLES_DIR "$ENV{CUDA_SAMPLES_DIR}" )
endif()

if( NOT CUDA_SAMPLES_DIR )
   message( WARNING "CUDA_SAMPLES_DIR variable was not set and it is required by CSR5 benchmark - CSR5 benchmark is disabled.")
   return()
endif()
message( STATUS "CUDA_SAMPLES_DIR set to ${CUDA_SAMPLES_DIR}")

# Download and unpack CSR5 at configure time.
# The same directory is used for the generated project and for both cmake invocations.
set( CSR5_DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-download" )
configure_file( "${CMAKE_CURRENT_LIST_DIR}/CSR5.cmake.in" "${CSR5_DOWNLOAD_DIR}/CMakeLists.txt" )

execute_process( COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
                 RESULT_VARIABLE result
                 WORKING_DIRECTORY "${CSR5_DOWNLOAD_DIR}" )
if( result )
   message( WARNING "CMake step for CSR5 failed: ${result}" )
   return()
endif()

execute_process( COMMAND ${CMAKE_COMMAND} --build .
                 RESULT_VARIABLE result
                 WORKING_DIRECTORY "${CSR5_DOWNLOAD_DIR}" )
if( result )
   message( WARNING "Build step for CSR5 failed: ${result} (command: ${CMAKE_COMMAND} --build ${CSR5_DOWNLOAD_DIR})" )
   return()
endif()

set( CXX_BENCHMARKS_FLAGS ${CXX_BENCHMARKS_FLAGS} "-DHAVE_CSR5" )
set( CXX_BENCHMARKS_INCLUDE_DIRS ${CXX_BENCHMARKS_INCLUDE_DIRS}
                                 ${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-src
                                 ${CUDA_SAMPLES_DIR}/common/inc )
message( STATUS "CSR5 build was succesfull.")
# src/Benchmarks/SpMV/cmake/CSR5.cmake.in
# vim: ft=cmake
# This is a separate template for a CMakeLists.txt which downloads the CSR5 sources as a
# separate project. All configure, build, install and test commands below are empty, so
# the project only fetches the repository into SOURCE_DIR.

# The upper bound keeps the file acceptable for CMake releases which dropped
# compatibility with policy versions older than 3.5; older CMake ignores it.
cmake_minimum_required(VERSION 2.8.2...3.10)

project(csr5-download NONE)

include(ExternalProject)
ExternalProject_Add(csr5
  GIT_REPOSITORY    https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5.git
  # No tag is pinned, so the default branch of ExternalProject is checked out.
  # TODO: pin a known-good commit to make the benchmark reproducible.
  #GIT_TAG           master
  SOURCE_DIR        "${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-src"
  BINARY_DIR        "${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-build"
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
  TEST_COMMAND      ""
  # Disable update of the external project in an offline build
  # reference: https://stackoverflow.com/a/40423683
  UPDATE_DISCONNECTED ${OFFLINE_BUILD}
)
src/Benchmarks/SpMV/spmv.h +25 −3 Original line number Diff line number Diff line Loading @@ -39,9 +39,9 @@ #include <TNL/Algorithms/Segments/BiEllpack.h> // Comment the following to turn off some groups of SpMV benchmarks and speed-up the compilation #define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES #define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES #define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS //#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES //#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES //#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS // Uncomment the following line to enable benchmarking the sandbox sparse matrix. //#define WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX Loading @@ -54,6 +54,7 @@ using namespace TNL::Matrices; #include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrix.h> #include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h> #include <Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h> #include <Benchmarks/SpMV/ReferenceFormats/CSR5Benchmark.h> namespace TNL { namespace Benchmarks { Loading Loading @@ -470,7 +471,28 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() ); benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults ); #ifdef HAVE_CSR5 //// // Perform benchmark on CUDA device with CSR5 as a reference GPU format // benchmark.setMetadataColumns( Benchmark::MetadataColumns({ { "matrix name", convertToString( inputFileName ) }, { "rows", convertToString( csrHostMatrix.getRows() ) }, { "columns", convertToString( csrHostMatrix.getColumns() ) }, { "matrix format", String( "CSR5" ) } } )); CudaVector cudaOutVector2( cudaOutVector ); CSR5Benchmark::CSR5Benchmark< CSRCudaMatrix > csr5Benchmark( csrCudaMatrix, cudaInVector, cudaOutVector ); auto csr5SpMV = [&]() { csr5Benchmark.vectorProduct(); }; benchmark.time< Devices::Cuda >( 
resetCusparseVectors, "GPU", csr5SpMV, cusparseBenchmarkResults ); std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl; csrCudaMatrix.reset(); #endif //// // Perform benchmark on CUDA device with LightSpMV as a reference GPU format Loading