Commit 751c3d1c authored by Tomáš Oberhuber's avatar Tomáš Oberhuber Committed by Jakub Klinkovský
Browse files

Added CSR5 to SpMV benchmark - it does not work yet.

parent 1d894e61
Loading
Loading
Loading
Loading
+15 −1
Original line number Diff line number Diff line
# CSR5 does not work properly yet:
#
# https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5/issues/9
# https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5/issues/10
#
# We can build it with TNL but it crashes with many CUDA errors. We should first check it
# with the original build.
#
#include( cmake/BuildCSR5.cmake )

# Build the SpMV benchmark either as a CUDA executable or as a host-only one.
# CXX_BENCHMARKS_FLAGS and CXX_BENCHMARKS_INCLUDE_DIRS are not set in this
# fragment; they are presumably provided by an included module or by a parent
# scope and may be empty -- TODO confirm.
if( BUILD_CUDA )
    cuda_include_directories( ${CXX_BENCHMARKS_INCLUDE_DIRS} )
    # Quoted so that an empty or multi-element flag list is still printed readably.
    message( STATUS "SpMV benchmark flags: ${CXX_BENCHMARKS_FLAGS}" )
    # The target may be created only once: a second CUDA_ADD_EXECUTABLE call with the
    # same target name is a CMake error, so only the variant passing OPTIONS is kept.
    CUDA_ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cu OPTIONS ${CXX_BENCHMARKS_FLAGS} )
    TARGET_LINK_LIBRARIES( tnl-benchmark-spmv ${CUDA_cusparse_LIBRARY} ${CUDA_cudadevrt_LIBRARY} )
else()
    ADD_EXECUTABLE( tnl-benchmark-spmv tnl-benchmark-spmv.cpp )
    target_compile_options( tnl-benchmark-spmv  PRIVATE ${CXX_BENCHMARKS_FLAGS} )
    target_include_directories( tnl-benchmark-spmv PRIVATE ${CXX_BENCHMARKS_INCLUDE_DIRS} )
endif()

install( TARGETS tnl-benchmark-spmv RUNTIME DESTINATION bin )
+136 −0
Original line number Diff line number Diff line
/***************************************************************************
                          CSR5Benchmark.h  -  description
                             -------------------
    begin                : Apr 23, 2021
    copyright            : (C) 2021 by Tomas Oberhuber et al.
    email                : tomas.oberhuber@fjfi.cvut.cz
 ***************************************************************************/

/* See Copyright Notice in tnl/Copyright */

/***
 * Wrapper of original CSR5 kernels for TNL benchmarks.
 */

#include <stdexcept>
#include <type_traits>


namespace TNL {
/////
// Currently CSR5 for CUDA cannot be built because of a conflict of atomicAdd for the `double` type:
//   https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5/issues/9
// The solution is to insert the whole benchmark into a separate namespace. In this case, however,
// CSR5 does not work with `float`. So far, this seems to be the best solution.
namespace CSR5Benchmark {

#ifdef HAVE_CSR5
#include <CSR5_cuda/anonymouslib_cuda.h>
#endif

#ifdef HAVE_CSR5
/**
 * \brief Forwards a single SpMV request to the CSR5 handle.
 *
 * This is the primary template; it is used for every real type that has no
 * dedicated specialization.
 *
 * \tparam CsrMatrix CSR matrix type, it must be allocated on the CUDA device.
 * \tparam Real type used only for selecting a specialization, by default it
 *         is the real type of \e CsrMatrix.
 */
template< typename CsrMatrix,
          typename Real = typename CsrMatrix::RealType >
struct CSR5SpMVCaller
{
   using RealType = typename CsrMatrix::RealType;
   using IndexType = typename CsrMatrix::IndexType;
   using DeviceType = TNL::Devices::Cuda;
   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
   using VectorView = typename VectorType::ViewType;
   using CSR5Type = anonymouslibHandle< IndexType, typename std::make_unsigned< IndexType >::type, RealType >;

   static_assert( std::is_same< typename CsrMatrix::DeviceType, TNL::Devices::Cuda >::value, "Only CUDA device is allowed for CSR matrix for CSR5 benchmark." );

   /**
    * \brief Calls \e spmv on the CSR5 handle with the constant 1 and the output buffer.
    *
    * \param csr5 CSR5 handle on which the product is evaluated.
    * \param outVector view of the vector whose data pointer is passed to CSR5
    *        as the output buffer.
    */
   static void spmv( CSR5Type& csr5, VectorView& outVector )
   {
      // The first argument is presumably the scaling factor of the product - TODO confirm.
      const RealType alpha = static_cast< RealType >( 1.0 );
      csr5.spmv( alpha, outVector.getData() );
   }
};

/**
 * \brief Specialization of the CSR5 SpMV caller for \e float.
 *
 * The call into CSR5 is intentionally disabled here, so \ref spmv does nothing
 * and the output vector is left untouched.
 *
 * \tparam CsrMatrix CSR matrix type, it must be allocated on the CUDA device.
 */
template< typename CsrMatrix >
struct CSR5SpMVCaller< CsrMatrix, float >
{
   using RealType = typename CsrMatrix::RealType;
   using IndexType = typename CsrMatrix::IndexType;
   using DeviceType = TNL::Devices::Cuda;
   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
   using VectorView = typename VectorType::ViewType;
   using CSR5Type = anonymouslibHandle< IndexType, typename std::make_unsigned< IndexType >::type, RealType >;

   static_assert( std::is_same< typename CsrMatrix::DeviceType, TNL::Devices::Cuda >::value, "Only CUDA device is allowed for CSR matrix for CSR5 benchmark." );

   /**
    * \brief No-op, both parameters are ignored.
    *
    * \param csr5 CSR5 handle (unused).
    * \param outVector view of the output vector (unused).
    */
   static void spmv( CSR5Type& csr5, VectorView& outVector )
   {
      // Disabled call kept for reference:
      //csr5.spmv( ( RealType ) 1.0, outVector.getData() );
   }
};
#endif


/**
 * \brief Wrapper of the original CSR5 CUDA implementation for the SpMV benchmark.
 *
 * The constructor hands the CSR data of \e matrix and the input vector over to
 * a CSR5 handle, \ref vectorProduct runs one SpMV and the destructor calls
 * \e destroy on the handle. When \e HAVE_CSR5 is not defined, the object only
 * stores views of both vectors and all operations do nothing.
 *
 * The object is not copyable: a copy would hold the same CSR5 handle and
 * \e destroy would be called on it once per copy.
 *
 * \tparam CsrMatrix CSR matrix type, it must be allocated on the CUDA device.
 */
template< typename CsrMatrix >
struct CSR5Benchmark
{
   static_assert( std::is_same< typename CsrMatrix::DeviceType, TNL::Devices::Cuda >::value, "Only CUDA device is allowed for CSR matrix for CSR5 benchmark." );
   using RealType = typename CsrMatrix::RealType;
   using DeviceType = TNL::Devices::Cuda;
   using IndexType = typename CsrMatrix::IndexType;
   using VectorType = TNL::Containers::Vector< RealType, DeviceType, IndexType >;
   using VectorView = typename VectorType::ViewType;
#ifdef HAVE_CSR5
   using CSR5Type = anonymouslibHandle< IndexType, typename std::make_unsigned< IndexType >::type, RealType >;
#endif

   /**
    * \brief Sets up the CSR5 handle for the given matrix and vectors.
    *
    * \param matrix CSR matrix whose row pointers, column indexes and values
    *        are passed to CSR5.
    * \param inVector vector to be multiplied, its data pointer is registered
    *        in CSR5 only once here.
    * \param outVector vector into which \ref vectorProduct stores the result.
    *
    * NOTE(review): the CSR5 calls below presumably return error codes which
    * are not checked here - TODO confirm and handle them.
    */
   CSR5Benchmark( CsrMatrix& matrix, VectorType& inVector, VectorType& outVector )
   :
#ifdef HAVE_CSR5
   csr5( matrix.getRows(), matrix.getColumns() ),
#endif
     inVectorView( inVector ), outVectorView( outVector )
   {
#ifdef HAVE_CSR5
      // Pass the CSR representation (number of stored values, row pointers,
      // column indexes and values) to CSR5.
      this->csr5.inputCSR( matrix.getValues().getSize(),
                           matrix.getRowPointers().getData(),
                           matrix.getColumnIndexes().getData(),
                           matrix.getValues().getData() );

      // The input vector needs to be set only once.
      this->csr5.setX( inVector.getData() );

      this->csr5.setSigma(ANONYMOUSLIB_AUTO_TUNED_SIGMA);

      // warmup device
      this->csr5.warmup();

      // Presumably the conversion from CSR to the CSR5 format - TODO confirm.
      this->csr5.asCSR5();
#endif
   }

   // The destructor destroys the CSR5 handle, so copies must not share it.
   CSR5Benchmark( const CSR5Benchmark& ) = delete;

   CSR5Benchmark& operator=( const CSR5Benchmark& ) = delete;

   /**
    * \brief Performs one SpMV with CSR5, the result goes to the output vector
    * given to the constructor. Does nothing when \e HAVE_CSR5 is not defined.
    */
   void vectorProduct()
   {
#ifdef HAVE_CSR5
      CSR5SpMVCaller< CsrMatrix >::spmv( this->csr5, outVectorView );
#endif
   }

   /**
    * \brief Returns the view of the output vector given to the constructor.
    */
   const VectorView& getCudaOutVector() const
   {
      return this->outVectorView;
   }

   /**
    * \brief Calls \e destroy on the CSR5 handle when CSR5 is enabled.
    */
   ~CSR5Benchmark()
   {
#ifdef HAVE_CSR5
      this->csr5.destroy();
#endif
   }

   protected:
#ifdef HAVE_CSR5
      CSR5Type csr5;
#endif
      VectorView inVectorView, outVectorView;
};

   } // namespace CSR5Benchmark
} // namespace TNL
+28 −0
Original line number Diff line number Diff line
# compatibility with the CSR5 package

# The location of the CUDA samples is taken from the CUDA_SAMPLES_DIR environment
# variable; its common/inc subdirectory is added to the benchmark include directories.
# When the environment variable is unset or empty, set() gets no value and leaves
# the CMake variable undefined, which is what the check below relies on.
set( CUDA_SAMPLES_DIR $ENV{CUDA_SAMPLES_DIR} )
if( NOT DEFINED CUDA_SAMPLES_DIR )
    message( WARNING "CUDA_SAMPLES_DIR variable was not set and it is required by CSR5 benchmark - CSR5 benchmark is disabled.")
else()
    # Download and unpack CSR5 at configure time
    message( STATUS "CUDA_SAMPLES_DIR set to ${CUDA_SAMPLES_DIR}")
    configure_file(cmake/CSR5.cmake.in csr5-download/CMakeLists.txt)
    # Configure the helper project generated from the template ...
    execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
        RESULT_VARIABLE result
        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-download )
    if(result)
        message(WARNING "CMake step for CSR5 failed: ${result}")
    else()
        # ... and build it, which fetches the CSR5 sources.
        execute_process(COMMAND ${CMAKE_COMMAND} --build .
            RESULT_VARIABLE result
            WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-download )
        if(result)
            # Report the failed command as one quoted string; unquoted message() arguments
            # would be printed glued together without any separators.
            message(WARNING "Build step for CSR5 failed: ${result} (command: ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-download)")
        else()
            # Enable the CSR5 code paths and make the CSR5 and CUDA samples headers visible.
            set( CXX_BENCHMARKS_FLAGS ${CXX_BENCHMARKS_FLAGS} "-DHAVE_CSR5" )
            set( CXX_BENCHMARKS_INCLUDE_DIRS ${CXX_BENCHMARKS_INCLUDE_DIRS} ${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-src ${CUDA_SAMPLES_DIR}/common/inc)
            message( STATUS "CSR5 build was successful.")
        endif()
    endif()
endif()
+24 −0
Original line number Diff line number Diff line
# vim: ft=cmake

# This is a separate template for CMakeLists.txt to download CSR5 as a separate project

cmake_minimum_required(VERSION 2.8.2)

project(csr5-download NONE)

include(ExternalProject)
# Fetch the CSR5 sources only: the configure, build, install and test steps are
# all disabled by the empty commands below.
ExternalProject_Add(csr5
  GIT_REPOSITORY    https://github.com/weifengliu-ssslab/Benchmark_SpMV_using_CSR5.git
  # No GIT_TAG is set, so the default revision chosen by ExternalProject is checked out.
  # NOTE(review): consider pinning a known-good commit or tag to get reproducible builds.
  SOURCE_DIR        "${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-src"
  BINARY_DIR        "${CMAKE_BINARY_DIR}/src/Benchmarks/SpMV/csr5-build"
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
  TEST_COMMAND      ""
  # Disable update of the external project in an offline build
  # reference: https://stackoverflow.com/a/40423683
  UPDATE_DISCONNECTED ${OFFLINE_BUILD}
)
+25 −3
Original line number Diff line number Diff line
@@ -39,9 +39,9 @@
#include <TNL/Algorithms/Segments/BiEllpack.h>

// Comment the following to turn off some groups of SpMV benchmarks and speed-up the compilation
#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS
//#define WITH_TNL_BENCHMARK_SPMV_GENERAL_MATRICES
//#define WITH_TNL_BENCHMARK_SPMV_SYMMETRIC_MATRICES
//#define WITH_TNL_BENCHMARK_SPMV_LEGACY_FORMATS

// Uncomment the following line to enable benchmarking the sandbox sparse matrix.
//#define WITH_TNL_BENCHMARK_SPMV_SANDBOX_MATRIX
@@ -54,6 +54,7 @@ using namespace TNL::Matrices;
#include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrix.h>
#include <Benchmarks/SpMV/ReferenceFormats/cusparseCSRMatrixLegacy.h>
#include <Benchmarks/SpMV/ReferenceFormats/LightSpMVBenchmark.h>
#include <Benchmarks/SpMV/ReferenceFormats/CSR5Benchmark.h>

namespace TNL {
   namespace Benchmarks {
@@ -470,7 +471,28 @@ benchmarkSpmvSynthetic( Benchmark& benchmark,

   SpmvBenchmarkResult< Real, Devices::Host, int > cusparseBenchmarkResults( hostOutVector, hostOutVector, csrHostMatrix.getNonzeroElementsCount() );
   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", spmvCusparse, cusparseBenchmarkResults );

#ifdef HAVE_CSR5
   ////
   // Perform benchmark on CUDA device with CSR5 as a reference GPU format
   //
   benchmark.setMetadataColumns( Benchmark::MetadataColumns({
      { "matrix name", convertToString( inputFileName ) },
      { "rows", convertToString( csrHostMatrix.getRows() ) },
      { "columns", convertToString( csrHostMatrix.getColumns() ) },
      { "matrix format", String( "CSR5" ) }
   } ));

   CudaVector cudaOutVector2( cudaOutVector );
   CSR5Benchmark::CSR5Benchmark< CSRCudaMatrix > csr5Benchmark( csrCudaMatrix, cudaInVector, cudaOutVector );

   auto csr5SpMV = [&]() {
       csr5Benchmark.vectorProduct();
   };
   benchmark.time< Devices::Cuda >( resetCusparseVectors, "GPU", csr5SpMV, cusparseBenchmarkResults );
   std::cerr << "CSR5 error = " << max( abs( cudaOutVector - cudaOutVector2 ) ) << std::endl;
   csrCudaMatrix.reset();
#endif

   ////
   // Perform benchmark on CUDA device with LightSpMV as a reference GPU format