Loading src/Benchmarks/SpMV/spmv.h +7 −9 Original line number Diff line number Diff line Loading @@ -32,9 +32,9 @@ using namespace TNL::Matrices; namespace TNL { namespace Benchmarks { // silly alias to match the number of template parameters with other formats // Alias to match the number of template parameters with other formats template< typename Real, typename Device, typename Index > using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >; using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >; // Get the name (with extension) of input matrix file std::string getMatrixFileName( const String& InputFileName ) Loading @@ -52,7 +52,7 @@ std::string getMatrixFileName( const String& InputFileName ) template< typename Matrix > std::string getMatrixFormat( const Matrix& matrix ) { std::string mtrxFullType = matrix.getType(); std::string mtrxFullType = getType( matrix ); std::string mtrxType = mtrxFullType.substr( 0, mtrxFullType.find( "<" ) ); std::string format = mtrxType.substr( mtrxType.find( ':' ) + 2 ); Loading @@ -72,7 +72,7 @@ void printMatrixInfo( const Matrix& matrix, template< typename Real, template< typename, typename, typename > class Matrix, template< typename, typename, typename > class Vector = Containers::Vector > template< typename, typename, typename, typename > class Vector = Containers::Vector > bool benchmarkSpMV( Benchmark& benchmark, const String& inputFileName, Loading Loading @@ -142,9 +142,6 @@ benchmarkSpMV( Benchmark& benchmark, return false; } #ifdef HAVE_CUDA deviceMatrix = hostMatrix; #endif // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS), // because we need the matrix loaded first to get the rows and columns Loading @@ -160,6 +157,7 @@ benchmarkSpMV( Benchmark& benchmark, hostVector2.setSize( hostMatrix.getRows() ); #ifdef HAVE_CUDA deviceMatrix = hostMatrix; deviceVector.setSize( hostMatrix.getColumns() ); deviceVector2.setSize( hostMatrix.getRows() ); #endif Loading Loading @@ -242,7 +240,6 @@ benchmarkSpMV( Benchmark& benchmark, char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ]; char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ]; #endif // Difference between CPU and GPU results for the current format Loading @@ -262,6 +259,7 @@ benchmarkSpMV( Benchmark& benchmark, // Print result differences of GPU of current format and GPU with cuSPARSE. std::cout << GPUcuSparse_absMax << std::endl; std::cout << GPUcuSparse_lpNorm << std::endl; #endif std::cout << std::endl; return true; Loading @@ -277,7 +275,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, bool result = true; result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::SlicedEllpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); // AdEllpack is broken Loading src/TNL/Matrices/AdEllpack_impl.h +58 −57 Original line number Diff line number Diff line Loading @@ -1423,7 +1423,6 @@ void AdEllpackVectorProductCuda32( const AdEllpack< Real, Devices::Cuda, Index > } #endif #ifdef HAVE_CUDA template<> class AdEllpackDeviceDependentCode< Devices::Cuda > { Loading @@ -1439,12 +1438,14 @@ public: const InVector& inVector, OutVector& outVector ) { #ifdef HAVE_CUDA typedef AdEllpack< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); TNL_CHECK_CUDA_DEVICE; if( matrix.totalLoad < 2 ) { dim3 blockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); Loading Loading @@ -1560,11 +1561,11 @@ public: Devices::Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } #endif // HAVE_CUDA } }; #endif } // namespace Matrices } // namespace TNL src/TNL/Matrices/BiEllpack_impl.h +2 −1 Original line number Diff line number Diff line Loading @@ -94,7 +94,8 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths ) DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); DeviceDependentCode::computeColumnSizes( *this, rowLengths ); this->groupPointers.computeExclusivePrefixSum(); //this->groupPointers.computeExclusivePrefixSum(); this->groupPointers.template scan< Algorithms::ScanType::Exclusive >(); DeviceDependentCode::verifyRowPerm( *this, rowLengths ); DeviceDependentCode::verifyRowLengths( *this, rowLengths ); Loading src/TNL/Matrices/ChunkedEllpack_impl.h +1 −3 Original line number Diff line number Diff line Loading @@ -43,9 +43,7 @@ String ChunkedEllpack< Real, Device, Index >::getSerializationType() { return String( "Matrices::ChunkedEllpack< ") + getType< Real >() + String( ", " ) + String( Device :: getDeviceType() ) + String( ", " ) + String( ", [any device], " ) + String( TNL::getType< Index >() ) + String( " >" ); } Loading src/UnitTests/Matrices/CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -66,7 +66,8 @@ ENDIF( BUILD_CUDA ) ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) # TODO: Uncomment the following when AdEllpack works #ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) Loading Loading
src/Benchmarks/SpMV/spmv.h +7 −9 Original line number Diff line number Diff line Loading @@ -32,9 +32,9 @@ using namespace TNL::Matrices; namespace TNL { namespace Benchmarks { // silly alias to match the number of template parameters with other formats // Alias to match the number of template parameters with other formats template< typename Real, typename Device, typename Index > using SlicedEllpack = Matrices::SlicedEllpack< Real, Device, Index >; using SlicedEllpackAlias = Matrices::SlicedEllpack< Real, Device, Index >; // Get the name (with extension) of input matrix file std::string getMatrixFileName( const String& InputFileName ) Loading @@ -52,7 +52,7 @@ std::string getMatrixFileName( const String& InputFileName ) template< typename Matrix > std::string getMatrixFormat( const Matrix& matrix ) { std::string mtrxFullType = matrix.getType(); std::string mtrxFullType = getType( matrix ); std::string mtrxType = mtrxFullType.substr( 0, mtrxFullType.find( "<" ) ); std::string format = mtrxType.substr( mtrxType.find( ':' ) + 2 ); Loading @@ -72,7 +72,7 @@ void printMatrixInfo( const Matrix& matrix, template< typename Real, template< typename, typename, typename > class Matrix, template< typename, typename, typename > class Vector = Containers::Vector > template< typename, typename, typename, typename > class Vector = Containers::Vector > bool benchmarkSpMV( Benchmark& benchmark, const String& inputFileName, Loading Loading @@ -142,9 +142,6 @@ benchmarkSpMV( Benchmark& benchmark, return false; } #ifdef HAVE_CUDA deviceMatrix = hostMatrix; #endif // Setup MetaData here (not in tnl-benchmark-spmv.h, as done in Benchmarks/BLAS), // because we need the matrix loaded first to get the rows and columns Loading @@ -160,6 +157,7 @@ benchmarkSpMV( Benchmark& benchmark, hostVector2.setSize( hostMatrix.getRows() ); #ifdef HAVE_CUDA deviceMatrix = hostMatrix; deviceVector.setSize( hostMatrix.getColumns() ); deviceVector2.setSize( hostMatrix.getRows() ); #endif Loading Loading @@ -242,7 +240,6 @@ benchmarkSpMV( Benchmark& benchmark, char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ]; char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ]; #endif // Difference between CPU and GPU results for the current format Loading @@ -262,6 +259,7 @@ benchmarkSpMV( Benchmark& benchmark, // Print result differences of GPU of current format and GPU with cuSPARSE. std::cout << GPUcuSparse_absMax << std::endl; std::cout << GPUcuSparse_lpNorm << std::endl; #endif std::cout << std::endl; return true; Loading @@ -277,7 +275,7 @@ benchmarkSpmvSynthetic( Benchmark& benchmark, bool result = true; result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::SlicedEllpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, SlicedEllpackAlias >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); // AdEllpack is broken Loading
src/TNL/Matrices/AdEllpack_impl.h +58 −57 Original line number Diff line number Diff line Loading @@ -1423,7 +1423,6 @@ void AdEllpackVectorProductCuda32( const AdEllpack< Real, Devices::Cuda, Index > } #endif #ifdef HAVE_CUDA template<> class AdEllpackDeviceDependentCode< Devices::Cuda > { Loading @@ -1439,12 +1438,14 @@ public: const InVector& inVector, OutVector& outVector ) { #ifdef HAVE_CUDA typedef AdEllpack< Real, Devices::Cuda, Index > Matrix; typedef typename Matrix::IndexType IndexType; Matrix* kernel_this = Devices::Cuda::passToDevice( matrix ); InVector* kernel_inVector = Devices::Cuda::passToDevice( inVector ); OutVector* kernel_outVector = Devices::Cuda::passToDevice( outVector ); TNL_CHECK_CUDA_DEVICE; if( matrix.totalLoad < 2 ) { dim3 blockSize( 256 ), cudaGridSize( Cuda::getMaxGridSize() ); Loading Loading @@ -1560,11 +1561,11 @@ public: Devices::Cuda::freeFromDevice( kernel_outVector ); TNL_CHECK_CUDA_DEVICE; } #endif // HAVE_CUDA } }; #endif } // namespace Matrices } // namespace TNL
src/TNL/Matrices/BiEllpack_impl.h +2 −1 Original line number Diff line number Diff line Loading @@ -94,7 +94,8 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths ) DeviceDependentCode::performRowBubbleSort( *this, rowLengths ); DeviceDependentCode::computeColumnSizes( *this, rowLengths ); this->groupPointers.computeExclusivePrefixSum(); //this->groupPointers.computeExclusivePrefixSum(); this->groupPointers.template scan< Algorithms::ScanType::Exclusive >(); DeviceDependentCode::verifyRowPerm( *this, rowLengths ); DeviceDependentCode::verifyRowLengths( *this, rowLengths ); Loading
src/TNL/Matrices/ChunkedEllpack_impl.h +1 −3 Original line number Diff line number Diff line Loading @@ -43,9 +43,7 @@ String ChunkedEllpack< Real, Device, Index >::getSerializationType() { return String( "Matrices::ChunkedEllpack< ") + getType< Real >() + String( ", " ) + String( Device :: getDeviceType() ) + String( ", " ) + String( ", [any device], " ) + String( TNL::getType< Index >() ) + String( " >" ); } Loading
src/UnitTests/Matrices/CMakeLists.txt +2 −1 Original line number Diff line number Diff line Loading @@ -66,7 +66,8 @@ ENDIF( BUILD_CUDA ) ADD_TEST( SparseMatrixCopyTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixCopyTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) # TODO: Uncomment the following when AdEllpack works #ADD_TEST( SparseMatrixTest_AdEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_AdEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_BiEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_BiEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_ChunkedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_ChunkedEllpack${CMAKE_EXECUTABLE_SUFFIX} ) ADD_TEST( SparseMatrixTest_CSR ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR${CMAKE_EXECUTABLE_SUFFIX} ) Loading