Loading src/Benchmarks/SpMV/spmv.h +19 −19 Original line number Diff line number Diff line Loading @@ -100,11 +100,11 @@ benchmarkSpMV( Benchmark & benchmark, return false; } #ifdef HAVE_CUDA // cuSPARSE handle setup cusparseHandle_t cusparseHandle; cusparseCreate( &cusparseHandle ); #ifdef HAVE_CUDA // cuSPARSE (in TNL's CSR) only works for device, copy the matrix from host to device CSRdeviceMatrix = CSRhostMatrix; Loading Loading @@ -185,9 +185,11 @@ benchmarkSpMV( Benchmark & benchmark, auto spmvCuda = [&]() { deviceMatrix.vectorProduct( deviceVector, deviceVector2 ); }; #ifdef HAVE_CUDA auto spmvCusparse = [&]() { cusparseCSR.vectorProduct( deviceVector, deviceVector2 ); }; #endif benchmark.setOperation( datasetSize ); benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); Loading @@ -201,18 +203,6 @@ benchmarkSpMV( Benchmark & benchmark, // Copy the values resultHostVector2 = hostVector2; #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); // Initialize the device vector to be compared. // (The values in deviceVector2 will be reset when spmvCusparse starts) HostVector resultDeviceVector2; resultDeviceVector2.setSize( deviceVector2.getSize() ); resultDeviceVector2.setValue( 0.0 ); resultDeviceVector2 = deviceVector2; #endif // Setup cuSPARSE MetaData, since it has the same header as CSR, // and therefore will not get its own headers (rows, cols, speedup etc.) in log. // * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten. Loading @@ -225,6 +215,16 @@ benchmarkSpMV( Benchmark & benchmark, } )); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); // Initialize the device vector to be compared. // (The values in deviceVector2 will be reset when spmvCusparse starts) HostVector resultDeviceVector2; resultDeviceVector2.setSize( deviceVector2.getSize() ); resultDeviceVector2.setValue( 0.0 ); resultDeviceVector2 = deviceVector2; benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse ); HostVector resultcuSPARSEDeviceVector2; Loading @@ -232,7 +232,6 @@ benchmarkSpMV( Benchmark & benchmark, resultcuSPARSEDeviceVector2.setValue( 0.0 ); resultcuSPARSEDeviceVector2 = deviceVector2; #endif // Difference between GPU (curent format) and GPU-cuSPARSE results Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 ); Loading @@ -243,6 +242,7 @@ benchmarkSpMV( Benchmark & benchmark, char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ]; char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ]; #endif // Difference between CPU and GPU results for the current format Loading Loading @@ -277,7 +277,7 @@ benchmarkSpmvSynthetic( Benchmark & benchmark, bool result = true; result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::SlicedEllpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); // AdEllpack is broken Loading src/TNL/Matrices/AdEllpack.h +4 −4 Original line number Diff line number Diff line Loading @@ -195,7 +195,7 @@ public: typename = typename Enabler< Device2 >::type > AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix ); bool save( File& file ) const; void save( File& file ) const; void load( File& file ); Loading @@ -207,13 +207,13 @@ public: bool balanceLoad( const RealType average, ConstCompressedRowLengthsVectorView rowLengths, warpList< ThisType >* list ); warpList< AdEllpack >* list ); void computeWarps( const IndexType SMs, const IndexType threadsPerSM, warpList< ThisType >* list ); warpList< AdEllpack >* list ); bool createArrays( warpList< ThisType >* list ); bool createArrays( warpList< AdEllpack >* list ); void performRowTest(); Loading src/TNL/Matrices/AdEllpack_impl.h +6 −28 Original line number Diff line number Diff line Loading @@ -162,28 +162,6 @@ AdEllpack< Real, Device, Index >::AdEllpack() warpSize( 32 ) {} template< typename Real, typename Device, typename Index > String AdEllpack< Real, Device, Index >::getTypeVirtual() const { return this->getType(); } template< typename Real, typename Device, typename Index > String AdEllpack< Real, Device, Index >::getType() { return String( "Matrices::AdEllpack< ") + String( TNL::getType< Real >() ) + String( ", " ) + String( Device::getDeviceType() ) + String( ", " ) + String( TNL::getType< Index >() ) + String( " >" ); } template< typename Real, typename Device, typename Index > Loading @@ -204,7 +182,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) average /= ( RealType ) this->getRows(); this->totalLoad = average; warpList< ThisType >* list = new warpList< ThisType >(); warpList< AdEllpack >* list = new warpList< AdEllpack >(); if( !this->balanceLoad( average, rowLengths, list ) ) throw 0; // TODO: Make better exception Loading Loading @@ -766,7 +744,7 @@ template< typename Real, typename Index > bool AdEllpack< Real, Device, Index >::balanceLoad( const RealType average, ConstCompressedRowLengthsVectorView rowLengths, warpList< ThisType >* list ) warpList< AdEllpack >* list ) { IndexType offset, rowOffset, localLoad, reduceMap[ 32 ]; Loading Loading @@ -882,10 +860,10 @@ template< typename Real, typename Index > void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs, const IndexType threadsPerSM, warpList< ThisType >* list ) warpList< AdEllpack >* list ) { IndexType averageLoad = 0; warpInfo< ThisType >* temp = list->getHead()->next; warpInfo< AdEllpack >* temp = list->getHead()->next; while( temp/*->next*/ != list->getTail() ) { Loading Loading @@ -918,7 +896,7 @@ void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs, template< typename Real, typename Device, typename Index > bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list ) bool AdEllpack< Real, Device, Index >::createArrays( warpList< AdEllpack >* list ) { IndexType length = list->getNumberOfWarps(); Loading @@ -928,7 +906,7 @@ bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list this->reduceMap.setSize( length * this->warpSize ); IndexType iteration = 0; warpInfo< ThisType >* warp = list->getHead()->next; warpInfo< AdEllpack >* warp = list->getHead()->next; while( warp != list->getTail() ) { this->offset.setElement( iteration, warp->offset ); Loading src/TNL/Matrices/BiEllpack_impl.h +4 −31 Original line number Diff line number Diff line Loading @@ -45,28 +45,6 @@ BiEllpack< Real, Device, Index >::BiEllpack() logWarpSize( 5 ) {} template< typename Real, typename Device, typename Index > String BiEllpack< Real, Device, Index >::getType() { return String( "Matrices::BiEllpack< ") + String( TNL::getType< Real >() ) + String( ", " ) + String( Device :: getDeviceType() ) + String( ", " ) + String( TNL::getType< Index >() ) + String( " >" ); } template< typename Real, typename Device, typename Index > String BiEllpack< Real, Device, Index >::getTypeVirtual() const { return this->getType(); } template< typename Real, typename Device, typename Index > Loading Loading @@ -715,18 +693,13 @@ BiEllpack< Real, Device, Index >::operator=( const BiEllpack< Real2, Device2, In this->virtualRows = matrix.virtualRows; this->rowPermArray = matrix.rowPermArray; this->groupPointers = matrix.groupPointers; if( std::is_same< Device, Devices::MIC >::value ) { throw std::runtime_error("Not Implemented yet for MIC"); } return *this; } template< typename Real, typename Device, typename Index > bool BiEllpack< Real, Device, Index >::save( File& file ) const void BiEllpack< Real, Device, Index >::save( File& file ) const { Sparse< Real, Device, Index >::save( file ); file << this->groupPointers << this->rowPermArray; Loading @@ -735,7 +708,7 @@ bool BiEllpack< Real, Device, Index >::save( File& file ) const template< typename Real, typename Device, typename Index > bool BiEllpack< Real, Device, Index >::load( File& file ) void BiEllpack< Real, Device, Index >::load( File& file ) { Sparse< Real, Device, Index >::load( file ); file >> this->groupPointers >> this->rowPermArray; Loading @@ -744,7 +717,7 @@ bool BiEllpack< Real, Device, Index >::load( File& file ) template< typename Real, typename Device, typename Index > bool BiEllpack< Real, Device, Index >::save( const String& fileName ) const void BiEllpack< Real, Device, Index >::save( const String& fileName ) const { Object::save( fileName ); } Loading @@ -752,7 +725,7 @@ bool BiEllpack< Real, Device, Index >::save( const String& fileName ) const template< typename Real, typename Device, typename Index > bool BiEllpack< Real, Device, Index >::load( const String& fileName ) void BiEllpack< Real, Device, Index >::load( const String& fileName ) { Object::load( fileName ); } Loading src/TNL/Matrices/CSR_impl.h +1 −3 Original line number Diff line number Diff line Loading @@ -45,9 +45,7 @@ String CSR< Real, Device, Index >::getSerializationType() { return String( "Matrices::CSR< ") + TNL::getType< Real>() + String( ", " ) + String( Device :: getDeviceType() ) + String( ", " ) + ", [any_device], " + String( TNL::getType< Index >() ) + String( " >" ); } Loading Loading
src/Benchmarks/SpMV/spmv.h +19 −19 Original line number Diff line number Diff line Loading @@ -100,11 +100,11 @@ benchmarkSpMV( Benchmark & benchmark, return false; } #ifdef HAVE_CUDA // cuSPARSE handle setup cusparseHandle_t cusparseHandle; cusparseCreate( &cusparseHandle ); #ifdef HAVE_CUDA // cuSPARSE (in TNL's CSR) only works for device, copy the matrix from host to device CSRdeviceMatrix = CSRhostMatrix; Loading Loading @@ -185,9 +185,11 @@ benchmarkSpMV( Benchmark & benchmark, auto spmvCuda = [&]() { deviceMatrix.vectorProduct( deviceVector, deviceVector2 ); }; #ifdef HAVE_CUDA auto spmvCusparse = [&]() { cusparseCSR.vectorProduct( deviceVector, deviceVector2 ); }; #endif benchmark.setOperation( datasetSize ); benchmark.time< Devices::Host >( reset, "CPU", spmvHost ); Loading @@ -201,18 +203,6 @@ benchmarkSpMV( Benchmark & benchmark, // Copy the values resultHostVector2 = hostVector2; #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); // Initialize the device vector to be compared. // (The values in deviceVector2 will be reset when spmvCusparse starts) HostVector resultDeviceVector2; resultDeviceVector2.setSize( deviceVector2.getSize() ); resultDeviceVector2.setValue( 0.0 ); resultDeviceVector2 = deviceVector2; #endif // Setup cuSPARSE MetaData, since it has the same header as CSR, // and therefore will not get its own headers (rows, cols, speedup etc.) in log. // * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten. Loading @@ -225,6 +215,16 @@ benchmarkSpMV( Benchmark & benchmark, } )); #ifdef HAVE_CUDA benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda ); // Initialize the device vector to be compared. // (The values in deviceVector2 will be reset when spmvCusparse starts) HostVector resultDeviceVector2; resultDeviceVector2.setSize( deviceVector2.getSize() ); resultDeviceVector2.setValue( 0.0 ); resultDeviceVector2 = deviceVector2; benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse ); HostVector resultcuSPARSEDeviceVector2; Loading @@ -232,7 +232,6 @@ benchmarkSpMV( Benchmark & benchmark, resultcuSPARSEDeviceVector2.setValue( 0.0 ); resultcuSPARSEDeviceVector2 = deviceVector2; #endif // Difference between GPU (curent format) and GPU-cuSPARSE results Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 ); Loading @@ -243,6 +242,7 @@ benchmarkSpMV( Benchmark & benchmark, char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ]; char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ]; #endif // Difference between CPU and GPU results for the current format Loading Loading @@ -277,7 +277,7 @@ benchmarkSpmvSynthetic( Benchmark & benchmark, bool result = true; result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::SlicedEllpack >( benchmark, inputFileName, verboseMR ); result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR ); // AdEllpack is broken Loading
src/TNL/Matrices/AdEllpack.h +4 −4 Original line number Diff line number Diff line Loading @@ -195,7 +195,7 @@ public: typename = typename Enabler< Device2 >::type > AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix ); bool save( File& file ) const; void save( File& file ) const; void load( File& file ); Loading @@ -207,13 +207,13 @@ public: bool balanceLoad( const RealType average, ConstCompressedRowLengthsVectorView rowLengths, warpList< ThisType >* list ); warpList< AdEllpack >* list ); void computeWarps( const IndexType SMs, const IndexType threadsPerSM, warpList< ThisType >* list ); warpList< AdEllpack >* list ); bool createArrays( warpList< ThisType >* list ); bool createArrays( warpList< AdEllpack >* list ); void performRowTest(); Loading
src/TNL/Matrices/AdEllpack_impl.h +6 −28 Original line number Diff line number Diff line Loading @@ -162,28 +162,6 @@ AdEllpack< Real, Device, Index >::AdEllpack() warpSize( 32 ) {} template< typename Real, typename Device, typename Index > String AdEllpack< Real, Device, Index >::getTypeVirtual() const { return this->getType(); } template< typename Real, typename Device, typename Index > String AdEllpack< Real, Device, Index >::getType() { return String( "Matrices::AdEllpack< ") + String( TNL::getType< Real >() ) + String( ", " ) + String( Device::getDeviceType() ) + String( ", " ) + String( TNL::getType< Index >() ) + String( " >" ); } template< typename Real, typename Device, typename Index > Loading @@ -204,7 +182,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) average /= ( RealType ) this->getRows(); this->totalLoad = average; warpList< ThisType >* list = new warpList< ThisType >(); warpList< AdEllpack >* list = new warpList< AdEllpack >(); if( !this->balanceLoad( average, rowLengths, list ) ) throw 0; // TODO: Make better exception Loading Loading @@ -766,7 +744,7 @@ template< typename Real, typename Index > bool AdEllpack< Real, Device, Index >::balanceLoad( const RealType average, ConstCompressedRowLengthsVectorView rowLengths, warpList< ThisType >* list ) warpList< AdEllpack >* list ) { IndexType offset, rowOffset, localLoad, reduceMap[ 32 ]; Loading Loading @@ -882,10 +860,10 @@ template< typename Real, typename Index > void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs, const IndexType threadsPerSM, warpList< ThisType >* list ) warpList< AdEllpack >* list ) { IndexType averageLoad = 0; warpInfo< ThisType >* temp = list->getHead()->next; warpInfo< AdEllpack >* temp = list->getHead()->next; while( temp/*->next*/ != list->getTail() ) { Loading Loading @@ -918,7 +896,7 @@ void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs, template< typename Real, typename Device, typename Index > bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list ) bool AdEllpack< Real, Device, Index >::createArrays( warpList< AdEllpack >* list ) { IndexType length = list->getNumberOfWarps(); Loading @@ -928,7 +906,7 @@ bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list this->reduceMap.setSize( length * this->warpSize ); IndexType iteration = 0; warpInfo< ThisType >* warp = list->getHead()->next; warpInfo< AdEllpack >* warp = list->getHead()->next; while( warp != list->getTail() ) { this->offset.setElement( iteration, warp->offset ); Loading
src/TNL/Matrices/BiEllpack_impl.h +4 −31 Original line number Diff line number Diff line Loading @@ -45,28 +45,6 @@ BiEllpack< Real, Device, Index >::BiEllpack() logWarpSize( 5 ) {} template< typename Real, typename Device, typename Index > String BiEllpack< Real, Device, Index >::getType() { return String( "Matrices::BiEllpack< ") + String( TNL::getType< Real >() ) + String( ", " ) + String( Device :: getDeviceType() ) + String( ", " ) + String( TNL::getType< Index >() ) + String( " >" ); } template< typename Real, typename Device, typename Index > String BiEllpack< Real, Device, Index >::getTypeVirtual() const { return this->getType(); } template< typename Real, typename Device, typename Index > Loading Loading @@ -715,18 +693,13 @@ BiEllpack< Real, Device, Index >::operator=( const BiEllpack< Real2, Device2, In this->virtualRows = matrix.virtualRows; this->rowPermArray = matrix.rowPermArray; this->groupPointers = matrix.groupPointers; if( std::is_same< Device, Devices::MIC >::value ) { throw std::runtime_error("Not Implemented yet for MIC"); } return *this; } template< typename Real, typename Device, typename Index > bool BiEllpack< Real, Device, Index >::save( File& file ) const void BiEllpack< Real, Device, Index >::save( File& file ) const { Sparse< Real, Device, Index >::save( file ); file << this->groupPointers << this->rowPermArray; Loading @@ -735,7 +708,7 @@ bool BiEllpack< Real, Device, Index >::save( File& file ) const template< typename Real, typename Device, typename Index > bool BiEllpack< Real, Device, Index >::load( File& file ) void BiEllpack< Real, Device, Index >::load( File& file ) { Sparse< Real, Device, Index >::load( file ); file >> this->groupPointers >> this->rowPermArray; Loading @@ -744,7 +717,7 @@ bool BiEllpack< Real, Device, Index >::load( File& file ) template< typename Real, typename Device, typename Index > bool BiEllpack< Real, Device, Index >::save( const String& fileName ) const void BiEllpack< Real, Device, Index >::save( const String& fileName ) const { Object::save( fileName ); } Loading @@ -752,7 +725,7 @@ bool BiEllpack< Real, Device, Index >::save( const String& fileName ) const template< typename Real, typename Device, typename Index > bool BiEllpack< Real, Device, Index >::load( const String& fileName ) void BiEllpack< Real, Device, Index >::load( const String& fileName ) { Object::load( fileName ); } Loading
src/TNL/Matrices/CSR_impl.h +1 −3 Original line number Diff line number Diff line Loading @@ -45,9 +45,7 @@ String CSR< Real, Device, Index >::getSerializationType() { return String( "Matrices::CSR< ") + TNL::getType< Real>() + String( ", " ) + String( Device :: getDeviceType() ) + String( ", " ) + ", [any_device], " + String( TNL::getType< Index >() ) + String( " >" ); } Loading