From 2b9409c876ba4c4f5d888dbc57137c12befa1bd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com>
Date: Tue, 26 Nov 2019 19:14:53 +0100
Subject: [PATCH] Fixes after rebase.

---
 src/Benchmarks/SpMV/spmv.h             | 38 +++++++++++++-------------
 src/TNL/Matrices/AdEllpack.h           |  8 +++---
 src/TNL/Matrices/AdEllpack_impl.h      | 34 ++++-------------------
 src/TNL/Matrices/BiEllpack_impl.h      | 35 +++---------------------
 src/TNL/Matrices/CSR_impl.h            |  4 +--
 src/TNL/Matrices/ChunkedEllpack_impl.h |  5 ----
 src/TNL/Matrices/Dense_impl.h          | 22 ---------------
 src/TNL/Matrices/Ellpack_impl.h        | 18 ++++++------
 src/TNL/Matrices/SlicedEllpack_impl.h  |  4 +--
 9 files changed, 43 insertions(+), 125 deletions(-)

diff --git a/src/Benchmarks/SpMV/spmv.h b/src/Benchmarks/SpMV/spmv.h
index 6a9dab96a4..b7579386e5 100644
--- a/src/Benchmarks/SpMV/spmv.h
+++ b/src/Benchmarks/SpMV/spmv.h
@@ -74,8 +74,8 @@ template< typename Real,
           template< typename, typename, typename > class Matrix,
           template< typename, typename, typename > class Vector = Containers::Vector >
 bool
-benchmarkSpMV( Benchmark & benchmark,
-               const String & inputFileName,
+benchmarkSpMV( Benchmark& benchmark,
+               const String& inputFileName,
                bool verboseMR )
 {
    // Setup CSR for cuSPARSE. It will compared to the format given as a template parameter to this function
@@ -100,11 +100,11 @@ benchmarkSpMV( Benchmark & benchmark,
       return false;
    }
 
+#ifdef HAVE_CUDA
    // cuSPARSE handle setup
    cusparseHandle_t cusparseHandle;
    cusparseCreate( &cusparseHandle );
 
-#ifdef HAVE_CUDA
    // cuSPARSE (in TNL's CSR) only works for device, copy the matrix from host to device
    CSRdeviceMatrix = CSRhostMatrix;
 
@@ -185,9 +185,11 @@ benchmarkSpMV( Benchmark & benchmark,
    auto spmvCuda = [&]() {
       deviceMatrix.vectorProduct( deviceVector, deviceVector2 );
    };
+#ifdef HAVE_CUDA
    auto spmvCusparse = [&]() {
       cusparseCSR.vectorProduct( deviceVector, deviceVector2 );
    };
+#endif
 
    benchmark.setOperation( datasetSize );
    benchmark.time< Devices::Host >( reset, "CPU", spmvHost );
@@ -201,18 +203,6 @@ benchmarkSpMV( Benchmark & benchmark,
    // Copy the values
    resultHostVector2 = hostVector2;
 
-#ifdef HAVE_CUDA
-   benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
-
-   // Initialize the device vector to be compared.
-   // (The values in deviceVector2 will be reset when spmvCusparse starts)
-   HostVector resultDeviceVector2;
-   resultDeviceVector2.setSize( deviceVector2.getSize() );
-   resultDeviceVector2.setValue( 0.0 );
-
-   resultDeviceVector2 = deviceVector2;
-#endif
-
   // Setup cuSPARSE MetaData, since it has the same header as CSR,
   // and therefore will not get its own headers (rows, cols, speedup etc.) in log.
   // * Not setting this up causes (among other undiscovered errors) the speedup from CPU to GPU on the input format to be overwritten.
@@ -223,8 +213,18 @@ benchmarkSpMV( Benchmark & benchmark,
       { "columns", convertToString( hostMatrix.getColumns() ) },
       { "matrix format", convertToString( "CSR-cuSPARSE" ) }
    } ));
-   
+ #ifdef HAVE_CUDA
+   benchmark.time< Devices::Cuda >( reset, "GPU", spmvCuda );
+
+   // Initialize the device vector to be compared.
+   // (The values in deviceVector2 will be reset when spmvCusparse starts)
+   HostVector resultDeviceVector2;
+   resultDeviceVector2.setSize( deviceVector2.getSize() );
+   resultDeviceVector2.setValue( 0.0 );
+
+   resultDeviceVector2 = deviceVector2;
+
    benchmark.time< Devices::Cuda >( reset, "GPU", spmvCusparse );
 
    HostVector resultcuSPARSEDeviceVector2;
 
@@ -232,7 +232,6 @@ benchmarkSpMV( Benchmark & benchmark,
    resultcuSPARSEDeviceVector2.setValue( 0.0 );
 
    resultcuSPARSEDeviceVector2 = deviceVector2;
- #endif
 
    // Difference between GPU (curent format) and GPU-cuSPARSE results
    Real cuSparseDifferenceAbsMax = resultDeviceVector2.differenceAbsMax( resultcuSPARSEDeviceVector2 );
@@ -243,6 +242,7 @@ benchmarkSpMV( Benchmark & benchmark,
 
    char *GPUcuSparse_absMax = &GPUxGPUcuSparse_resultDifferenceAbsMax[ 0u ];
    char *GPUcuSparse_lpNorm = &GPUxGPUcuSparse_resultDifferenceLpNorm[ 0u ];
+ #endif
 
 
    // Difference between CPU and GPU results for the current format
@@ -270,14 +270,14 @@ benchmarkSpMV( Benchmark & benchmark,
 
 template< typename Real = double,
           typename Index = int >
 bool
-benchmarkSpmvSynthetic( Benchmark & benchmark,
+benchmarkSpmvSynthetic( Benchmark& benchmark,
                         const String& inputFileName,
                         bool verboseMR )
 {
    bool result = true;
    result |= benchmarkSpMV< Real, Matrices::CSR >( benchmark, inputFileName, verboseMR );
    result |= benchmarkSpMV< Real, Matrices::Ellpack >( benchmark, inputFileName, verboseMR );
-   result |= benchmarkSpMV< Real, SlicedEllpack >( benchmark, inputFileName, verboseMR );
+   result |= benchmarkSpMV< Real, Matrices::SlicedEllpack >( benchmark, inputFileName, verboseMR );
    result |= benchmarkSpMV< Real, Matrices::ChunkedEllpack >( benchmark, inputFileName, verboseMR );
    // AdEllpack is broken
diff --git a/src/TNL/Matrices/AdEllpack.h b/src/TNL/Matrices/AdEllpack.h
index 34b0819146..f011e6c804 100644
--- a/src/TNL/Matrices/AdEllpack.h
+++ b/src/TNL/Matrices/AdEllpack.h
@@ -195,7 +195,7 @@ public:
              typename = typename Enabler< Device2 >::type >
    AdEllpack& operator=( const AdEllpack< Real2, Device2, Index2 >& matrix );
 
-   bool save( File& file ) const;
+   void save( File& file ) const;
 
    void load( File& file );
 
@@ -207,13 +207,13 @@ public:
 
    bool balanceLoad( const RealType average,
                      ConstCompressedRowLengthsVectorView rowLengths,
-                     warpList< ThisType >* list );
+                     warpList< AdEllpack >* list );
 
    void computeWarps( const IndexType SMs,
                       const IndexType threadsPerSM,
-                      warpList< ThisType >* list );
+                      warpList< AdEllpack >* list );
 
-   bool createArrays( warpList< ThisType >* list );
+   bool createArrays( warpList< AdEllpack >* list );
 
    void performRowTest();
 
diff --git a/src/TNL/Matrices/AdEllpack_impl.h b/src/TNL/Matrices/AdEllpack_impl.h
index b01e9041e6..510c1e19b1 100644
--- a/src/TNL/Matrices/AdEllpack_impl.h
+++ b/src/TNL/Matrices/AdEllpack_impl.h
@@ -162,28 +162,6 @@ AdEllpack< Real, Device, Index >::AdEllpack()
   warpSize( 32 )
 {}
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String AdEllpack< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String AdEllpack< Real, Device, Index >::getType()
-{
-   return String( "Matrices::AdEllpack< ") +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          String( Device::getDeviceType() ) +
-          String( ", " ) +
-          String( TNL::getType< Index >() ) +
-          String( " >" );
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -204,7 +182,7 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths )
    average /= ( RealType ) this->getRows();
    this->totalLoad = average;
 
-   warpList< ThisType >* list = new warpList< ThisType >();
+   warpList< AdEllpack >* list = new warpList< AdEllpack >();
 
    if( !this->balanceLoad( average, rowLengths, list ) )
      throw 0; // TODO: Make better exception
@@ -766,7 +744,7 @@ template< typename Real,
           typename Index >
 bool AdEllpack< Real, Device, Index >::balanceLoad( const RealType average,
                                                     ConstCompressedRowLengthsVectorView rowLengths,
-                                                    warpList< ThisType >* list )
+                                                    warpList< AdEllpack >* list )
 {
    IndexType offset, rowOffset, localLoad, reduceMap[ 32 ];
 
@@ -882,10 +860,10 @@ template< typename Real,
           typename Index >
 void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs,
                                                      const IndexType threadsPerSM,
-                                                     warpList< ThisType >* list )
+                                                     warpList< AdEllpack >* list )
 {
    IndexType averageLoad = 0;
-   warpInfo< ThisType >* temp = list->getHead()->next;
+   warpInfo< AdEllpack >* temp = list->getHead()->next;
 
    while( temp/*->next*/ != list->getTail() )
    {
@@ -918,7 +896,7 @@ void AdEllpack< Real, Device, Index >::computeWarps( const IndexType SMs,
 template< typename Real,
           typename Device,
           typename Index >
-bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list )
+bool AdEllpack< Real, Device, Index >::createArrays( warpList< AdEllpack >* list )
 {
    IndexType length = list->getNumberOfWarps();
 
@@ -928,7 +906,7 @@ bool AdEllpack< Real, Device, Index >::createArrays( warpList< ThisType >* list
    this->reduceMap.setSize( length * this->warpSize );
 
    IndexType iteration = 0;
-   warpInfo< ThisType >* warp = list->getHead()->next;
+   warpInfo< AdEllpack >* warp = list->getHead()->next;
    while( warp != list->getTail() )
    {
       this->offset.setElement( iteration, warp->offset );
diff --git a/src/TNL/Matrices/BiEllpack_impl.h b/src/TNL/Matrices/BiEllpack_impl.h
index e20b5cd230..5a8f67d7fc 100644
--- a/src/TNL/Matrices/BiEllpack_impl.h
+++ b/src/TNL/Matrices/BiEllpack_impl.h
@@ -45,28 +45,6 @@ BiEllpack< Real, Device, Index >::BiEllpack()
   logWarpSize( 5 )
 {}
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String BiEllpack< Real, Device, Index >::getType()
-{
-   return String( "Matrices::BiEllpack< ") +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          String( Device :: getDeviceType() ) +
-          String( ", " ) +
-          String( TNL::getType< Index >() ) +
-          String( " >" );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String BiEllpack< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
@@ -715,18 +693,13 @@ BiEllpack< Real, Device, Index >::operator=( const BiEllpack< Real2, Device2, In
    this->virtualRows = matrix.virtualRows;
    this->rowPermArray = matrix.rowPermArray;
    this->groupPointers = matrix.groupPointers;
-
-   if( std::is_same< Device, Devices::MIC >::value ) {
-      throw std::runtime_error("Not Implemented yet for MIC");
-   }
-
    return *this;
 }
 
 template< typename Real,
           typename Device,
           typename Index >
-bool BiEllpack< Real, Device, Index >::save( File& file ) const
+void BiEllpack< Real, Device, Index >::save( File& file ) const
 {
    Sparse< Real, Device, Index >::save( file );
    file << this->groupPointers << this->rowPermArray;
@@ -735,7 +708,7 @@ bool BiEllpack< Real, Device, Index >::save( File& file ) const
 template< typename Real,
           typename Device,
           typename Index >
-bool BiEllpack< Real, Device, Index >::load( File& file )
+void BiEllpack< Real, Device, Index >::load( File& file )
 {
    Sparse< Real, Device, Index >::load( file );
    file >> this->groupPointers >> this->rowPermArray;
@@ -744,7 +717,7 @@ bool BiEllpack< Real, Device, Index >::load( File& file )
 template< typename Real,
           typename Device,
           typename Index >
-bool BiEllpack< Real, Device, Index >::save( const String& fileName ) const
+void BiEllpack< Real, Device, Index >::save( const String& fileName ) const
 {
    Object::save( fileName );
 }
@@ -752,7 +725,7 @@ bool BiEllpack< Real, Device, Index >::save( const String& fileName ) const
 template< typename Real,
           typename Device,
           typename Index >
-bool BiEllpack< Real, Device, Index >::load( const String& fileName )
+void BiEllpack< Real, Device, Index >::load( const String& fileName )
 {
    Object::load( fileName );
 }
diff --git a/src/TNL/Matrices/CSR_impl.h b/src/TNL/Matrices/CSR_impl.h
index 3164a7fff3..db31d6dcde 100644
--- a/src/TNL/Matrices/CSR_impl.h
+++ b/src/TNL/Matrices/CSR_impl.h
@@ -45,9 +45,7 @@ String CSR< Real, Device, Index >::getSerializationType()
 {
    return String( "Matrices::CSR< ") +
           TNL::getType< Real>() +
-          String( ", " ) +
-          String( Device :: getDeviceType() ) +
-          String( ", " ) +
+          ", [any_device], " +
           String( TNL::getType< Index >() ) +
           String( " >" );
 }
diff --git a/src/TNL/Matrices/ChunkedEllpack_impl.h b/src/TNL/Matrices/ChunkedEllpack_impl.h
index 3826a8574f..9752ee4316 100644
--- a/src/TNL/Matrices/ChunkedEllpack_impl.h
+++ b/src/TNL/Matrices/ChunkedEllpack_impl.h
@@ -1278,11 +1278,6 @@ ChunkedEllpack< Real, Device, Index >::operator=( const ChunkedEllpack< Real2, D
          }
       }
    }
-
-   if( std::is_same< Device, Devices::MIC >::value ) {
-      throw std::runtime_error("Not Implemented yet for MIC");
-   }
-
    return *this;
 }
 
diff --git a/src/TNL/Matrices/Dense_impl.h b/src/TNL/Matrices/Dense_impl.h
index f690946e88..246bd09edb 100644
--- a/src/TNL/Matrices/Dense_impl.h
+++ b/src/TNL/Matrices/Dense_impl.h
@@ -24,28 +24,6 @@ Dense< Real, Device, Index >::Dense()
 {
 }
 
-template< typename Real,
-          typename Device,
-          typename Index >
-String Dense< Real, Device, Index >::getType()
-{
-   return String( "Matrices::Dense< " ) +
-          String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          String( Device :: getDeviceType() ) +
-          String( ", " ) +
-          String( TNL::getType< Index >() ) +
-          String( " >" );
-}
-
-template< typename Real,
-          typename Device,
-          typename Index >
-String Dense< Real, Device, Index >::getTypeVirtual() const
-{
-   return this->getType();
-}
-
 template< typename Real,
           typename Device,
           typename Index >
diff --git a/src/TNL/Matrices/Ellpack_impl.h b/src/TNL/Matrices/Ellpack_impl.h
index b99dbc88b5..5ae12f4087 100644
--- a/src/TNL/Matrices/Ellpack_impl.h
+++ b/src/TNL/Matrices/Ellpack_impl.h
@@ -16,7 +16,7 @@
 #include <TNL/Exceptions/NotImplementedError.h>
 
 namespace TNL {
-namespace Matrices {   
+namespace Matrices {
 
 template< typename Real,
           typename Device,
@@ -33,9 +33,7 @@ String Ellpack< Real, Device, Index >::getSerializationType()
 {
    return String( "Matrices::Ellpack< " ) +
           String( TNL::getType< Real >() ) +
-          String( ", " ) +
-          String( Device :: getDeviceType() ) +
-          String( ", " ) +
+          ", [any device], " +
           getType< Index >() +
           String( " >" );
 }
@@ -59,21 +57,21 @@ void Ellpack< Real, Device, Index >::setDimensions( const IndexType rows,
                  << " columns = " << columns << std::endl );
    this->rows = rows;
    this->columns = columns;
-   
+
    if( std::is_same< Device, Devices::Cuda >::value )
    {
-      this->alignedRows = roundToMultiple( columns, Devices::Cuda::getWarpSize() );
+      this->alignedRows = roundToMultiple( columns, Cuda::getWarpSize() );
       if( this->rows - this->alignedRows > 0 )
       {
          IndexType missingRows = this->rows - this->alignedRows;
-         
-         missingRows = roundToMultiple( missingRows, Devices::Cuda::getWarpSize() );
+
+         missingRows = roundToMultiple( missingRows, Cuda::getWarpSize() );
         this->alignedRows += missingRows;
      }
   }
   else
      this->alignedRows = rows;
-   
+
   if( this->rowLengths != 0 )
      allocateElements();
 }
@@ -87,7 +85,7 @@ void Ellpack< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRow
    TNL_ASSERT_GT( this->getColumns(), 0, "cannot set row lengths of an empty matrix" );
    TNL_ASSERT_EQ( this->getRows(), rowLengths.getSize(), "wrong size of the rowLengths vector" );
 
-   this->rowLengths = this->maxRowLength = rowLengths.max();
+   this->rowLengths = this->maxRowLength = max( rowLengths );
   allocateElements();
 }
 
diff --git a/src/TNL/Matrices/SlicedEllpack_impl.h b/src/TNL/Matrices/SlicedEllpack_impl.h
index 772360c8c0..8c629b563c 100644
--- a/src/TNL/Matrices/SlicedEllpack_impl.h
+++ b/src/TNL/Matrices/SlicedEllpack_impl.h
@@ -34,9 +34,7 @@ String SlicedEllpack< Real, Device, Index, SliceSize >::getSerializationType()
 {
    return String( "Matrices::SlicedEllpack< ") +
           TNL::getType< Real >() +
-          String( ", " ) +
-          String( Device :: getDeviceType() ) +
-          String( ", " ) +
+          ", [any_device], " +
           String( TNL::getType< Index >() ) +
           String( " >" );
 }
-- 
GitLab