From 322ac56e385cc398fb44a2cef78067d2cdaf82b6 Mon Sep 17 00:00:00 2001 From: Tomas Oberhuber <tomas.oberhuber@fjfi.cvut.cz> Date: Wed, 4 Dec 2019 18:08:16 +0100 Subject: [PATCH] Ellpack segments work well. --- src/TNL/Containers/Segments/Ellpack.h | 1 + src/TNL/Containers/Segments/Ellpack.hpp | 50 +++++++++++----- src/TNL/Matrices/SparseMatrix.hpp | 12 ++-- src/UnitTests/Matrices/CMakeLists.txt | 3 + .../SparseMatrixTest_Ellpack_segments.h | 59 +++++++++++-------- 5 files changed, 83 insertions(+), 42 deletions(-) diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index 772566f518..dc1a717b3a 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -28,6 +28,7 @@ class Ellpack using IndexType = Index; using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; static constexpr int getAlignment() { return Alignment; } + static constexpr bool getRowMajorOrder() { return RowMajorOrder; } Ellpack(); diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 42d7eb8c1c..8a23693ec8 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -63,7 +63,7 @@ setSizes( const SizesHolder& sizes ) if( RowMajorOrder ) this->alignedSize = this->size; else - this->alignedSize = roundUpDivision( size / this->getAlignment() ) * this->getAlignment(); + this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); } template< typename Device, @@ -186,17 +186,35 @@ void Ellpack< Device, Index, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { - using RealType = decltype( fetch( IndexType(), IndexType() ) ); - const auto offsetsView = this->offsets.getConstView(); - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { - const IndexType begin = offsetsView[ i ]; - const IndexType end = offsetsView[ i + 1 ]; - RealType aux( zero ); - for( IndexType j = begin; j < end; j++ ) - reduction( aux, fetch( i, j, args... ) ); - keeper( i, aux ); - }; - Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + if( RowMajorOrder ) + { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i * segmentSize; + const IndexType end = begin + segmentSize; + RealType aux( zero ); + for( IndexType j = begin; j < end; j++ ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + using RealType = decltype( fetch( IndexType(), IndexType() ) ); + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) mutable { + const IndexType begin = i; + const IndexType end = storageSize; + RealType aux( zero ); + for( IndexType j = begin; j < end; j += alignedSize ) + reduction( aux, fetch( i, j, args... ) ); + keeper( i, aux ); + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } } template< typename Device, @@ -219,7 +237,9 @@ void Ellpack< Device, Index, RowMajorOrder, Alignment >:: save( File& file ) const { - file << this->offsets; + file.save( &segmentSize ); + file.save( &size ); + file.save( &alignedSize ); } template< typename Device, @@ -230,7 +250,9 @@ void Ellpack< Device, Index, RowMajorOrder, Alignment >:: load( File& file ) { - file >> this->offsets; + file.load( &segmentSize ); + file.load( &size ); + file.load( &alignedSize ); } } // namespace Segments diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index a43ddba829..9bc8d7fb73 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -489,12 +489,16 @@ vectorProduct( const InVector& inVector, const RealType& matrixMultiplicator, const RealType& inVectorAddition ) const { - auto inVectorView = inVector.getConstView(); + const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); - auto valuesView = this->values.getConstView(); - auto columnIndexesView = this->columnIndexes.getConstView(); + const auto valuesView = this->values.getConstView(); + const auto columnIndexesView = this->columnIndexes.getConstView(); + const IndexType paddingIndex = this->getPaddingIndex(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType offset ) -> RealType { - return valuesView[ offset ] * inVectorView[ columnIndexesView[ offset ] ]; + const IndexType column = columnIndexesView[ offset ]; + if( column == paddingIndex ) + return 0.0; + return valuesView[ offset ] * inVectorView[ column ]; }; auto reduction = [] __cuda_callable__ ( RealType& sum, const RealType& value ) { sum += value; diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index 996dd0430c..ef1f043710 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -31,6 +31,9 @@ IF( BUILD_CUDA ) CUDA_ADD_EXECUTABLE( SparseMatrixTest_CSR_segments SparseMatrixTest_CSR_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) + CUDA_ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cu OPTIONS ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} ) + ELSE( BUILD_CUDA ) ADD_EXECUTABLE( SparseMatrixCopyTest SparseMatrixCopyTest.cpp ) TARGET_COMPILE_OPTIONS( SparseMatrixCopyTest PRIVATE ${CXX_TESTS_FLAGS} ) diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h index 79cdf06cff..c54aab9486 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ -26,34 +26,45 @@ protected: using EllpackMatrixType = Matrix; }; +//// +// Row-major format is used for the host system +template< typename Device, typename Index > +using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, true, 32 >; + + +//// +// Column-major format is used for GPUs +template< typename Device, typename Index > +using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, false, 32 >; + // types for which MatrixTest is instantiated using EllpackMatrixTypes = ::testing::Types < - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long > + TNL::Matrices::SparseMatrix< int, RowMajorEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< long, RowMajorEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< float, RowMajorEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< double, RowMajorEllpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< int, RowMajorEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< long, RowMajorEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< float, RowMajorEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< double, RowMajorEllpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< int, RowMajorEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< long, RowMajorEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< float, RowMajorEllpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< double, RowMajorEllpack, TNL::Devices::Host, long > #ifdef HAVE_CUDA - ,TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, - TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, - TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long > + ,TNL::Matrices::SparseMatrix< int, ColumnMajorEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< long, ColumnMajorEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< float, ColumnMajorEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< double, ColumnMajorEllpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< int, ColumnMajorEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< long, ColumnMajorEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< float, ColumnMajorEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< double, ColumnMajorEllpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< int, ColumnMajorEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< long, ColumnMajorEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< float, ColumnMajorEllpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< double, ColumnMajorEllpack, TNL::Devices::Cuda, long > #endif >; -- GitLab