From 3abf57d2212c9290f44fe2521d9c481370d6964c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Tue, 3 Dec 2019 21:36:17 +0100 Subject: [PATCH] Implementing Ellpack segments. --- src/TNL/Containers/Segments/CSR.hpp | 2 +- src/TNL/Containers/Segments/Ellpack.h | 1 + src/TNL/Containers/Segments/Ellpack.hpp | 156 ++++++++++++------ src/UnitTests/Matrices/CMakeLists.txt | 6 + .../Matrices/SparseMatrixTest_CSR_segments.h | 4 +- .../SparseMatrixTest_Ellpack_segments.cpp | 1 + .../SparseMatrixTest_Ellpack_segments.cu | 1 + .../SparseMatrixTest_Ellpack_segments.h | 141 ++++++++++++++++ 8 files changed, 255 insertions(+), 57 deletions(-) create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu create mode 100644 src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h diff --git a/src/TNL/Containers/Segments/CSR.hpp b/src/TNL/Containers/Segments/CSR.hpp index ecd52190c7..b40524e5e9 100644 --- a/src/TNL/Containers/Segments/CSR.hpp +++ b/src/TNL/Containers/Segments/CSR.hpp @@ -204,6 +204,6 @@ load( File& file ) file >> this->offsets; } - } // namespace Segements + } // namespace Segments } // namespace Conatiners } // namespace TNL diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index 49f859afb4..772566f518 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -18,6 +18,7 @@ namespace TNL { template< typename Device, typename Index, + bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, int Alignment = 32 > class Ellpack { diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index 0b62405146..42d7eb8c1c 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -20,128 +20,170 @@ namespace TNL { template< typename Device, - typename 
Index > -Ellpack< Device, Index >:: -Ellpack() : size( 0 ), rowLength( 0 ) + typename Index, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack() + : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { } template< typename Device, - typename Index > -Ellpack< Device, Index >:: -Ellpack( const Ellpack& ellpack ) : offsets( ellpack.offsets ) + typename Index, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack( const Ellpack& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { } template< typename Device, - typename Index > -Ellpack< Device, Index >:: -Ellpack( const Ellpack&& ellpack ) : offsets( std::move( ellpack.offsets ) ) + typename Index, + bool RowMajorOrder, + int Alignment > +Ellpack< Device, Index, RowMajorOrder, Alignment >:: +Ellpack( const Ellpack&& ellpack ) + : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { - } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > template< typename SizesHolder > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: setSizes( const SizesHolder& sizes ) { this->segmentSize = max( sizes ); this->size = sizes.getSize(); + if( RowMajorOrder ) + this->alignedSize = this->size; + else + this->alignedSize = roundUpDivision( this->size, this->getAlignment() ) * this->getAlignment(); /* round the segment count up to a multiple of the alignment */ } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > __cuda_callable__ Index -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: getSize() const { - return this->offsets.getSize() - 1; + return this->size; } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > __cuda_callable__ Index -Ellpack< Device, Index >:: +Ellpack< Device, Index, 
RowMajorOrder, Alignment >:: getSegmentSize( const IndexType segmentIdx ) const { return this->segmentSize; } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > __cuda_callable__ Index -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: getStorageSize() const { - return this->size * this->segmentSize; + return this->alignedSize * this->segmentSize; } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > __cuda_callable__ Index -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const { - if( ! std::is_same< DeviceType, Devices::Host >::value ) - { -#ifdef __CUDA_ARCH__ - return offsets[ segmentIdx ] + localIdx; -#else - return offsets.getElement( segmentIdx ) + localIdx; -#endif - } - return offsets[ segmentIdx ] + localIdx; + if( RowMajorOrder ) + return segmentIdx * this->segmentSize + localIdx; + else + return segmentIdx + this->alignedSize * localIdx; } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > __cuda_callable__ void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > template< typename Function, typename... Args > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto offsetsView = this->offsets.getView(); - auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { - const IndexType begin = offsetsView[ i ]; - const IndexType end = offsetsView[ i + 1 ]; - for( IndexType j = begin; j < end; j++ ) - if( ! f( i, j, args... 
) ) - break; - }; - Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + if( RowMajorOrder ) + { + const IndexType segmentSize = this->segmentSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { + const IndexType begin = i * segmentSize; + const IndexType end = begin + segmentSize; + for( IndexType j = begin; j < end; j++ ) + if( ! f( i, j, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } + else + { + const IndexType storageSize = this->getStorageSize(); + const IndexType alignedSize = this->alignedSize; + auto l = [=] __cuda_callable__ ( const IndexType i, Args... args ) { + const IndexType begin = i; + const IndexType end = storageSize; + for( IndexType j = begin; j < end; j += alignedSize ) + if( ! f( i, j, args... ) ) + break; + }; + Algorithms::ParallelFor< Device >::exec( first, last, l, args... ); + } } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > template< typename Function, typename... Args > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSize(), f, args... ); } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const { using RealType = decltype( fetch( IndexType(), IndexType() ) ); @@ -158,33 +200,39 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, Reduction& red } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: allReduction( Fetch& fetch, Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSize(), fetch, reduction, keeper, zero, args... ); } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: save( File& file ) const { file << this->offsets; } template< typename Device, - typename Index > + typename Index, + bool RowMajorOrder, + int Alignment > void -Ellpack< Device, Index >:: +Ellpack< Device, Index, RowMajorOrder, Alignment >:: load( File& file ) { file >> this->offsets; } - } // namespace Segements + } // namespace Segments } // namespace Conatiners } // namespace TNL diff --git a/src/UnitTests/Matrices/CMakeLists.txt b/src/UnitTests/Matrices/CMakeLists.txt index f278934a6a..996dd0430c 100644 --- a/src/UnitTests/Matrices/CMakeLists.txt +++ b/src/UnitTests/Matrices/CMakeLists.txt @@ -74,6 +74,11 @@ ELSE( BUILD_CUDA ) TARGET_COMPILE_OPTIONS( SparseMatrixTest_CSR_segments PRIVATE ${CXX_TESTS_FLAGS} ) TARGET_LINK_LIBRARIES( SparseMatrixTest_CSR_segments ${GTEST_BOTH_LIBRARIES} ) + ADD_EXECUTABLE( SparseMatrixTest_Ellpack_segments SparseMatrixTest_Ellpack_segments.cpp ) + TARGET_COMPILE_OPTIONS( SparseMatrixTest_Ellpack_segments PRIVATE ${CXX_TESTS_FLAGS} ) + TARGET_LINK_LIBRARIES( SparseMatrixTest_Ellpack_segments ${GTEST_BOTH_LIBRARIES} ) + + ENDIF( BUILD_CUDA ) @@ -92,6 +97,7 @@ ADD_TEST( 
SparseMatrixTest_SlicedEllpack ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixT #### # Segments tests ADD_TEST( SparseMatrixTest_CSR_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_CSR_segments${CMAKE_EXECUTABLE_SUFFIX} ) +ADD_TEST( SparseMatrixTest_Ellpack_segments ${EXECUTABLE_OUTPUT_PATH}/SparseMatrixTest_Ellpack_segments${CMAKE_EXECUTABLE_SUFFIX} ) if( ${BUILD_MPI} ) if( BUILD_CUDA ) diff --git a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h index a738af0e2a..b533584699 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_CSR_segments.h @@ -1,8 +1,8 @@ /*************************************************************************** SparseMatrixTest_CSR.h - description ------------------- - begin : Nov 2, 2018 - copyright : (C) 2018 by Tomas Oberhuber et al. + begin : Dec 2, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. email : tomas.oberhuber@fjfi.cvut.cz ***************************************************************************/ diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp new file mode 100644 index 0000000000..63219e9b07 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cpp @@ -0,0 +1 @@ +#include "SparseMatrixTest_Ellpack_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu new file mode 100644 index 0000000000..63219e9b07 --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.cu @@ -0,0 +1 @@ +#include "SparseMatrixTest_Ellpack_segments.h" diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h new file mode 100644 index 0000000000..79cdf06cff --- /dev/null +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack_segments.h @@ 
-0,0 +1,141 @@ +/*************************************************************************** + SparseMatrixTest_Ellpack_segments.h - description + ------------------- + begin : Dec 3, 2019 + copyright : (C) 2019 by Tomas Oberhuber et al. + email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#include <TNL/Containers/Segments/Ellpack.h> +#include <TNL/Matrices/SparseMatrix.h> + + +#include "SparseMatrixTest.hpp" +#include <iostream> + +#ifdef HAVE_GTEST +#include <gtest/gtest.h> + +// test fixture for the typed tests; exposes the tested matrix type as EllpackMatrixType +template< typename Matrix > +class EllpackMatrixTest : public ::testing::Test +{ +protected: + using EllpackMatrixType = Matrix; +}; + +// types for which EllpackMatrixTest is instantiated +using EllpackMatrixTypes = ::testing::Types +< + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, short >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, int >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long >, + TNL::Matrices::SparseMatrix< 
double, TNL::Containers::Segments::Ellpack, TNL::Devices::Host, long > +#ifdef HAVE_CUDA + ,TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, short >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, int >, + TNL::Matrices::SparseMatrix< int, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< long, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< float, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long >, + TNL::Matrices::SparseMatrix< double, TNL::Containers::Segments::Ellpack, TNL::Devices::Cuda, long > +#endif +>; + +TYPED_TEST_SUITE( EllpackMatrixTest, EllpackMatrixTypes); + +TYPED_TEST( EllpackMatrixTest, setDimensionsTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetDimensions< EllpackMatrixType >(); +} + +//TYPED_TEST( EllpackMatrixTest, setCompressedRowLengthsTest ) +//{ +//// using EllpackMatrixType = typename TestFixture::EllpackMatrixType; +// +//// test_SetCompressedRowLengths< EllpackMatrixType >(); +// +// bool testRan = false; +// EXPECT_TRUE( testRan ); +// std::cout << "\nTEST DID NOT RUN. NOT WORKING.\n\n"; +// std::cout << " This test is dependent on the input format. 
\n"; +// std::cout << " Almost every format allocates elements per row differently.\n\n"; +// std::cout << "\n TODO: Finish implementation of getNonZeroRowLength (Only non-zero elements, not the number of allocated elements.)\n\n"; +//} + +TYPED_TEST( EllpackMatrixTest, setLikeTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetLike< EllpackMatrixType, EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, resetTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Reset< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, setElementTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetElement< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, addElementTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_AddElement< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, setRowTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SetRow< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, vectorProductTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_VectorProduct< EllpackMatrixType >(); +} + +TYPED_TEST( EllpackMatrixTest, saveAndLoadTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_SaveAndLoad< EllpackMatrixType >( "test_SparseMatrixTest_Ellpack_segments" ); +} + +TYPED_TEST( EllpackMatrixTest, printTest ) +{ + using EllpackMatrixType = typename TestFixture::EllpackMatrixType; + + test_Print< EllpackMatrixType >(); +} + +#endif + +#include "../main.h" -- GitLab