From 7effd31d31a10bf59e3fe2426b06164d0868daa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 6 Dec 2020 09:14:38 +0100 Subject: [PATCH 1/8] Fixed missing GridEntity refresh in tnl-grid-to-mesh --- src/Tools/tnl-grid-to-mesh.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Tools/tnl-grid-to-mesh.cpp b/src/Tools/tnl-grid-to-mesh.cpp index 13bf7d47c..003a59f5c 100644 --- a/src/Tools/tnl-grid-to-mesh.cpp +++ b/src/Tools/tnl-grid-to-mesh.cpp @@ -84,7 +84,8 @@ struct MeshCreator< Meshes::Grid< 1, Real, Device, Index > > } for( Index i = 0; i < numberOfCells; i++ ) { - const auto cell = grid.template getEntity< typename GridType::Cell >( i ); + auto cell = grid.template getEntity< typename GridType::Cell >( i ); + cell.refresh(); const auto neighbors = cell.template getNeighborEntities< 0 >(); meshBuilder.getCellSeed( i ).setCornerId( 0, neighbors.template getEntityIndex< -1 >() ); meshBuilder.getCellSeed( i ).setCornerId( 1, neighbors.template getEntityIndex< 1 >() ); -- GitLab From 002dd913c240dd0540b4928abd73d76218344ff0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 17 Dec 2020 22:13:10 +0100 Subject: [PATCH 2/8] Cleaned up Math.h - the sum function was unused - inline is useless for template functions --- src/TNL/Math.h | 59 ++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/src/TNL/Math.h b/src/TNL/Math.h index 01fd527cb..07aa51c1d 100644 --- a/src/TNL/Math.h +++ b/src/TNL/Math.h @@ -19,13 +19,6 @@ namespace TNL { -template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type > -__cuda_callable__ inline -ResultType sum( const T1& a, const T2& b ) -{ - return a + b; -} - /** * \brief This function returns minimum of two numbers. 
* @@ -35,7 +28,7 @@ ResultType sum( const T1& a, const T2& b ) template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type, // enable_if is necessary to avoid ambiguity in vector expressions std::enable_if_t< ! HasSubscriptOperator::value && ! HasSubscriptOperator::value, bool > = true > -__cuda_callable__ inline +__cuda_callable__ ResultType min( const T1& a, const T2& b ) { #if __cplusplus >= 201402L @@ -80,7 +73,7 @@ ResultType max( const T1& a, const T2& b ) */ template< class T, std::enable_if_t< ! std::is_unsigned::value && ! std::is_class::value, bool > = true > -__cuda_callable__ inline +__cuda_callable__ T abs( const T& n ) { #if defined(__CUDA_ARCH__) @@ -98,7 +91,7 @@ T abs( const T& n ) */ template< class T, std::enable_if_t< std::is_unsigned::value, bool > = true > -__cuda_callable__ inline +__cuda_callable__ T abs( const T& n ) { return n; @@ -108,7 +101,7 @@ T abs( const T& n ) * \brief This function returns argument of minimum of two numbers. */ template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type > -__cuda_callable__ inline +__cuda_callable__ ResultType argMin( const T1& a, const T2& b ) { return ( a < b ) ? a : b; @@ -128,7 +121,7 @@ ResultType argMax( const T1& a, const T2& b ) * \brief This function returns argument of minimum of absolute values of two numbers. */ template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type > -__cuda_callable__ inline +__cuda_callable__ ResultType argAbsMin( const T1& a, const T2& b ) { return ( TNL::abs( a ) < TNL::abs( b ) ) ? a : b; @@ -150,7 +143,7 @@ ResultType argAbsMax( const T1& a, const T2& b ) template< typename T1, typename T2, typename ResultType = typename std::common_type< T1, T2 >::type, // enable_if is necessary to avoid ambiguity in vector expressions std::enable_if_t< ! std::is_class::value && ! 
std::is_class::value, bool > = true > -__cuda_callable__ inline +__cuda_callable__ ResultType pow( const T1& base, const T2& exp ) { #if defined(__CUDA_ARCH__) @@ -164,7 +157,7 @@ ResultType pow( const T1& base, const T2& exp ) * \brief This function returns the base-e exponential of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto exp( const T& value ) -> decltype( std::exp(value) ) { #if defined(__CUDA_ARCH__) @@ -178,7 +171,7 @@ auto exp( const T& value ) -> decltype( std::exp(value) ) * \brief This function returns square root of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto sqrt( const T& value ) -> decltype( std::sqrt(value) ) { #if defined(__CUDA_ARCH__) @@ -192,7 +185,7 @@ auto sqrt( const T& value ) -> decltype( std::sqrt(value) ) * \brief This function returns cubic root of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto cbrt( const T& value ) -> decltype( std::cbrt(value) ) { #if defined(__CUDA_ARCH__) @@ -206,7 +199,7 @@ auto cbrt( const T& value ) -> decltype( std::cbrt(value) ) * \brief This function returns the natural logarithm of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto log( const T& value ) -> decltype( std::log(value) ) { #if defined(__CUDA_ARCH__) @@ -220,7 +213,7 @@ auto log( const T& value ) -> decltype( std::log(value) ) * \brief This function returns the common logarithm of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto log10( const T& value ) -> decltype( std::log10(value) ) { #if defined(__CUDA_ARCH__) @@ -234,7 +227,7 @@ auto log10( const T& value ) -> decltype( std::log10(value) ) * \brief This function returns the binary logarithm of the given \e value. 
*/ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto log2( const T& value ) -> decltype( std::log2(value) ) { #if defined(__CUDA_ARCH__) @@ -248,7 +241,7 @@ auto log2( const T& value ) -> decltype( std::log2(value) ) * \brief This function returns sine of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto sin( const T& value ) -> decltype( std::sin(value) ) { #if defined(__CUDA_ARCH__) @@ -262,7 +255,7 @@ auto sin( const T& value ) -> decltype( std::sin(value) ) * \brief This function returns cosine of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto cos( const T& value ) -> decltype( std::cos(value) ) { #if defined(__CUDA_ARCH__) @@ -276,7 +269,7 @@ auto cos( const T& value ) -> decltype( std::cos(value) ) * \brief This function returns tangent of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto tan( const T& value ) -> decltype( std::tan(value) ) { #if defined(__CUDA_ARCH__) @@ -290,7 +283,7 @@ auto tan( const T& value ) -> decltype( std::tan(value) ) * \brief This function returns the arc sine of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto asin( const T& value ) -> decltype( std::asin(value) ) { #if defined(__CUDA_ARCH__) @@ -304,7 +297,7 @@ auto asin( const T& value ) -> decltype( std::asin(value) ) * \brief This function returns the arc cosine of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto acos( const T& value ) -> decltype( std::acos(value) ) { #if defined(__CUDA_ARCH__) @@ -318,7 +311,7 @@ auto acos( const T& value ) -> decltype( std::acos(value) ) * \brief This function returns the arc tangent of the given \e value. 
*/ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto atan( const T& value ) -> decltype( std::atan(value) ) { #if defined(__CUDA_ARCH__) @@ -332,7 +325,7 @@ auto atan( const T& value ) -> decltype( std::atan(value) ) * \brief This function returns the hyperbolic sine of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto sinh( const T& value ) -> decltype( std::sinh(value) ) { #if defined(__CUDA_ARCH__) @@ -346,7 +339,7 @@ auto sinh( const T& value ) -> decltype( std::sinh(value) ) * \brief This function returns the hyperbolic cosine of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto cosh( const T& value ) -> decltype( std::cosh(value) ) { #if defined(__CUDA_ARCH__) @@ -360,7 +353,7 @@ auto cosh( const T& value ) -> decltype( std::cosh(value) ) * \brief This function returns the hyperbolic tangent of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto tanh( const T& value ) -> decltype( std::tanh(value) ) { #if defined(__CUDA_ARCH__) @@ -374,7 +367,7 @@ auto tanh( const T& value ) -> decltype( std::tanh(value) ) * \brief This function returns the inverse hyperbolic sine of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto asinh( const T& value ) -> decltype( std::asinh(value) ) { #if defined(__CUDA_ARCH__) @@ -388,7 +381,7 @@ auto asinh( const T& value ) -> decltype( std::asinh(value) ) * \brief This function returns the inverse hyperbolic cosine of the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto acosh( const T& value ) -> decltype( std::acosh(value) ) { #if defined(__CUDA_ARCH__) @@ -402,7 +395,7 @@ auto acosh( const T& value ) -> decltype( std::acosh(value) ) * \brief This function returns the inverse hyperbolic tangent of the given \e value. 
*/ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto atanh( const T& value ) -> decltype( std::atanh(value) ) { #if defined(__CUDA_ARCH__) @@ -416,7 +409,7 @@ auto atanh( const T& value ) -> decltype( std::atanh(value) ) * \brief This function returns largest integer value not greater than the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto floor( const T& value ) -> decltype( std::floor(value) ) { #if defined(__CUDA_ARCH__) @@ -430,7 +423,7 @@ auto floor( const T& value ) -> decltype( std::floor(value) ) * \brief This function returns the smallest integer value not less than the given \e value. */ template< typename T > -__cuda_callable__ inline +__cuda_callable__ auto ceil( const T& value ) -> decltype( std::ceil(value) ) { #if defined(__CUDA_ARCH__) -- GitLab From 2d2fa9c25336546b5d0b8cd3af9dc9291ce78dda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 17 Dec 2020 22:34:24 +0100 Subject: [PATCH 3/8] Added variadic min and max functions --- src/TNL/Math.h | 27 ++++++++++++ src/UnitTests/CMakeLists.txt | 2 +- src/UnitTests/MathTest.cpp | 79 ++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 src/UnitTests/MathTest.cpp diff --git a/src/TNL/Math.h b/src/TNL/Math.h index 07aa51c1d..cb583c03c 100644 --- a/src/TNL/Math.h +++ b/src/TNL/Math.h @@ -43,6 +43,19 @@ ResultType min( const T1& a, const T2& b ) #endif } +/** + * \brief This function returns minimum of a variadic number of inputs. + * + * The inputs are folded with the \ref min function from the left to the right. + */ +template< typename T1, typename T2, typename T3, typename... Ts > +__cuda_callable__ +typename std::common_type< T1, T2, T3, Ts... >::type +min( T1&& val1, T2&& val2, T3&& val3, Ts&&... vs ) +{ + return min( min( std::forward(val1), std::forward(val2) ), + std::forward(val3), std::forward(vs)... 
 ); +} /** * \brief This function returns maximum of two numbers. @@ -68,6 +81,20 @@ ResultType max( const T1& a, const T2& b ) #endif } +/** + * \brief This function returns maximum of a variadic number of inputs. + * + * The inputs are folded with the \ref max function from the left to the right. + */ +template< typename T1, typename T2, typename T3, typename... Ts > +__cuda_callable__ +typename std::common_type< T1, T2, T3, Ts... >::type +max( T1&& val1, T2&& val2, T3&& val3, Ts&&... vs ) +{ + return max( max( std::forward(val1), std::forward(val2) ), + std::forward(val3), std::forward(vs)... ); +} + /** * \brief This function returns absolute value of given number \e n. */ diff --git a/src/UnitTests/CMakeLists.txt b/src/UnitTests/CMakeLists.txt index a37e1b7d7..8e4ac7249 100644 --- a/src/UnitTests/CMakeLists.txt +++ b/src/UnitTests/CMakeLists.txt @@ -5,7 +5,7 @@ ADD_SUBDIRECTORY( Functions ) ADD_SUBDIRECTORY( Meshes ) ADD_SUBDIRECTORY( Pointers ) -set( CPP_TESTS AssertTest FileNameTest StringTest ObjectTest TimerTest TypeInfoTest ) +set( CPP_TESTS AssertTest FileNameTest MathTest ObjectTest StringTest TimerTest TypeInfoTest ) set( CUDA_TESTS AssertCudaTest ) if( BUILD_CUDA ) set( CUDA_TESTS ${CUDA_TESTS} AllocatorsTest FileTest ) diff --git a/src/UnitTests/MathTest.cpp b/src/UnitTests/MathTest.cpp new file mode 100644 index 000000000..5b61b8d03 --- /dev/null +++ b/src/UnitTests/MathTest.cpp @@ -0,0 +1,79 @@ +/*************************************************************************** + MathTest.cpp - description + ------------------- + begin : Dec 17, 2020 + copyright : (C) 2020 by Tomas Oberhuber et al. 
+ email : tomas.oberhuber@fjfi.cvut.cz + ***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#ifdef HAVE_GTEST +#include +#endif + +#include + +#ifdef HAVE_GTEST +TEST( MathTest, variadic_min ) +{ + using TNL::min; + + EXPECT_EQ( min(1, 2, 3, 4), 1 ); + EXPECT_EQ( min(1, 2, 4, 3), 1 ); + EXPECT_EQ( min(1, 3, 2, 4), 1 ); + EXPECT_EQ( min(1, 3, 4, 2), 1 ); + EXPECT_EQ( min(1, 4, 2, 3), 1 ); + EXPECT_EQ( min(1, 4, 3, 2), 1 ); + EXPECT_EQ( min(2, 1, 3, 4), 1 ); + EXPECT_EQ( min(2, 1, 4, 3), 1 ); + EXPECT_EQ( min(2, 3, 1, 4), 1 ); + EXPECT_EQ( min(2, 3, 4, 1), 1 ); + EXPECT_EQ( min(2, 4, 1, 3), 1 ); + EXPECT_EQ( min(2, 4, 3, 1), 1 ); + EXPECT_EQ( min(3, 1, 2, 4), 1 ); + EXPECT_EQ( min(3, 1, 4, 2), 1 ); + EXPECT_EQ( min(3, 2, 1, 4), 1 ); + EXPECT_EQ( min(3, 2, 4, 1), 1 ); + EXPECT_EQ( min(3, 4, 1, 2), 1 ); + EXPECT_EQ( min(3, 4, 2, 1), 1 ); + EXPECT_EQ( min(4, 1, 2, 3), 1 ); + EXPECT_EQ( min(4, 1, 3, 2), 1 ); + EXPECT_EQ( min(4, 2, 1, 3), 1 ); + EXPECT_EQ( min(4, 2, 3, 1), 1 ); + EXPECT_EQ( min(4, 3, 1, 2), 1 ); + EXPECT_EQ( min(4, 3, 2, 1), 1 ); +} + +TEST( MathTest, variadic_max ) +{ + using TNL::max; + + EXPECT_EQ( max(1, 2, 3, 4), 4 ); + EXPECT_EQ( max(1, 2, 4, 3), 4 ); + EXPECT_EQ( max(1, 3, 2, 4), 4 ); + EXPECT_EQ( max(1, 3, 4, 2), 4 ); + EXPECT_EQ( max(1, 4, 2, 3), 4 ); + EXPECT_EQ( max(1, 4, 3, 2), 4 ); + EXPECT_EQ( max(2, 1, 3, 4), 4 ); + EXPECT_EQ( max(2, 1, 4, 3), 4 ); + EXPECT_EQ( max(2, 3, 1, 4), 4 ); + EXPECT_EQ( max(2, 3, 4, 1), 4 ); + EXPECT_EQ( max(2, 4, 1, 3), 4 ); + EXPECT_EQ( max(2, 4, 3, 1), 4 ); + EXPECT_EQ( max(3, 1, 2, 4), 4 ); + EXPECT_EQ( max(3, 1, 4, 2), 4 ); + EXPECT_EQ( max(3, 2, 1, 4), 4 ); + EXPECT_EQ( max(3, 2, 4, 1), 4 ); + EXPECT_EQ( max(3, 4, 1, 2), 4 ); + EXPECT_EQ( max(3, 4, 2, 1), 4 ); + EXPECT_EQ( max(4, 1, 2, 3), 4 ); + EXPECT_EQ( max(4, 1, 3, 2), 4 ); + EXPECT_EQ( max(4, 2, 1, 3), 4 ); + EXPECT_EQ( max(4, 2, 3, 1), 4 ); + EXPECT_EQ( max(4, 3, 1, 2), 4 ); + 
EXPECT_EQ( max(4, 3, 2, 1), 4 ); +} +#endif + +#include "main.h" -- GitLab From 02b7563690d734906d392ddbfd860dcadd33a549 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Thu, 24 Dec 2020 18:10:49 +0100 Subject: [PATCH 4/8] Added overloads of l2Norm and lpNorm for 1D vectors to avoid unnecessary sqrt Also removed unnecessary getVectorLength function from getEntityMesure.h Fixes #71 --- .../Expressions/StaticExpressionTemplates.h | 29 +++++++++++++++++-- src/TNL/Meshes/Geometry/getEntityMeasure.h | 19 +----------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h index 9ae232a06..da2c8cdd2 100644 --- a/src/TNL/Containers/Expressions/StaticExpressionTemplates.h +++ b/src/TNL/Containers/Expressions/StaticExpressionTemplates.h @@ -749,7 +749,8 @@ l1Norm( const ET1& a ) } template< typename ET1, - typename..., typename = EnableIfStaticUnaryExpression_t< ET1 > > + typename..., typename = EnableIfStaticUnaryExpression_t< ET1 >, + std::enable_if_t< (ET1::getSize() > 1), bool > = true > __cuda_callable__ auto l2Norm( const ET1& a ) @@ -758,9 +759,21 @@ l2Norm( const ET1& a ) return sqrt( sum( a * a ) ); } +template< typename ET1, + typename..., typename = EnableIfStaticUnaryExpression_t< ET1 >, + std::enable_if_t< ET1::getSize() == 1, bool > = true > +__cuda_callable__ +auto +l2Norm( const ET1& a ) +{ + // avoid sqrt for 1D vectors (l1 and l2 norms are identical in 1D) + return l1Norm( a ); +} + template< typename ET1, typename Real, - typename..., typename = EnableIfStaticUnaryExpression_t< ET1 > > + typename..., typename = EnableIfStaticUnaryExpression_t< ET1 >, + std::enable_if_t< (ET1::getSize() > 1), bool > = true > __cuda_callable__ auto lpNorm( const ET1& a, const Real& p ) @@ -776,6 +789,18 @@ lpNorm( const ET1& a, const Real& p ) return pow( sum( pow( abs( a ), p ) ), 1.0 / p ); } +template< typename ET1, + 
typename Real, + typename..., typename = EnableIfStaticUnaryExpression_t< ET1 >, + std::enable_if_t< ET1::getSize() == 1, bool > = true > +__cuda_callable__ +auto +lpNorm( const ET1& a, const Real& p ) +{ + // avoid sqrt and pow for 1D vectors (all lp norms are identical in 1D) + return l1Norm( a ); +} + template< typename ET1, typename..., typename = EnableIfStaticUnaryExpression_t< ET1 > > __cuda_callable__ diff --git a/src/TNL/Meshes/Geometry/getEntityMeasure.h b/src/TNL/Meshes/Geometry/getEntityMeasure.h index 0728cc447..70d5614ce 100644 --- a/src/TNL/Meshes/Geometry/getEntityMeasure.h +++ b/src/TNL/Meshes/Geometry/getEntityMeasure.h @@ -50,23 +50,6 @@ getEntityMeasure( const Mesh< MeshConfig, Device > & mesh, return 1.0; } -// TODO: move to StaticVector::norm -template< typename Real > -__cuda_callable__ -Real -getVectorLength( const TNL::Containers::StaticVector< 1, Real > & vector ) -{ - return TNL::abs( vector[ 0 ] ); -} - -template< typename VectorExpression > -__cuda_callable__ -typename VectorExpression::RealType -getVectorLength( const VectorExpression& expr ) -{ - return TNL::sqrt( TNL::dot( expr, expr ) ); -} - // Edge template< typename MeshConfig, typename Device > __cuda_callable__ @@ -76,7 +59,7 @@ getEntityMeasure( const Mesh< MeshConfig, Device > & mesh, { const auto& v0 = mesh.getPoint( entity.template getSubentityIndex< 0 >( 0 ) ); const auto& v1 = mesh.getPoint( entity.template getSubentityIndex< 0 >( 1 ) ); - return getVectorLength( v1 - v0 ); + return l2Norm( v1 - v0 ); } // Triangle -- GitLab From 22471eef06942896d59f4ddea7bf0bd30b694d73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sat, 26 Dec 2020 14:45:55 +0100 Subject: [PATCH 5/8] Replaced the deprecated method setCompressedRowLengths with setRowCapacities --- .../SpMV/ReferenceFormats/Legacy/BiEllpack.h | 2 + .../ReferenceFormats/Legacy/BiEllpack_impl.h | 10 + .../ReferenceFormats/Legacy/ChunkedEllpack.h | 2 + .../Legacy/ChunkedEllpack_impl.h | 8 + 
.../SpMV/ReferenceFormats/Legacy/Ellpack.h | 2 + .../ReferenceFormats/Legacy/Ellpack_impl.h | 8 + .../ReferenceFormats/Legacy/SlicedEllpack.h | 2 + .../Legacy/SlicedEllpack_impl.h | 9 + src/TNL/Matrices/Legacy/AdEllpack.h | 2 + src/TNL/Matrices/Legacy/AdEllpack_impl.h | 10 + src/TNL/Matrices/Legacy/CSR.h | 6 +- src/TNL/Matrices/Legacy/CSR_impl.h | 29 ++- src/TNL/Matrices/Legacy/Multidiagonal.h | 2 + src/TNL/Matrices/Legacy/Multidiagonal_impl.h | 8 + src/TNL/Matrices/MatrixReader_impl.h | 2 +- src/TNL/Matrices/SparseMatrix.h | 243 +++++++++--------- src/TNL/Matrices/SparseOperations_impl.h | 6 +- .../Linear/Preconditioners/ILU0_impl.h | 4 +- .../Matrices/BinarySparseMatrixCopyTest.h | 6 +- .../Matrices/BinarySparseMatrixTest.hpp | 26 +- .../Matrices/BinarySparseMatrixTest_CSR.h | 4 +- .../Matrices/BinarySparseMatrixTest_Ellpack.h | 4 +- .../BinarySparseMatrixTest_SlicedEllpack.h | 4 +- src/UnitTests/Matrices/SparseMatrixCopyTest.h | 6 +- src/UnitTests/Matrices/SparseMatrixTest.h | 6 +- src/UnitTests/Matrices/SparseMatrixTest.hpp | 28 +- .../Matrices/SymmetricSparseMatrixTest.h | 4 +- .../Matrices/SymmetricSparseMatrixTest.hpp | 4 +- 28 files changed, 258 insertions(+), 189 deletions(-) diff --git a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/BiEllpack.h b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/BiEllpack.h index dd173cea1..0b4534be0 100644 --- a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/BiEllpack.h +++ b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/BiEllpack.h @@ -64,6 +64,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; IndexType getRowLength( const IndexType row ) const; diff --git a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/BiEllpack_impl.h b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/BiEllpack_impl.h index afda8c2a5..5a0c9450b 100644 --- 
a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/BiEllpack_impl.h +++ b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/BiEllpack_impl.h @@ -104,6 +104,16 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView constRowLengths ) return this->allocateMatrixElements( this->warpSize * this->groupPointers.getElement( strips * ( this->logWarpSize + 1 ) ) ); } +template< typename Real, + typename Device, + typename Index > +void +BiEllpack< Real, Device, Index >:: +setRowCapacities( ConstCompressedRowLengthsVectorView constRowLengths ) +{ + setCompressedRowLengths( constRowLengths ); +} + template< typename Real, typename Device, typename Index > diff --git a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/ChunkedEllpack.h b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/ChunkedEllpack.h index 10fce9f71..5d5baeb59 100644 --- a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/ChunkedEllpack.h +++ b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/ChunkedEllpack.h @@ -101,6 +101,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ); + IndexType getRowLength( const IndexType row ) const; __cuda_callable__ diff --git a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/ChunkedEllpack_impl.h b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/ChunkedEllpack_impl.h index 99c3ef547..0e7b8c723 100644 --- a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/ChunkedEllpack_impl.h +++ b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/ChunkedEllpack_impl.h @@ -245,6 +245,14 @@ void ChunkedEllpack< Real, Device, Index >::setCompressedRowLengths( ConstCompre Sparse< Real, Device, Index >::allocateMatrixElements( elementsToAllocation ); } +template< typename Real, + typename Device, + typename Index > +void ChunkedEllpack< Real, Device, Index >::setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ) +{ + setCompressedRowLengths( rowLengths ); +} + template< typename Real, typename Device, typename 
Index > diff --git a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Ellpack.h b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Ellpack.h index 7ddb4bb04..12359f75e 100644 --- a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Ellpack.h +++ b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Ellpack.h @@ -61,6 +61,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; void setConstantCompressedRowLengths( const IndexType& rowLengths ); diff --git a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Ellpack_impl.h b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Ellpack_impl.h index 1ca524701..d900de2a8 100644 --- a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Ellpack_impl.h +++ b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/Ellpack_impl.h @@ -91,6 +91,14 @@ void Ellpack< Real, Device, Index >::setCompressedRowLengths( ConstCompressedRow allocateElements(); } +template< typename Real, + typename Device, + typename Index > +void Ellpack< Real, Device, Index >::setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ) +{ + setCompressedRowLengths( rowLengths ); +} + template< typename Real, typename Device, typename Index > diff --git a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/SlicedEllpack.h b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/SlicedEllpack.h index e0bcd3c75..65c162312 100644 --- a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/SlicedEllpack.h +++ b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/SlicedEllpack.h @@ -91,6 +91,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; IndexType getRowLength( const IndexType row ) const; diff --git 
a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/SlicedEllpack_impl.h b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/SlicedEllpack_impl.h index 6bd8b87aa..ef8ae1334 100644 --- a/src/Benchmarks/SpMV/ReferenceFormats/Legacy/SlicedEllpack_impl.h +++ b/src/Benchmarks/SpMV/ReferenceFormats/Legacy/SlicedEllpack_impl.h @@ -84,6 +84,15 @@ void SlicedEllpack< Real, Device, Index, SliceSize >::setCompressedRowLengths( C this->allocateMatrixElements( this->slicePointers.getElement( slices ) ); } +template< typename Real, + typename Device, + typename Index, + int SliceSize > +void SlicedEllpack< Real, Device, Index, SliceSize >::setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ) +{ + setCompressedRowLengths( rowLengths ); +} + template< typename Real, typename Device, typename Index, diff --git a/src/TNL/Matrices/Legacy/AdEllpack.h b/src/TNL/Matrices/Legacy/AdEllpack.h index f1a023007..14c83c3ce 100644 --- a/src/TNL/Matrices/Legacy/AdEllpack.h +++ b/src/TNL/Matrices/Legacy/AdEllpack.h @@ -134,6 +134,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; IndexType getWarp( const IndexType row ) const; diff --git a/src/TNL/Matrices/Legacy/AdEllpack_impl.h b/src/TNL/Matrices/Legacy/AdEllpack_impl.h index a1deb6cf8..af6595874 100644 --- a/src/TNL/Matrices/Legacy/AdEllpack_impl.h +++ b/src/TNL/Matrices/Legacy/AdEllpack_impl.h @@ -221,6 +221,16 @@ setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ) } } +template< typename Real, + typename Device, + typename Index > +void +AdEllpack< Real, Device, Index >:: +setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ) +{ + setCompressedRowLengths( rowLengths ); +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/Legacy/CSR.h b/src/TNL/Matrices/Legacy/CSR.h 
index d7a9092cf..7570eac8b 100644 --- a/src/TNL/Matrices/Legacy/CSR.h +++ b/src/TNL/Matrices/Legacy/CSR.h @@ -106,7 +106,7 @@ public: Containers::Vector< Block, Device, Index > blocks; - + /* Configuration of CSR SpMV kernels ----------------------------------------- */ /* Block sizes */ @@ -130,7 +130,7 @@ public: /* Number of elements in shared memory per one warp */ static constexpr Index SHARED_PER_WARP = SHARED / WARPS; /* -------------------------------------------------------------------------- */ - + using Sparse< Real, Device, Index >::getAllocatedElementsCount; @@ -145,6 +145,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ); + void getCompressedRowLengths( CompressedRowLengthsVectorView rowLengths ) const; IndexType getRowLength( const IndexType row ) const; diff --git a/src/TNL/Matrices/Legacy/CSR_impl.h b/src/TNL/Matrices/Legacy/CSR_impl.h index e03e4db6d..580b63456 100644 --- a/src/TNL/Matrices/Legacy/CSR_impl.h +++ b/src/TNL/Matrices/Legacy/CSR_impl.h @@ -114,6 +114,15 @@ void CSR< Real, Device, Index, KernelType >::setCompressedRowLengths( ConstCompr this->setBlocks(); } +template< typename Real, + typename Device, + typename Index, + CSRKernel KernelType > +void CSR< Real, Device, Index, KernelType >::setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ) +{ + setCompressedRowLengths( rowLengths ); +} + /* Find limit of block */ template< typename Real, typename Index, @@ -1013,7 +1022,7 @@ void SpMVCSRLight( const Real *inVector, /*use two threads to fetch the row offset*/ if (laneId < 2) space[vectorId][laneId] = rowPointers[row + laneId]; - + rowStart = space[vectorId][0]; rowEnd = space[vectorId][1]; @@ -1048,7 +1057,7 @@ void SpMVCSRLight( const Real *inVector, /*get a new row index*/ if(warpLaneId == 0) row = atomicAdd(rowCnt, 32 / groupSize); - + /*broadcast the row index to the other threads in the same warp and 
compute the row index of each vetor*/ row = __shfl_sync(0xFFFFFFFF, row, 0) + warpVectorId; @@ -1076,7 +1085,7 @@ void SpMVCSRLight2( const Real *inVector, /*get the row index*/ if (warpLaneId == 0) row = atomicAdd(rowCnt, 32 / groupSize); - + /*broadcast the value to other threads in the same warp and compute the row index of each vector*/ row = __shfl_sync(0xFFFFFFFF, row, 0) + warpVectorId; @@ -1117,7 +1126,7 @@ void SpMVCSRLight2( const Real *inVector, /*get a new row index*/ if(warpLaneId == 0) row = atomicAdd(rowCnt, 32 / groupSize); - + /*broadcast the row index to the other threads in the same warp and compute the row index of each vetor*/ row = __shfl_sync(0xFFFFFFFF, row, 0) + warpVectorId; @@ -1145,14 +1154,14 @@ void SpMVCSRLight3( const Real *inVector, /*get the row index*/ if (warpLaneId == 0) row = atomicAdd(rowCnt, 32 / groupSize); - + /*broadcast the value to other threads in the same warp and compute the row index of each vector*/ row = __shfl_sync(0xFFFFFFFF, row, 0) + warpVectorId; /*check the row range*/ while (row < rows) { sum = 0; - + /*compute dot product*/ rowEnd = rowPointers[row + 1]; for (i = rowPointers[row] + laneId; i < rowEnd; i += groupSize) @@ -1417,7 +1426,7 @@ void SpMVCSRLightPrepare( const Real *inVector, /* Get info about GPU */ cudaDeviceProp properties; cudaGetDeviceProperties( &properties, Cuda::DeviceInfo::getActiveDevice() ); - const Index blocks = + const Index blocks = properties.multiProcessorCount * properties.maxThreadsPerMultiProcessor / threads; const Index nnz = roundUpDivision(matrix.getValues().getSize(), rows); // non zeroes per row @@ -1563,7 +1572,7 @@ void SpMVCSRLightWithoutAtomicPrepare( const Real *inVector, neededThreads = groupSize * rows; else neededThreads = rows * (groupSize > 32 ? 
32 : groupSize); - + /* Execute kernels on device */ for (Index grid = 0; neededThreads != 0; ++grid) { if (MAX_X_DIM * threads >= neededThreads) { @@ -1753,9 +1762,9 @@ void SpMVCSRAdaptivePrepare( const Real *inVector, neededThreads -= MAX_X_DIM * threads; } - SpMVCSRAdaptive< Real, Index, warpSize, + SpMVCSRAdaptive< Real, Index, warpSize, matrix.WARPS, - matrix.SHARED_PER_WARP, + matrix.SHARED_PER_WARP, matrix.MAX_ELEMENTS_PER_WARP > <<>>( inVector, diff --git a/src/TNL/Matrices/Legacy/Multidiagonal.h b/src/TNL/Matrices/Legacy/Multidiagonal.h index 31488a61e..27ea18bc3 100644 --- a/src/TNL/Matrices/Legacy/Multidiagonal.h +++ b/src/TNL/Matrices/Legacy/Multidiagonal.h @@ -58,6 +58,8 @@ public: void setCompressedRowLengths( ConstCompressedRowLengthsVectorView rowLengths ); + void setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ); + IndexType getRowLength( const IndexType row ) const; __cuda_callable__ diff --git a/src/TNL/Matrices/Legacy/Multidiagonal_impl.h b/src/TNL/Matrices/Legacy/Multidiagonal_impl.h index 87c7e8e41..4ab0aed1d 100644 --- a/src/TNL/Matrices/Legacy/Multidiagonal_impl.h +++ b/src/TNL/Matrices/Legacy/Multidiagonal_impl.h @@ -78,6 +78,14 @@ void Multidiagonal< Real, Device, Index >::setCompressedRowLengths( ConstCompres */ } +template< typename Real, + typename Device, + typename Index > +void Multidiagonal< Real, Device, Index >::setRowCapacities( ConstCompressedRowLengthsVectorView rowLengths ) +{ + setCompressedRowLengths( rowLengths ); +} + template< typename Real, typename Device, typename Index > diff --git a/src/TNL/Matrices/MatrixReader_impl.h b/src/TNL/Matrices/MatrixReader_impl.h index 862d4a285..0ea7d8b2a 100644 --- a/src/TNL/Matrices/MatrixReader_impl.h +++ b/src/TNL/Matrices/MatrixReader_impl.h @@ -62,7 +62,7 @@ void MatrixReader< Matrix >::readMtxFileHostMatrix( std::istream& file, computeCompressedRowLengthsFromMtxFile( file, rowLengths, columns, rows, symmetricMatrix, verbose ); - matrix.setCompressedRowLengths( 
rowLengths ); + matrix.setRowCapacities( rowLengths ); readMatrixElementsFromMtxFile( file, matrix, symmetricMatrix, verbose, symReader ); } diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index fc2584064..3bb7a3e58 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -24,10 +24,10 @@ namespace Matrices { /** * \brief Implementation of sparse matrix, i.e. matrix storing only non-zero elements. - * - * \tparam Real is a type of matrix elements. If \e Real equals \e bool the matrix is treated + * + * \tparam Real is a type of matrix elements. If \e Real equals \e bool the matrix is treated * as binary and so the matrix elements values are not stored in the memory since we need - * to remember only coordinates of non-zero elements( which equal one). + * to remember only coordinates of non-zero elements( which equal one). * \tparam Device is a device where the matrix is allocated. * \tparam Index is a type for indexing of the matrix elements. * \tparam MatrixType specifies a symmetry of matrix. See \ref MatrixType. Symmetric @@ -73,14 +73,14 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Test of symmetric matrix type. - * + * * \return \e true if the matrix is stored as symmetric and \e false otherwise. */ static constexpr bool isSymmetric() { return MatrixType::isSymmetric(); }; /** * \brief Test of binary matrix type. - * + * * \return \e true if the matrix is stored as binary and \e false otherwise. */ static constexpr bool isBinary() { return std::is_same< Real, bool >::value; }; @@ -135,15 +135,15 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > using IndexAllocatorType = IndexAllocator; /** - * \brief Type of related matrix view. - * + * \brief Type of related matrix view. + * * See \ref SparseMatrixView. 
*/ using ViewType = SparseMatrixView< Real, Device, Index, MatrixType, SegmentsViewTemplate >; /** * \brief Matrix view type for constant instances. - * + * * See \ref SparseMatrixView. */ using ConstViewType = SparseMatrixView< std::add_const_t< Real >, Device, Index, MatrixType, SegmentsViewTemplate >; @@ -173,7 +173,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Constructor only with values and column indexes allocators. - * + * * \param realAllocator is used for allocation of matrix elements values. * \param indexAllocator is used for allocation of matrix elements column indexes. */ @@ -182,21 +182,21 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Copy constructor. - * + * * \param matrix is the source matrix */ SparseMatrix( const SparseMatrix& matrix1 ) = default; /** * \brief Move constructor. - * + * * \param matrix is the source matrix */ SparseMatrix( SparseMatrix&& matrix ) = default; /** * \brief Constructor with matrix dimensions. - * + * * \param rows is number of matrix rows. * \param columns is number of matrix columns. * \param realAllocator is used for allocation of matrix elements values. @@ -209,16 +209,16 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Constructor with matrix rows capacities and number of columns. - * + * * The number of matrix rows is given by the size of \e rowCapacities list. - * + * * \tparam ListIndex is the initializer list values type. * \param rowCapacities is a list telling how many matrix elements must be * allocated in each row. * \param columns is the number of matrix columns. * \param realAllocator is used for allocation of matrix elements values. * \param indexAllocator is used for allocation of matrix elements column indexes. 
- * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_1.cpp * \par Output @@ -232,18 +232,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Constructor with matrix dimensions and data in initializer list. - * + * * The matrix elements values are given as a list \e data of triples: * { { row1, column1, value1 }, * { row2, column2, value2 }, * ... }. - * + * * \param rows is number of matrix rows. * \param columns is number of matrix columns. * \param data is a list of matrix elements values. * \param realAllocator is used for allocation of matrix elements values. * \param indexAllocator is used for allocation of matrix elements column indexes. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_init_list_2.cpp * \par Output @@ -257,20 +257,20 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Constructor with matrix dimensions and data in std::map. - * + * * The matrix elements values are given as a map \e data where keys are * std::pair of matrix coordinates ( {row, column} ) and value is the * matrix element value. - * + * * \tparam MapIndex is a type for indexing rows and columns. * \tparam MapValue is a type for matrix elements values in the map. - * + * * \param rows is number of matrix rows. * \param columns is number of matrix columns. * \param map is std::map containing matrix elements. * \param realAllocator is used for allocation of matrix elements values. * \param indexAllocator is used for allocation of matrix elements column indexes. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_Constructor_std_map.cpp * \par Output @@ -286,29 +286,29 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Returns a modifiable view of the sparse matrix. - * + * * See \ref SparseMatrixView. - * + * * \return sparse matrix view. 
*/ ViewType getView() const; // TODO: remove const /** * \brief Returns a non-modifiable view of the sparse matrix. - * + * * See \ref SparseMatrixView. - * + * * \return sparse matrix view. */ ConstViewType getConstView() const; /** * \brief Returns string with serialization type. - * + * * The string has a form `Matrices::SparseMatrix< RealType, [any_device], IndexType, General/Symmetric, Format, [any_allocator] >`. - * + * * \return \ref String with the serialization type. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp * \par Output @@ -318,11 +318,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Returns string with serialization type. - * + * * See \ref SparseMatrix::getSerializationType. - * + * * \return \e String with the serialization type. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_getSerializationType.cpp * \par Output @@ -332,7 +332,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Set number of rows and columns of this matrix. - * + * * \param rows is the number of matrix rows. * \param columns is the number of matrix columns. */ @@ -341,10 +341,10 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Set the number of matrix rows and columns by the given matrix. - * - * \tparam Matrix is matrix type. This can be any matrix having methods + * + * \tparam Matrix is matrix type. This can be any matrix having methods * \ref getRows and \ref getColumns. - * + * * \param matrix in the input matrix dimensions of which are to be adopted. */ template< typename Matrix > @@ -352,18 +352,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Allocates memory for non-zero matrix elements. - * + * * The size of the input vector must be equal to the number of matrix rows. 
* The number of allocated matrix elements for each matrix row depends on * the sparse matrix format. Some formats may allocate more elements than * required. - * + * * \tparam RowsCapacitiesVector is a type of vector/array used for row * capacities setting. - * + * * \param rowCapacities is a vector telling the number of required non-zero * matrix elements in each row. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_setRowCapacities.cpp * \par Output @@ -372,25 +372,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > template< typename RowsCapacitiesVector > void setRowCapacities( const RowsCapacitiesVector& rowCapacities ); - // TODO: Remove this when possible - template< typename RowsCapacitiesVector > - [[deprecated]] - void setCompressedRowLengths( const RowsCapacitiesVector& rowLengths ) { - this->setRowCapacities( rowLengths ); - }; - /** * \brief This method sets the sparse matrix elements from initializer list. - * + * * The number of matrix rows and columns must be set already. * The matrix elements values are given as a list \e data of triples: * { { row1, column1, value1 }, * { row2, column2, value2 }, * ... }. - * + * * \param data is a initializer list of initializer lists representing * list of matrix rows. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_setElements.cpp * \par Output @@ -400,16 +393,16 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief This method sets the sparse matrix elements from std::map. - * + * * The matrix elements values are given as a map \e data where keys are * std::pair of matrix coordinates ( {row, column} ) and value is the * matrix element value. - * + * * \tparam MapIndex is a type for indexing rows and columns. * \tparam MapValue is a type for matrix elements values in the map. - * + * * \param map is std::map containing matrix elements. 
- * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_setElements_map.cpp * \par Output @@ -421,10 +414,10 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Computes number of non-zeros in each row. - * + * * \param rowLengths is a vector into which the number of non-zeros in each row * will be stored. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_getCompressedRowLengths.cpp * \par Output @@ -435,7 +428,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Returns capacity of given matrix row. - * + * * \param row index of matrix row. * \return number of matrix elements allocated for the row. */ @@ -444,10 +437,10 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Returns number of non-zero matrix elements. - * + * * This method really counts the non-zero matrix elements and so * it returns zero for matrix having all allocated elements set to zero. - * + * * \return number of non-zero matrix elements. */ IndexType getNonzeroElementsCount() const; @@ -459,16 +452,16 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Constant getter of simple structure for accessing given matrix row. - * + * * \param rowIdx is matrix row index. - * + * * \return RowView for accessing given matrix row. * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_getConstRow.cpp * \par Output * \include SparseMatrixExample_getConstRow.out - * + * * See \ref SparseMatrixRowView. */ __cuda_callable__ @@ -476,16 +469,16 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Non-constant getter of simple structure for accessing given matrix row. - * + * * \param rowIdx is matrix row index. - * + * * \return RowView for accessing given matrix row. 
- * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_getRow.cpp * \par Output * \include SparseMatrixExample_getRow.out - * + * * See \ref SparseMatrixRowView. */ __cuda_callable__ @@ -493,7 +486,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Sets element at given \e row and \e column to given \e value. - * + * * This method can be called from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated on GPU this method * can be called even from device kernels. If the matrix is allocated in GPU device @@ -501,11 +494,11 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > * performance is very low. For higher performance see. \ref SparseMatrix::getRow * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows. * The call may fail if the matrix row capacity is exhausted. - * + * * \param row is row index of the element. * \param column is columns index of the element. * \param value is the value the element will be set to. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_setElement.cpp * \par Output @@ -518,7 +511,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Add element at given \e row and \e column to given \e value. - * + * * This method can be called from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated on GPU this method * can be called even from device kernels. If the matrix is allocated in GPU device @@ -526,18 +519,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > * performance is very low. For higher performance see. \ref SparseMatrix::getRow * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows. * The call may fail if the matrix row capacity is exhausted. - * + * * \param row is row index of the element. * \param column is columns index of the element. 
* \param value is the value the element will be set to. * \param thisElementMultiplicator is multiplicator the original matrix element * value is multiplied by before addition of given \e value. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_addElement.cpp * \par Output * \include SparseMatrixExample_addElement.out - * + * */ __cuda_callable__ void addElement( const IndexType row, @@ -547,24 +540,24 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Returns value of matrix element at position given by its row and column index. - * + * * This method can be called from the host system (CPU) no matter * where the matrix is allocated. If the matrix is allocated on GPU this method * can be called even from device kernels. If the matrix is allocated in GPU device * this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref SparseMatrix::getRow * or \ref SparseMatrix::forRows and \ref SparseMatrix::forAllRows. - * + * * \param row is a row index of the matrix element. * \param column i a column index of the matrix element. - * + * * \return value of given matrix element. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_getElement.cpp * \par Output * \include SparseMatrixExample_getElement.out - * + * */ __cuda_callable__ RealType getElement( const IndexType row, @@ -572,7 +565,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Method for performing general reduction on matrix rows. - * + * * \tparam Fetch is a type of lambda function for data fetch declared as * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. * The return type of this lambda can be any non void. 
@@ -581,14 +574,14 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > * \tparam Keep is a type of lambda function for storing results of reduction in each row. * It is declared as `keep( const IndexType rowIdx, const double& value )`. * \tparam FetchValue is type returned by the Fetch lambda function. - * + * * \param begin defines beginning of the range [begin,end) of rows to be processed. * \param end defines ending of the range [begin,end) of rows to be processed. * \param fetch is an instance of lambda function for data fetch. * \param reduce is an instance of lambda function for reduction. * \param keep in an instance of lambda function for storing results. * \param zero is zero of given reduction operation also known as idempotent element. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp * \par Output @@ -599,7 +592,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Method for performing general reduction on matrix rows for constant instances. - * + * * \tparam Fetch is a type of lambda function for data fetch declared as * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. * The return type of this lambda can be any non void. @@ -608,14 +601,14 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > * \tparam Keep is a type of lambda function for storing results of reduction in each row. * It is declared as `keep( const IndexType rowIdx, const double& value )`. * \tparam FetchValue is type returned by the Fetch lambda function. - * + * * \param begin defines beginning of the range [begin,end) of rows to be processed. * \param end defines ending of the range [begin,end) of rows to be processed. * \param fetch is an instance of lambda function for data fetch. * \param reduce is an instance of lambda function for reduction. * \param keep in an instance of lambda function for storing results. 
* \param zero is zero of given reduction operation also known as idempotent element. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_rowsReduction.cpp * \par Output @@ -626,7 +619,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Method for performing general reduction on all matrix rows. - * + * * \tparam Fetch is a type of lambda function for data fetch declared as * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. * The return type of this lambda can be any non void. @@ -635,12 +628,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > * \tparam Keep is a type of lambda function for storing results of reduction in each row. * It is declared as `keep( const IndexType rowIdx, const double& value )`. * \tparam FetchValue is type returned by the Fetch lambda function. - * + * * \param fetch is an instance of lambda function for data fetch. * \param reduce is an instance of lambda function for reduction. * \param keep in an instance of lambda function for storing results. * \param zero is zero of given reduction operation also known as idempotent element. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp * \par Output @@ -651,7 +644,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Method for performing general reduction on all matrix rows for constant instances. - * + * * \tparam Fetch is a type of lambda function for data fetch declared as * `fetch( IndexType rowIdx, IndexType& columnIdx, RealType& elementValue ) -> FetchValue`. * The return type of this lambda can be any non void. @@ -660,12 +653,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > * \tparam Keep is a type of lambda function for storing results of reduction in each row. * It is declared as `keep( const IndexType rowIdx, const double& value )`. 
* \tparam FetchValue is type returned by the Fetch lambda function. - * + * * \param fetch is an instance of lambda function for data fetch. * \param reduce is an instance of lambda function for reduction. * \param keep in an instance of lambda function for storing results. * \param zero is zero of given reduction operation also known as idempotent element. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_allRowsReduction.cpp * \par Output @@ -676,18 +669,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Method for iteration over all matrix rows for constant instances. - * + * * \tparam Function is type of lambda function that will operate on matrix elements. * It is should have form like * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. - * The \e localIdx parameter is a rank of the non-zero element in given row. - * If the 'compute' variable is set to false the iteration over the row can + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can * be interrupted. - * + * * \param begin defines beginning of the range [begin,end) of rows to be processed. * \param end defines ending of the range [begin,end) of rows to be processed. * \param function is an instance of the lambda function to be called in each row. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp * \par Output @@ -698,18 +691,18 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Method for iteration over all matrix rows for non-constant instances. - * + * * \tparam Function is type of lambda function that will operate on matrix elements. * It is should have form like * `function( IndexType rowIdx, IndexType localIdx, IndexType columnIdx, const RealType& value, bool& compute )`. 
- * The \e localIdx parameter is a rank of the non-zero element in given row. - * If the 'compute' variable is set to false the iteration over the row can + * The \e localIdx parameter is a rank of the non-zero element in given row. + * If the 'compute' variable is set to false the iteration over the row can * be interrupted. - * + * * \param begin defines beginning of the range [begin,end) of rows to be processed. * \param end defines ending of the range [begin,end) of rows to be processed. * \param function is an instance of the lambda function to be called in each row. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_forRows.cpp * \par Output @@ -720,12 +713,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief This method calls \e forRows for all matrix rows (for constant instances). - * + * * See \ref SparseMatrix::forRows. - * + * * \tparam Function is a type of lambda function that will operate on matrix elements. * \param function is an instance of the lambda function to be called in each row. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp * \par Output @@ -736,12 +729,12 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief This method calls \e forRows for all matrix rows. - * + * * See \ref SparseMatrix::forRows. - * + * * \tparam Function is a type of lambda function that will operate on matrix elements. * \param function is an instance of the lambda function to be called in each row. - * + * * \par Example * \include Matrices/SparseMatrix/SparseMatrixExample_forAllRows.cpp * \par Output @@ -752,16 +745,16 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Computes product of matrix and vector. 
- * + * * More precisely, it computes: - * + * * `outVector = matrixMultiplicator * ( * this ) * inVector + outVectorMultiplicator * outVector` - * + * * \tparam InVector is type of input vector. It can be \ref Vector, * \ref VectorView, \ref Array, \ref ArraView or similar container. * \tparam OutVector is type of output vector. It can be \ref Vector, * \ref VectorView, \ref Array, \ref ArraView or similar container. - * + * * \param inVector is input vector. * \param outVector is output vector. * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default. @@ -799,7 +792,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Assignment of exactly the same matrix type. - * + * * \param matrix is input matrix for the assignment. * \return reference to this matrix. */ @@ -807,7 +800,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Assignment of dense matrix - * + * * \param matrix is input matrix for the assignment. * \return reference to this matrix. */ @@ -826,7 +819,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Comparison operator with another arbitrary matrix type. - * + * * \param matrix is the right-hand side matrix. * \return \e true if the RHS matrix is equal, \e false otherwise. */ @@ -835,7 +828,7 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Comparison operator with another arbitrary matrix type. - * + * * \param matrix is the right-hand side matrix. * \return \e true if the RHS matrix is equal, \e false otherwise. */ @@ -844,45 +837,45 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Method for saving the matrix to the file with given filename. - * + * * \param fileName is name of the file. */ void save( const String& fileName ) const; /** * \brief Method for loading the matrix from the file with given filename. 
- * + * * \param fileName is name of the file. */ void load( const String& fileName ); /** * \brief Method for saving the matrix to a file. - * + * * \param fileName is name of the file. */ virtual void save( File& file ) const override; /** * \brief Method for loading the matrix from a file. - * + * * \param fileName is name of the file. */ virtual void load( File& file ) override; /** * \brief Method for printing the matrix to output stream. - * + * * \param str is the output stream. */ virtual void print( std::ostream& str ) const override; /** * \brief Returns a padding index value. - * + * * Padding index is used for column indexes of padding zeros. Padding zeros * are used in some sparse matrix formats for better data alignment in memory. - * + * * \return value of the padding index. */ __cuda_callable__ @@ -890,20 +883,20 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Getter of segments for non-constant instances. - * + * * \e Segments are a structure for addressing the matrix elements columns and values. * In fact, \e Segments represent the sparse matrix format. - * + * * \return Non-constant reference to segments. */ SegmentsType& getSegments(); /** * \brief Getter of segments for constant instances. - * + * * \e Segments are a structure for addressing the matrix elements columns and values. * In fact, \e Segments represent the sparse matrix format. - * + * * \return Constant reference to segments. 
*/ const SegmentsType& getSegments() const; diff --git a/src/TNL/Matrices/SparseOperations_impl.h b/src/TNL/Matrices/SparseOperations_impl.h index 43899e4f3..214c7dd43 100644 --- a/src/TNL/Matrices/SparseOperations_impl.h +++ b/src/TNL/Matrices/SparseOperations_impl.h @@ -110,7 +110,7 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) break; rowLengths[ i ] = length; } - A.setCompressedRowLengths( rowLengths ); + A.setRowCapacities( rowLengths ); #ifdef HAVE_OPENMP #pragma omp parallel for if( Devices::Host::isOMPEnabled() ) @@ -145,7 +145,7 @@ copySparseMatrix_impl( Matrix1& A, const Matrix2& B ) rows, cols ); TNL_CHECK_CUDA_DEVICE; - Apointer->setCompressedRowLengths( rowLengths ); + Apointer->setRowCapacities( rowLengths ); // copy rows Pointers::synchronizeSmartPointersOnDevice< Devices::Cuda >(); @@ -241,7 +241,7 @@ copyAdjacencyStructure( const Matrix& A, AdjacencyMatrix& B, length--; rowLengths[ i ] += length; } - B.setCompressedRowLengths( rowLengths ); + B.setRowCapacities( rowLengths ); // set non-zeros for( IndexType i = 0; i < A.getRows(); i++ ) { diff --git a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h index aa05292a2..c11909c07 100644 --- a/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h +++ b/src/TNL/Solvers/Linear/Preconditioners/ILU0_impl.h @@ -321,8 +321,8 @@ allocate_LU() U_rowLengths_view[ i ] = U_entries; }; Algorithms::ParallelFor< DeviceType >::exec( (IndexType) 0, N, kernel_copy_row_lengths ); - L->setCompressedRowLengths( L_rowLengths ); - U->setCompressedRowLengths( U_rowLengths ); + L->setRowCapacities( L_rowLengths ); + U->setRowCapacities( U_rowLengths ); #else throw std::runtime_error("The program was not compiled with the CUSPARSE library. 
Pass -DHAVE_CUSPARSE -lcusparse to the compiler."); #endif diff --git a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h index c61f7fda7..8a6e0abdd 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixCopyTest.h @@ -68,7 +68,7 @@ void setupUnevenRowSizeMatrix( Matrix& m ) rowLengths.setElement( 6, 1 ); rowLengths.setElement( 7, 1 ); rowLengths.setElement( 9, 1 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); for( int i = 0; i < cols - 4; i++ ) // 0th row m.setElement( 0, i, 1 ); @@ -198,7 +198,7 @@ void setupAntiTriDiagMatrix( Matrix& m ) rowLengths.setValue( 3 ); rowLengths.setElement( 0, 4); rowLengths.setElement( 1, 4 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); for( int i = 0; i < rows; i++ ) for( int j = cols - 1; j > 2; j-- ) @@ -285,7 +285,7 @@ void setupTriDiagMatrix( Matrix& m ) rowLengths.setValue( 3 ); rowLengths.setElement( 0 , 4 ); rowLengths.setElement( 1, 4 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); for( int i = 0; i < rows; i++ ) for( int j = 0; j < 3; j++ ) diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp index a5cd735b0..590a44704 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest.hpp @@ -58,7 +58,7 @@ void test_SetDimensions() } template< typename Matrix > -void test_SetCompressedRowLengths() +void test_SetRowCapacities() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; @@ -76,7 +76,7 @@ void test_SetCompressedRowLengths() for( IndexType i = 2; i < rows; i++ ) rowLengths.setElement( i, rowLength++ ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); // Insert values into the rows. 
for( IndexType i = 0; i < 3; i++ ) // 0th row @@ -172,7 +172,7 @@ void test_GetNumberOfNonzeroMatrixElements() rowLengths.setElement( 8, 1 ); rowLengths.setElement( 9, 1 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); for( IndexType i = 0; i < 4; i++ ) m.setElement( 0, 2 * i, 1 ); @@ -261,7 +261,7 @@ void test_GetRow() rowLengths.setElement( 8, 10 ); rowLengths.setElement( 9, 10 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); auto matrixView = m.getView(); auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { @@ -458,7 +458,7 @@ void test_SetElement() rowLengths.setElement( 8, 10 ); rowLengths.setElement( 9, 10 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); for( IndexType i = 0; i < 4; i++ ) m.setElement( 0, 2 * i, 1 ); @@ -618,7 +618,7 @@ void test_VectorProduct() rowLengths_1.setElement( 1, 2 ); rowLengths_1.setElement( 2, 1 ); rowLengths_1.setElement( 3, 1 ); - m_1.setCompressedRowLengths( rowLengths_1 ); + m_1.setRowCapacities( rowLengths_1 ); m_1.setElement( 0, 0, 1 ); // 0th row @@ -661,7 +661,7 @@ void test_VectorProduct() rowLengths_2.setValue( 3 ); rowLengths_2.setElement( 1, 1 ); rowLengths_2.setElement( 3, 1 ); - m_2.setCompressedRowLengths( rowLengths_2 ); + m_2.setRowCapacities( rowLengths_2 ); for( IndexType i = 0; i < 3; i++ ) // 0th row m_2.setElement( 0, i, 1 ); @@ -702,7 +702,7 @@ void test_VectorProduct() typename Matrix::CompressedRowLengthsVector rowLengths_3; rowLengths_3.setSize( m_rows_3 ); rowLengths_3.setValue( 3 ); - m_3.setCompressedRowLengths( rowLengths_3 ); + m_3.setRowCapacities( rowLengths_3 ); for( IndexType i = 0; i < 3; i++ ) // 0th row m_3.setElement( 0, i, 1 ); @@ -752,7 +752,7 @@ void test_VectorProduct() rowLengths_4.setElement( 2, 5 ); rowLengths_4.setElement( 6, 5 ); rowLengths_4.setElement( 7, 5 ); - m_4.setCompressedRowLengths( rowLengths_4 ); + m_4.setRowCapacities( rowLengths_4 ); for( IndexType i = 0; i 
< 3; i++ ) // 0th row m_4.setElement( 0, i, 1 ); @@ -826,7 +826,7 @@ void test_VectorProduct() rowLengths_5.setElement(5, 7); rowLengths_5.setElement(6, 8); rowLengths_5.setElement(7, 8); - m_5.setCompressedRowLengths( rowLengths_5 ); + m_5.setRowCapacities( rowLengths_5 ); for( IndexType i = 0; i < 3; i++ ) // 0th row m_5.setElement( 0, i, 1 ); @@ -912,7 +912,7 @@ void test_RowsReduction() rowsCapacities.setElement(5, 7); rowsCapacities.setElement(6, 8); rowsCapacities.setElement(7, 8); - m.setCompressedRowLengths( rowsCapacities ); + m.setRowCapacities( rowsCapacities ); for( IndexType i = 0; i < 3; i++ ) // 0th row m.setElement( 0, i, 1 ); @@ -998,7 +998,7 @@ void test_PerformSORIteration() typename Matrix::CompressedRowLengthsVector rowLengths; rowLengths.setSize( m_rows ); rowLengths.setValue( 3 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); m.setElement( 0, 0, 4.0 ); // 0th row m.setElement( 0, 1, 1.0); @@ -1074,7 +1074,7 @@ void test_SaveAndLoad( const char* filename ) Matrix savedMatrix( m_rows, m_cols ); typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); - savedMatrix.setCompressedRowLengths( rowLengths ); + savedMatrix.setRowCapacities( rowLengths ); for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row savedMatrix.setElement( 0, i, 1 ); diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h index df1605c8c..8f7dad73c 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_CSR.h @@ -46,11 +46,11 @@ TYPED_TEST( BinaryMatrixTest_CSR, setDimensionsTest ) test_SetDimensions< CSRMatrixType >(); } -TYPED_TEST( BinaryMatrixTest_CSR, setCompressedRowLengthsTest ) +TYPED_TEST( BinaryMatrixTest_CSR, setRowCapacitiesTest ) { using CSRMatrixType = typename TestFixture::CSRMatrixType; - test_SetCompressedRowLengths< CSRMatrixType >(); + test_SetRowCapacities< CSRMatrixType >(); } TYPED_TEST( 
BinaryMatrixTest_CSR, setLikeTest ) diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h index 789a86447..b903edeaa 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h @@ -57,11 +57,11 @@ TYPED_TEST( BinaryMatrixTest_Ellpack, setDimensionsTest ) test_SetDimensions< EllpackMatrixType >(); } -TYPED_TEST( BinaryMatrixTest_Ellpack, setCompressedRowLengthsTest ) +TYPED_TEST( BinaryMatrixTest_Ellpack, setRowCapacitiesTest ) { using EllpackMatrixType = typename TestFixture::EllpackMatrixType; - test_SetCompressedRowLengths< EllpackMatrixType >(); + test_SetRowCapacities< EllpackMatrixType >(); } TYPED_TEST( BinaryMatrixTest_Ellpack, setLikeTest ) diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h index 382f1d168..673b3b49b 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h @@ -57,11 +57,11 @@ TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setDimensionsTest ) test_SetDimensions< SlicedEllpackMatrixType >(); } -TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setCompressedRowLengthsTest ) +TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setRowCapacitiesTest ) { using SlicedEllpackMatrixType = typename TestFixture::SlicedEllpackMatrixType; - test_SetCompressedRowLengths< SlicedEllpackMatrixType >(); + test_SetRowCapacities< SlicedEllpackMatrixType >(); } TYPED_TEST( BinaryMatrixTest_SlicedEllpack, setLikeTest ) diff --git a/src/UnitTests/Matrices/SparseMatrixCopyTest.h b/src/UnitTests/Matrices/SparseMatrixCopyTest.h index f5bdd7e3f..c9f68b588 100644 --- a/src/UnitTests/Matrices/SparseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/SparseMatrixCopyTest.h @@ -68,7 +68,7 @@ void setupUnevenRowSizeMatrix( Matrix& m ) rowLengths.setElement( 6, 1 ); 
rowLengths.setElement( 7, 1 ); rowLengths.setElement( 9, 1 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); int value = 1; for( int i = 0; i < cols - 4; i++ ) // 0th row @@ -199,7 +199,7 @@ void setupAntiTriDiagMatrix( Matrix& m ) rowLengths.setValue( 3 ); rowLengths.setElement( 0, 4); rowLengths.setElement( 1, 4 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); int value = 1; for( int i = 0; i < rows; i++ ) @@ -287,7 +287,7 @@ void setupTriDiagMatrix( Matrix& m ) rowLengths.setValue( 3 ); rowLengths.setElement( 0 , 4 ); rowLengths.setElement( 1, 4 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); int value = 1; for( int i = 0; i < rows; i++ ) diff --git a/src/UnitTests/Matrices/SparseMatrixTest.h b/src/UnitTests/Matrices/SparseMatrixTest.h index 98e779dae..928e8336c 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.h +++ b/src/UnitTests/Matrices/SparseMatrixTest.h @@ -46,11 +46,11 @@ TYPED_TEST( MatrixTest, setDimensionsTest ) test_SetDimensions< MatrixType >(); } -TYPED_TEST( MatrixTest, setCompressedRowLengthsTest ) +TYPED_TEST( MatrixTest, setRowCapacitiesTest ) { using MatrixType = typename TestFixture::MatrixType; - test_SetCompressedRowLengths< MatrixType >(); + test_SetRowCapacities< MatrixType >(); } TYPED_TEST( MatrixTest, setLikeTest ) @@ -108,4 +108,4 @@ TYPED_TEST( MatrixTest, saveAndLoadTest ) test_SaveAndLoad< MatrixType >( saveAndLoadFileName ); } -#endif \ No newline at end of file +#endif diff --git a/src/UnitTests/Matrices/SparseMatrixTest.hpp b/src/UnitTests/Matrices/SparseMatrixTest.hpp index 7c0d831a8..b5885afbe 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SparseMatrixTest.hpp @@ -243,7 +243,7 @@ void test_SetDimensions() } template< typename Matrix > -void test_SetCompressedRowLengths() +void test_SetRowCapacities() { using RealType = typename Matrix::RealType; using DeviceType = typename 
Matrix::DeviceType; @@ -259,7 +259,7 @@ void test_SetCompressedRowLengths() for( IndexType i = 2; i < rows; i++ ) rowLengths.setElement( i, rowLength++ ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); // Insert values into the rows. RealType value = 1; @@ -347,7 +347,7 @@ void test_GetNonzeroElementsCount() Matrix m( rows, cols ); typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); RealType value = 1; for( IndexType i = 0; i < 4; i++ ) @@ -539,7 +539,7 @@ void test_GetRow() Matrix m( rows, cols ); typename Matrix::CompressedRowLengthsVector rowLengths{ 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); auto matrixView = m.getView(); auto f = [=] __cuda_callable__ ( const IndexType rowIdx ) mutable { @@ -736,7 +736,7 @@ void test_SetElement() m.setDimensions( rows, cols ); typename Matrix::CompressedRowLengthsVector rowLengths { 4, 3, 8, 2, 1, 1, 1, 1, 10, 10 }; - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); RealType value = 1; for( IndexType i = 0; i < 4; i++ ) @@ -898,7 +898,7 @@ void test_AddElement() { 4, 1, 11 }, { 4, 2, 1 }, { 4, 3, 1 }, { 5, 2, 1 }, { 5, 3, 12 }, { 5, 4, 1 } } ); /*typename Matrix::CompressedRowLengthsVector rowLengths( rows, 3 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); RealType value = 1; for( IndexType i = 0; i < cols - 2; i++ ) // 0th row @@ -1047,7 +1047,7 @@ void test_VectorProduct() m_1.reset(); m_1.setDimensions( m_rows_1, m_cols_1 ); typename Matrix::CompressedRowLengthsVector rowLengths_1{ 1, 2, 1, 1 }; - m_1.setCompressedRowLengths( rowLengths_1 ); + m_1.setRowCapacities( rowLengths_1 ); RealType value_1 = 1; m_1.setElement( 0, 0, value_1++ ); // 0th row @@ -1090,7 +1090,7 @@ void test_VectorProduct() Matrix m_2( m_rows_2, m_cols_2 ); typename 
Matrix::CompressedRowLengthsVector rowLengths_2{ 3, 1, 3, 1 }; - m_2.setCompressedRowLengths( rowLengths_2 ); + m_2.setRowCapacities( rowLengths_2 ); RealType value_2 = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row @@ -1135,7 +1135,7 @@ void test_VectorProduct() Matrix m_3( m_rows_3, m_cols_3 ); typename Matrix::CompressedRowLengthsVector rowLengths_3{ 3, 3, 3, 3 }; - m_3.setCompressedRowLengths( rowLengths_3 ); + m_3.setRowCapacities( rowLengths_3 ); RealType value_3 = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row @@ -1185,7 +1185,7 @@ void test_VectorProduct() Matrix m_4( m_rows_4, m_cols_4 ); typename Matrix::CompressedRowLengthsVector rowLengths_4{ 4, 4, 5, 4, 4, 4, 5, 5 }; - m_4.setCompressedRowLengths( rowLengths_4 ); + m_4.setRowCapacities( rowLengths_4 ); RealType value_4 = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row @@ -1253,7 +1253,7 @@ void test_VectorProduct() Matrix m_5( m_rows_5, m_cols_5 ); typename Matrix::CompressedRowLengthsVector rowLengths_5{ 6, 3, 4, 5, 2, 7, 8, 8 }; - m_5.setCompressedRowLengths( rowLengths_5 ); + m_5.setRowCapacities( rowLengths_5 ); RealType value_5 = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row @@ -1388,7 +1388,7 @@ void test_RowsReduction() Matrix m; m.setDimensions( rows, cols ); typename Matrix::RowsCapacitiesType rowsCapacities{ 6, 3, 4, 5, 2, 7, 8, 8 }; - m.setCompressedRowLengths( rowsCapacities ); + m.setRowCapacities( rowsCapacities ); RealType value = 1; for( IndexType i = 0; i < 3; i++ ) // 0th row @@ -1473,7 +1473,7 @@ void test_PerformSORIteration() Matrix m( m_rows, m_cols ); typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); m.setElement( 0, 0, 4.0 ); // 0th row m.setElement( 0, 1, 1.0); @@ -1545,7 +1545,7 @@ void test_SaveAndLoad( const char* filename ) Matrix savedMatrix( m_rows, m_cols ); typename Matrix::CompressedRowLengthsVector rowLengths( m_rows, 3 ); - savedMatrix.setCompressedRowLengths( 
rowLengths ); + savedMatrix.setRowCapacities( rowLengths ); RealType value = 1; for( IndexType i = 0; i < m_cols - 1; i++ ) // 0th row diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h index 3c4093785..d871579b3 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.h @@ -31,11 +31,11 @@ TYPED_TEST( MatrixTest, setDimensionsTest ) test_SetDimensions< MatrixType >(); } -TYPED_TEST( MatrixTest, setCompressedRowLengthsTest ) +TYPED_TEST( MatrixTest, setRowCapacitiesTest ) { using MatrixType = typename TestFixture::MatrixType; - test_SetCompressedRowLengths< MatrixType >(); + test_SetRowCapacities< MatrixType >(); } TYPED_TEST( MatrixTest, setLikeTest ) diff --git a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp index bcb6e8afb..7eeceb87b 100644 --- a/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp +++ b/src/UnitTests/Matrices/SymmetricSparseMatrixTest.hpp @@ -55,7 +55,7 @@ void test_SetDimensions() } template< typename Matrix > -void test_SetCompressedRowLengths() +void test_SetRowCapacities() { using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; @@ -79,7 +79,7 @@ void test_SetCompressedRowLengths() Matrix m( rows, cols ); typename Matrix::CompressedRowLengthsVector rowLengths { 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3 }; - m.setCompressedRowLengths( rowLengths ); + m.setRowCapacities( rowLengths ); // Insert values into the rows. 
RealType value = 1; -- GitLab From 09578e410351aad277983287f5353bb93ac7114f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sat, 26 Dec 2020 19:46:31 +0100 Subject: [PATCH 6/8] Cleaned up tnl-benchmark-linear-solvers.h --- .../tnl-benchmark-linear-solvers.h | 26 ++++--------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h index dbbd7febd..e5a8d9819 100644 --- a/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h +++ b/src/Benchmarks/LinearSolvers/tnl-benchmark-linear-solvers.h @@ -122,14 +122,6 @@ parse_comma_list( const Config::ParameterContainer& parameters, return set; } -// TODO: implement this in TNL::String -bool ends_with( const std::string& value, const std::string& ending ) -{ - if (ending.size() > value.size()) - return false; - return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); -} - // initialize all vector entries with a unioformly distributed random value from the interval [a, b] template< typename Vector > void set_random_vector( Vector& v, typename Vector::RealType a, typename Vector::RealType b ) @@ -161,9 +153,7 @@ void set_random_vector( Vector& v, typename Vector::RealType a, typename Vector: template< typename Matrix, typename Vector > void benchmarkIterativeSolvers( Benchmark& benchmark, -// FIXME: ParameterContainer should be copyable, but that leads to double-free -// Config::ParameterContainer parameters, - Config::ParameterContainer& parameters, + Config::ParameterContainer parameters, const SharedPointer< Matrix >& matrixPointer, const Vector& x0, const Vector& b ) @@ -356,9 +346,7 @@ struct LinearSolversBenchmark static bool run( Benchmark& benchmark, Benchmark::MetadataMap metadata, -// FIXME: ParameterContainer should be copyable, but that leads to double-free -// const Config::ParameterContainer& parameters ) - Config::ParameterContainer& 
parameters ) + const Config::ParameterContainer& parameters ) { const String file_matrix = parameters.getParameter< String >( "input-matrix" ); const String file_dof = parameters.getParameter< String >( "input-dof" ); @@ -368,7 +356,7 @@ struct LinearSolversBenchmark VectorType x0, b; // load the matrix - if( ends_with( file_matrix, ".mtx" ) ) { + if( file_matrix.endsWith( ".mtx" ) ) { Matrices::MatrixReader< MatrixType > reader; reader.readMtxFile( file_matrix, *matrixPointer ); } @@ -443,9 +431,7 @@ struct LinearSolversBenchmark static void runDistributed( Benchmark& benchmark, Benchmark::MetadataMap metadata, -// FIXME: ParameterContainer should be copyable, but that leads to double-free -// const Config::ParameterContainer& parameters, - Config::ParameterContainer& parameters, + const Config::ParameterContainer& parameters, const SharedPointer< MatrixType >& matrixPointer, const VectorType& x0, const VectorType& b ) @@ -489,9 +475,7 @@ struct LinearSolversBenchmark static void runNonDistributed( Benchmark& benchmark, Benchmark::MetadataMap metadata, -// FIXME: ParameterContainer should be copyable, but that leads to double-free -// const Config::ParameterContainer& parameters, - Config::ParameterContainer& parameters, + const Config::ParameterContainer& parameters, const SharedPointer< MatrixType >& matrixPointer, const VectorType& x0, const VectorType& b ) -- GitLab From 45a757dea4eb2ffe6a19e99a4d8a587a1d9e22d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sun, 27 Dec 2020 21:47:03 +0100 Subject: [PATCH 7/8] Removed useless comment and type alias from DistributedVerticalOperations.h --- src/TNL/Containers/Expressions/DistributedVerticalOperations.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/TNL/Containers/Expressions/DistributedVerticalOperations.h b/src/TNL/Containers/Expressions/DistributedVerticalOperations.h index 43390b529..b525e8a53 100644 --- a/src/TNL/Containers/Expressions/DistributedVerticalOperations.h +++ 
b/src/TNL/Containers/Expressions/DistributedVerticalOperations.h @@ -17,8 +17,6 @@ namespace TNL { namespace Containers { namespace Expressions { -//// -// Vertical operations template< typename Expression > auto DistributedExpressionMin( const Expression& expression ) -> std::decay_t< decltype( expression[0] ) > { @@ -145,7 +143,6 @@ template< typename Expression > auto DistributedExpressionSum( const Expression& expression ) -> std::decay_t< decltype( expression[0] ) > { using ResultType = std::decay_t< decltype( expression[0] ) >; - using IndexType = typename Expression::IndexType; using CommunicatorType = typename Expression::CommunicatorType; ResultType result = 0; -- GitLab From 73d94bb586ee7e0ec3dd64c6462a7440df635b77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Klinkovsk=C3=BD?= Date: Sat, 2 Jan 2021 10:16:07 +0100 Subject: [PATCH 8/8] MultiDeviceMemoryOperations: improved conditions to avoid unnecessary buffering --- src/TNL/Algorithms/MultiDeviceMemoryOperations.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/TNL/Algorithms/MultiDeviceMemoryOperations.h b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h index 48e5ad647..903d3befe 100644 --- a/src/TNL/Algorithms/MultiDeviceMemoryOperations.h +++ b/src/TNL/Algorithms/MultiDeviceMemoryOperations.h @@ -125,7 +125,7 @@ copy( DestinationElement* destination, TNL_ASSERT_TRUE( destination, "Attempted to copy data to a nullptr." ); TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." 
); #ifdef HAVE_CUDA - if( std::is_same< DestinationElement, SourceElement >::value ) + if( std::is_same< std::remove_cv_t<DestinationElement>, std::remove_cv_t<SourceElement> >::value ) { if( cudaMemcpy( destination, source, @@ -136,7 +136,7 @@ copy( DestinationElement* destination, } else { - using BaseType = typename std::remove_cv< SourceElement >::type; + using BaseType = std::remove_cv_t< SourceElement >; const int buffer_size = TNL::min( Cuda::getTransferBufferSize() / sizeof(BaseType), size ); std::unique_ptr< BaseType[] > buffer{ new BaseType[ buffer_size ] }; Index i = 0; @@ -221,7 +221,7 @@ copy( DestinationElement* destination, TNL_ASSERT_TRUE( source, "Attempted to copy data from a nullptr." ); TNL_ASSERT_GE( size, (Index) 0, "Array size must be non-negative." ); #ifdef HAVE_CUDA - if( std::is_same< DestinationElement, SourceElement >::value ) + if( std::is_same< std::remove_cv_t<DestinationElement>, std::remove_cv_t<SourceElement> >::value ) { if( cudaMemcpy( destination, source, -- GitLab