From 344b5e071e1f51d255af5035db2a18b2a9e3740f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 9 May 2020 13:55:43 +0200 Subject: [PATCH 01/11] Methods DenseMatrix[View]::get/set/addElement are __cuda_callble__. --- src/TNL/Matrices/DenseMatrix.h | 3 + src/TNL/Matrices/DenseMatrix.hpp | 7 +- src/TNL/Matrices/DenseMatrixView.h | 5 +- src/TNL/Matrices/DenseMatrixView.hpp | 17 +- src/UnitTests/Matrices/DenseMatrixTest.h | 338 ++++++++++++----------- 5 files changed, 190 insertions(+), 180 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h index aea7a33d6..afaa7ca76 100644 --- a/src/TNL/Matrices/DenseMatrix.h +++ b/src/TNL/Matrices/DenseMatrix.h @@ -346,6 +346,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \par Output * \include DenseMatrixExample_setElement.out */ + __cuda_callable__ void setElement( const IndexType row, const IndexType column, const RealType& value ); @@ -365,6 +366,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \param thisElementMultiplicator is multiplicator the original matrix element * value is multiplied by before addition of given e value. */ + __cuda_callable__ void addElement( const IndexType row, const IndexType column, const RealType& value, @@ -384,6 +386,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * * \return value of given matrix element. 
*/ + __cuda_callable__ Real getElement( const IndexType row, const IndexType column ) const; diff --git a/src/TNL/Matrices/DenseMatrix.hpp b/src/TNL/Matrices/DenseMatrix.hpp index bd2ea6212..0bb2b3cb9 100644 --- a/src/TNL/Matrices/DenseMatrix.hpp +++ b/src/TNL/Matrices/DenseMatrix.hpp @@ -233,6 +233,7 @@ DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); + this->segments.reset(); } template< typename Real, @@ -300,7 +301,7 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -void +__cuda_callable__ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: setElement( const IndexType row, const IndexType column, @@ -314,7 +315,7 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -void +__cuda_callable__ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: addElement( const IndexType row, const IndexType column, @@ -329,7 +330,7 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -Real +__cuda_callable__ Real DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index 8ae12f64e..7ad00e97b 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -91,8 +91,6 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > IndexType getNonzeroElementsCount() const; - void reset(); - __cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; @@ -110,15 +108,18 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > const Real& operator()( const IndexType row, const IndexType column ) const; + __cuda_callable__ void setElement( const IndexType row, const IndexType column, const RealType& value ); + __cuda_callable__ void addElement( const 
IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); + __cuda_callable__ Real getElement( const IndexType row, const IndexType column ) const; diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 917fb596b..83e06f9f6 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -168,17 +168,6 @@ getNonzeroElementsCount() const return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder > -void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: -reset() -{ - Matrix< Real, Device, Index >::reset(); -} - template< typename Real, typename Device, typename Index, @@ -250,7 +239,7 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -void +__cuda_callable__ void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: setElement( const IndexType row, const IndexType column, @@ -263,7 +252,7 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -void +__cuda_callable__ void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: addElement( const IndexType row, const IndexType column, @@ -283,7 +272,7 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -Real +__cuda_callable__ Real DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getElement( const IndexType row, const IndexType column ) const diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 391043f0f..5441f8834 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -8,15 +8,17 @@ /* See Copyright Notice in tnl/Copyright */ +#include +#include #include #include #include #include - #include #include +#include +#include #include -#include using Dense_host_float = 
TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int >; using Dense_host_int = TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int >; @@ -377,178 +379,192 @@ void test_SetValue() template< typename Matrix > void test_SetElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x5 dense matrix: - * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * \ 21 22 23 24 25 / - */ - const IndexType rows = 5; - const IndexType cols = 5; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * \ 21 22 23 24 25 / + */ + const IndexType rows = 5; + const IndexType cols = 5; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 
2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + TNL::Containers::Vector< RealType, DeviceType, IndexType > v( m.getRows(), 0 ); + auto v_view = v.getView(); + auto m_view = m.getView(); + auto f1 = [=] __cuda_callable__ ( IndexType i ) mutable { + v_view[ i ] = m_view.getElement( i, i ); + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, m.getRows(), f1 ); + + for( IndexType i = 0; i < m.getRows(); i++ ) + EXPECT_EQ( v.getElement( i ), m.getElement( i, i ) ); + auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { + return ( v_view[ i ] == m_view.getElement( i, i ) ); + }; + EXPECT_TRUE( TNL::Algorithms::Reduction< DeviceType >::reduce( m.getRows(), std::logical_and<>{}, fetch, true ) ); - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); - EXPECT_EQ( m.getElement( 1, 1 ), 7 ); - EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); - EXPECT_EQ( m.getElement( 2, 2 ), 13 ); - EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); - EXPECT_EQ( m.getElement( 3, 3 ), 19 ); - EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); - EXPECT_EQ( 
m.getElement( 4, 3 ), 24 ); - EXPECT_EQ( m.getElement( 4, 4 ), 25 ); } template< typename Matrix > void test_AddElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 6x5 dense matrix: - * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * | 21 22 23 24 25 | - * \ 26 27 28 29 30 / - */ - const IndexType rows = 6; - const IndexType cols = 5; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 6x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * | 21 22 23 24 25 | + * \ 26 27 28 29 30 / + */ + const IndexType rows = 6; + const IndexType cols = 5; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); - // Check the added elements - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); - EXPECT_EQ( m.getElement( 1, 1 ), 7 ); - EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); - EXPECT_EQ( m.getElement( 2, 2 ), 13 ); - EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 
3, 2 ), 18 ); - EXPECT_EQ( m.getElement( 3, 3 ), 19 ); - EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); - EXPECT_EQ( m.getElement( 4, 3 ), 24 ); - EXPECT_EQ( m.getElement( 4, 4 ), 25 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 26 ); - EXPECT_EQ( m.getElement( 5, 1 ), 27 ); - EXPECT_EQ( m.getElement( 5, 2 ), 28 ); - EXPECT_EQ( m.getElement( 5, 3 ), 29 ); - EXPECT_EQ( m.getElement( 5, 4 ), 30 ); - - // Add new elements to the old elements with a multiplying factor applied to the old elements. -/* - * The following setup results in the following 6x5 dense matrix: - * - * / 3 6 9 12 15 \ - * | 18 21 24 27 30 | - * | 33 36 39 42 45 | - * | 48 51 54 57 60 | - * | 63 66 69 72 75 | - * \ 78 81 84 87 90 / - */ - RealType newValue = 1; - RealType multiplicator = 2; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.addElement( i, j, newValue++, multiplicator ); - - EXPECT_EQ( m.getElement( 0, 0 ), 3 ); - EXPECT_EQ( m.getElement( 0, 1 ), 6 ); - EXPECT_EQ( m.getElement( 0, 2 ), 9 ); - EXPECT_EQ( m.getElement( 0, 3 ), 12 ); - EXPECT_EQ( m.getElement( 0, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 18 ); - EXPECT_EQ( m.getElement( 1, 1 ), 21 ); - EXPECT_EQ( m.getElement( 1, 2 ), 24 ); - EXPECT_EQ( m.getElement( 1, 3 ), 27 ); - EXPECT_EQ( m.getElement( 1, 4 ), 30 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 33 ); - EXPECT_EQ( m.getElement( 2, 1 ), 36 ); - EXPECT_EQ( m.getElement( 2, 2 ), 39 ); - EXPECT_EQ( m.getElement( 2, 3 ), 42 ); - EXPECT_EQ( m.getElement( 2, 4 ), 45 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 48 ); - EXPECT_EQ( m.getElement( 3, 1 ), 51 ); - EXPECT_EQ( m.getElement( 3, 2 ), 54 ); - EXPECT_EQ( m.getElement( 3, 3 ), 57 ); - EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 63 ); - EXPECT_EQ( m.getElement( 4, 1 ), 66 ); - EXPECT_EQ( m.getElement( 4, 2 ), 69 ); - EXPECT_EQ( m.getElement( 4, 3 ), 72 
); - EXPECT_EQ( m.getElement( 4, 4 ), 75 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 78 ); - EXPECT_EQ( m.getElement( 5, 1 ), 81 ); - EXPECT_EQ( m.getElement( 5, 2 ), 84 ); - EXPECT_EQ( m.getElement( 5, 3 ), 87 ); - EXPECT_EQ( m.getElement( 5, 4 ), 90 ); + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 26 ); + EXPECT_EQ( m.getElement( 5, 1 ), 27 ); + EXPECT_EQ( m.getElement( 5, 2 ), 28 ); + EXPECT_EQ( m.getElement( 5, 3 ), 29 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
+ /* + * The following setup results in the following 6x5 dense matrix: + * + * / 3 6 9 12 15 \ + * | 18 21 24 27 30 | + * | 33 36 39 42 45 | + * | 48 51 54 57 60 | + * | 63 66 69 72 75 | + * \ 78 81 84 87 90 / + */ + RealType newValue = 1; + RealType multiplicator = 2; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.addElement( i, j, newValue++, multiplicator ); + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 9 ); + EXPECT_EQ( m.getElement( 0, 3 ), 12 ); + EXPECT_EQ( m.getElement( 0, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 18 ); + EXPECT_EQ( m.getElement( 1, 1 ), 21 ); + EXPECT_EQ( m.getElement( 1, 2 ), 24 ); + EXPECT_EQ( m.getElement( 1, 3 ), 27 ); + EXPECT_EQ( m.getElement( 1, 4 ), 30 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 33 ); + EXPECT_EQ( m.getElement( 2, 1 ), 36 ); + EXPECT_EQ( m.getElement( 2, 2 ), 39 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); + EXPECT_EQ( m.getElement( 2, 4 ), 45 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 48 ); + EXPECT_EQ( m.getElement( 3, 1 ), 51 ); + EXPECT_EQ( m.getElement( 3, 2 ), 54 ); + EXPECT_EQ( m.getElement( 3, 3 ), 57 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 63 ); + EXPECT_EQ( m.getElement( 4, 1 ), 66 ); + EXPECT_EQ( m.getElement( 4, 2 ), 69 ); + EXPECT_EQ( m.getElement( 4, 3 ), 72 ); + EXPECT_EQ( m.getElement( 4, 4 ), 75 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 78 ); + EXPECT_EQ( m.getElement( 5, 1 ), 81 ); + EXPECT_EQ( m.getElement( 5, 2 ), 84 ); + EXPECT_EQ( m.getElement( 5, 3 ), 87 ); + EXPECT_EQ( m.getElement( 5, 4 ), 90 ); } template< typename Matrix > -- GitLab From 5f98eedebfc46bbea6cdf4735f05f3d465479508 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 9 May 2020 14:12:30 +0200 Subject: [PATCH 02/11] Deleted DenseMatrix[View]::getRowVectorProduct and added rows range to vectorProduct method. 
--- src/TNL/Matrices/DenseMatrix.h | 42 +++++++++++----------------- src/TNL/Matrices/DenseMatrix.hpp | 23 +++++---------- src/TNL/Matrices/DenseMatrixView.h | 11 ++++---- src/TNL/Matrices/DenseMatrixView.hpp | 30 +++++++------------- 4 files changed, 39 insertions(+), 67 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h index afaa7ca76..db262a145 100644 --- a/src/TNL/Matrices/DenseMatrix.h +++ b/src/TNL/Matrices/DenseMatrix.h @@ -331,9 +331,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Sets element at given \e row and \e column to given \e value. * - * This method can be called only from the host system (CPU) no matter - * where the matrix is allocated. If the matrix is allocated in GPU device - * this methods transfer values of each matrix element separately and so the + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * and this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * @@ -354,9 +355,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Add element at given \e row and \e column to given \e value. * - * This method can be called only from the host system (CPU) no matter - * where the matrix is allocated. If the matrix is allocated in GPU device - * this methods transfer values of each matrix element separately and so the + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels.
If the matrix is allocated in GPU device + * and this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * @@ -375,9 +377,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Returns value of matrix element at position given by its row and column index. * - * This method can be called only from the host system (CPU) no matter - * where the matrix is allocated. If the matrix is allocated in GPU device - * this methods transfer values of each matrix element separately and so the + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * and this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * @@ -518,21 +521,6 @@ class DenseMatrix : public Matrix< Real, Device, Index > template< typename Function > void forAllRows( Function& function ); - /** - * \brief This method computes scalar product of given vector and one - * row of the matrix. - * - * \tparam Vector is type of input vector. It can be \ref Vector, - * \ref VectorView, \ref Array, \ref ArraView or similar container. - * \param row is index of the row used for the scalar product. - * \param vector is the input vector. - * \return result of the matrix row and vector product. - */ - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; - /** * \brief Computes product of matrix and vector.
* @@ -546,7 +534,11 @@ class DenseMatrix : public Matrix< Real, Device, Index > */ template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + OutVector& outVector, + const RealType& matrixMultiplicator = 1.0, + const RealType& outVectorMultiplicator = 0.0, + const IndexType firstRow = 0, + const IndexType lastRow = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, diff --git a/src/TNL/Matrices/DenseMatrix.hpp b/src/TNL/Matrices/DenseMatrix.hpp index 0bb2b3cb9..c41a466de 100644 --- a/src/TNL/Matrices/DenseMatrix.hpp +++ b/src/TNL/Matrices/DenseMatrix.hpp @@ -416,20 +416,6 @@ forAllRows( Function& function ) this->forRows( 0, this->getRows(), function ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > - template< typename Vector > -__cuda_callable__ -typename Vector::RealType -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: -rowVectorProduct( const IndexType row, const Vector& vector ) const -{ - return this->view.rowVectorProduct( row, vector ); -} - template< typename Real, typename Device, typename Index, @@ -439,9 +425,14 @@ template< typename Real, typename OutVector > void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: -vectorProduct( const InVector& inVector, OutVector& outVector ) const +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType& matrixMultiplicator, + const RealType& outVectorMultiplicator, + const IndexType firstRow, + const IndexType lastRow ) const { - this->view.vectorProduct( inVector, outVector ); + this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, firstRow, lastRow ); } template< typename Real, diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index 7ad00e97b..61cf8686a 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ 
b/src/TNL/Matrices/DenseMatrixView.h @@ -141,14 +141,13 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > template< typename Function > void forAllRows( Function& function ); - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; - template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + OutVector& outVector, + const RealType& matrixMultiplicator = 1.0, + const RealType& outVectorMultiplicator = 0.0, + const IndexType firstRow = 0, + IndexType lastRow = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 83e06f9f6..57deb41a4 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -367,23 +367,6 @@ forAllRows( Function& function ) this->forRows( 0, this->getRows(), function ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder > - template< typename Vector > -__cuda_callable__ -typename Vector::RealType -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: -rowVectorProduct( const IndexType row, const Vector& vector ) const -{ - RealType sum( 0.0 ); - // TODO: Fix this - //for( IndexType column = 0; column < this->getColumns(); column++ ) - // sum += this->getElementFast( row, column ) * vector[ column ]; - return sum; -} - template< typename Real, typename Device, typename Index, @@ -392,7 +375,12 @@ template< typename Real, typename OutVector > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: -vectorProduct( const InVector& inVector, OutVector& outVector ) const +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType& matrixMultiplicator, + const RealType& outVectorMultiplicator, + const IndexType firstRow, + IndexType lastRow ) const { 
TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." ); @@ -400,13 +388,15 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); const auto valuesView = this->values.getConstView(); + if( lastRow == 0 ) + lastRow = this->getRows(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { return valuesView[ offset ] * inVectorView[ column ]; }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { - outVectorView[ row ] = value; + outVectorView[ row ] = matrixMultiplicator * value + outVectorMultiplicator * outVectorView[ row ]; }; - this->segments.segmentsReduction( 0, this->getRows(), fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); + this->segments.segmentsReduction( firstRow, lastRow, fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); } template< typename Real, -- GitLab From c990232637bc4a2afe7291ebcfe45f4ea8ecd322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 9 May 2020 14:23:09 +0200 Subject: [PATCH 03/11] Fixed documentation of DenseMatrix::vectorProduct. --- src/TNL/Matrices/DenseMatrix.h | 15 +++++++++++++-- src/TNL/Matrices/DenseMatrix.hpp | 6 +++--- src/TNL/Matrices/DenseMatrixView.h | 4 ++-- src/TNL/Matrices/DenseMatrixView.hpp | 10 +++++----- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h index db262a145..e6595f3a2 100644 --- a/src/TNL/Matrices/DenseMatrix.h +++ b/src/TNL/Matrices/DenseMatrix.h @@ -524,6 +524,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Computes product of matrix and vector. 
* + * More precisely, it computes: + * + * outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector. + * * \tparam InVector is type of input vector. It can be \ref Vector, * \ref VectorView, \ref Array, \ref ArraView or similar container. * \tparam OutVector is type of output vector. It can be \ref Vector, @@ -531,14 +535,21 @@ class DenseMatrix : public Matrix< Real, Device, Index > * * \param inVector is input vector. * \param outVector is output vector. + * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default. + * \param outVectorMultiplicator is a factor by which the outVector is multiplied before being added + * to the result of matrix-vector product. It is zero by default. + * \param begin is the beginning of the rows range for which the vector product + * is computed. It is zero by default. + * \param end is the end of the rows range for which the vector product + * is computed. The default value 0 means the number of matrix rows.
*/ template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator = 1.0, const RealType& outVectorMultiplicator = 0.0, - const IndexType firstRow = 0, - const IndexType lastRow = 0 ) const; + const IndexType begin = 0, + const IndexType end = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, diff --git a/src/TNL/Matrices/DenseMatrix.hpp b/src/TNL/Matrices/DenseMatrix.hpp index c41a466de..da6957a9c 100644 --- a/src/TNL/Matrices/DenseMatrix.hpp +++ b/src/TNL/Matrices/DenseMatrix.hpp @@ -429,10 +429,10 @@ vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, const RealType& outVectorMultiplicator, - const IndexType firstRow, - const IndexType lastRow ) const + const IndexType begin, + const IndexType end ) const { - this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, firstRow, lastRow ); + this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, begin, end ); } template< typename Real, diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index 61cf8686a..65bcaeb42 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -146,8 +146,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > OutVector& outVector, const RealType& matrixMultiplicator = 1.0, const RealType& outVectorMultiplicator = 0.0, - const IndexType firstRow = 0, - IndexType lastRow = 0 ) const; + const IndexType begin = 0, + IndexType end = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 57deb41a4..c406f5254 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -379,8 +379,8 @@ vectorProduct( const InVector& inVector, 
OutVector& outVector, const RealType& matrixMultiplicator, const RealType& outVectorMultiplicator, - const IndexType firstRow, - IndexType lastRow ) const + const IndexType begin, + IndexType end ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." ); @@ -388,15 +388,15 @@ vectorProduct( const InVector& inVector, const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); const auto valuesView = this->values.getConstView(); - if( lastRow == 0 ) - lastRow = this->getRows(); + if( end == 0 ) + end = this->getRows(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { return valuesView[ offset ] * inVectorView[ column ]; }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { outVectorView[ row ] = matrixMultiplicator * value + outVectorMultiplicator * outVectorView[ row ]; }; - this->segments.segmentsReduction( firstRow, lastRow, fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); + this->segments.segmentsReduction( begin, end, fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); } template< typename Real, -- GitLab From 1936f67ef633714ca7b27a36d0543d9434bfee6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 9 May 2020 20:10:03 +0200 Subject: [PATCH 04/11] Update of DenseMatrix::setElement example. 
--- .../DenseMatrixExample_setElement.cpp | 20 ++++++++++++++++--- src/TNL/Matrices/DenseMatrix.h | 2 +- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp b/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp index 0b5498adf..4f92496f8 100644 --- a/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp +++ b/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp @@ -1,15 +1,29 @@ #include +#include #include #include +#include +#include template< typename Device > void setElements() { - TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); + TNL::Pointers::SharedPointer< TNL::Matrices::DenseMatrix< double, Device > > matrix( 5, 5 ); for( int i = 0; i < 5; i++ ) - matrix.setElement( i, i, i ); + matrix->setElement( i, i, i ); + + std::cout << "Matrix set from the host:" << std::endl; + std::cout << *matrix << std::endl; + + auto f = [=] __cuda_callable__ ( int i ) mutable { + matrix->setElement( i, i, -i ); + }; + TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); + TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f ); + + std::cout << "Matrix set from its native device:" << std::endl; + std::cout << *matrix << std::endl; - std::cout << matrix << std::endl; } int main( int argc, char* argv[] ) diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h index e6595f3a2..81a6e26a8 100644 --- a/src/TNL/Matrices/DenseMatrix.h +++ b/src/TNL/Matrices/DenseMatrix.h @@ -366,7 +366,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \param column is columns index of the element. * \param value is the value the element will be set to. * \param thisElementMultiplicator is multiplicator the original matrix element - * value is multiplied by before addition of given e value. + * value is multiplied by before addition of given \e value. 
*/ __cuda_callable__ void addElement( const IndexType row, -- GitLab From ddd42c40cd4cb1d6d33e65b5b53a6f9ce73c65fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 9 May 2020 13:55:43 +0200 Subject: [PATCH 05/11] Methods DenseMatrix[View]::get/set/addElement are __cuda_callble__. --- src/TNL/Matrices/DenseMatrix.h | 3 + src/TNL/Matrices/DenseMatrix.hpp | 7 +- src/TNL/Matrices/DenseMatrixView.h | 5 +- src/TNL/Matrices/DenseMatrixView.hpp | 17 +- src/UnitTests/Matrices/DenseMatrixTest.h | 338 ++++++++++++----------- 5 files changed, 190 insertions(+), 180 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h index aea7a33d6..afaa7ca76 100644 --- a/src/TNL/Matrices/DenseMatrix.h +++ b/src/TNL/Matrices/DenseMatrix.h @@ -346,6 +346,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \par Output * \include DenseMatrixExample_setElement.out */ + __cuda_callable__ void setElement( const IndexType row, const IndexType column, const RealType& value ); @@ -365,6 +366,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \param thisElementMultiplicator is multiplicator the original matrix element * value is multiplied by before addition of given e value. */ + __cuda_callable__ void addElement( const IndexType row, const IndexType column, const RealType& value, @@ -384,6 +386,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * * \return value of given matrix element. 
*/ + __cuda_callable__ Real getElement( const IndexType row, const IndexType column ) const; diff --git a/src/TNL/Matrices/DenseMatrix.hpp b/src/TNL/Matrices/DenseMatrix.hpp index bd2ea6212..0bb2b3cb9 100644 --- a/src/TNL/Matrices/DenseMatrix.hpp +++ b/src/TNL/Matrices/DenseMatrix.hpp @@ -233,6 +233,7 @@ DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); + this->segments.reset(); } template< typename Real, @@ -300,7 +301,7 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -void +__cuda_callable__ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: setElement( const IndexType row, const IndexType column, @@ -314,7 +315,7 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -void +__cuda_callable__ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: addElement( const IndexType row, const IndexType column, @@ -329,7 +330,7 @@ template< typename Real, typename Index, bool RowMajorOrder, typename RealAllocator > -Real +__cuda_callable__ Real DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index 8ae12f64e..7ad00e97b 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -91,8 +91,6 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > IndexType getNonzeroElementsCount() const; - void reset(); - __cuda_callable__ const RowView getRow( const IndexType& rowIdx ) const; @@ -110,15 +108,18 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > const Real& operator()( const IndexType row, const IndexType column ) const; + __cuda_callable__ void setElement( const IndexType row, const IndexType column, const RealType& value ); + __cuda_callable__ void addElement( const 
IndexType row, const IndexType column, const RealType& value, const RealType& thisElementMultiplicator = 1.0 ); + __cuda_callable__ Real getElement( const IndexType row, const IndexType column ) const; diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 917fb596b..83e06f9f6 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -168,17 +168,6 @@ getNonzeroElementsCount() const return Algorithms::Reduction< DeviceType >::reduce( this->values.getSize(), std::plus<>{}, fetch, 0 ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder > -void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: -reset() -{ - Matrix< Real, Device, Index >::reset(); -} - template< typename Real, typename Device, typename Index, @@ -250,7 +239,7 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -void +__cuda_callable__ void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: setElement( const IndexType row, const IndexType column, @@ -263,7 +252,7 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -void +__cuda_callable__ void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: addElement( const IndexType row, const IndexType column, @@ -283,7 +272,7 @@ template< typename Real, typename Device, typename Index, bool RowMajorOrder > -Real +__cuda_callable__ Real DenseMatrixView< Real, Device, Index, RowMajorOrder >:: getElement( const IndexType row, const IndexType column ) const diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 391043f0f..5441f8834 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -8,15 +8,17 @@ /* See Copyright Notice in tnl/Copyright */ +#include +#include #include #include #include #include - #include #include +#include +#include #include -#include using Dense_host_float = 
TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int >; using Dense_host_int = TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int >; @@ -377,178 +379,192 @@ void test_SetValue() template< typename Matrix > void test_SetElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 5x5 dense matrix: - * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * \ 21 22 23 24 25 / - */ - const IndexType rows = 5; - const IndexType cols = 5; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 5x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * \ 21 22 23 24 25 / + */ + const IndexType rows = 5; + const IndexType cols = 5; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); + + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 
2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + TNL::Containers::Vector< RealType, DeviceType, IndexType > v( m.getRows(), 0 ); + auto v_view = v.getView(); + auto m_view = m.getView(); + auto f1 = [=] __cuda_callable__ ( IndexType i ) mutable { + v_view[ i ] = m_view.getElement( i, i ); + }; + TNL::Algorithms::ParallelFor< DeviceType >::exec( ( IndexType ) 0, m.getRows(), f1 ); + + for( IndexType i = 0; i < m.getRows(); i++ ) + EXPECT_EQ( v.getElement( i ), m.getElement( i, i ) ); + auto fetch = [=] __cuda_callable__ ( IndexType i ) -> bool { + return ( v_view[ i ] == m_view.getElement( i, i ) ); + }; + EXPECT_TRUE( TNL::Algorithms::Reduction< DeviceType >::reduce( m.getRows(), std::logical_and<>{}, fetch, true ) ); - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); - EXPECT_EQ( m.getElement( 1, 1 ), 7 ); - EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); - EXPECT_EQ( m.getElement( 2, 2 ), 13 ); - EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 3, 2 ), 18 ); - EXPECT_EQ( m.getElement( 3, 3 ), 19 ); - EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); - EXPECT_EQ( 
m.getElement( 4, 3 ), 24 ); - EXPECT_EQ( m.getElement( 4, 4 ), 25 ); } template< typename Matrix > void test_AddElement() { - using RealType = typename Matrix::RealType; - using DeviceType = typename Matrix::DeviceType; - using IndexType = typename Matrix::IndexType; -/* - * Sets up the following 6x5 dense matrix: - * - * / 1 2 3 4 5 \ - * | 6 7 8 9 10 | - * | 11 12 13 14 15 | - * | 16 17 18 19 20 | - * | 21 22 23 24 25 | - * \ 26 27 28 29 30 / - */ - const IndexType rows = 6; - const IndexType cols = 5; + using RealType = typename Matrix::RealType; + using DeviceType = typename Matrix::DeviceType; + using IndexType = typename Matrix::IndexType; - Matrix m; - m.reset(); - m.setDimensions( rows, cols ); + /* + * Sets up the following 6x5 dense matrix: + * + * / 1 2 3 4 5 \ + * | 6 7 8 9 10 | + * | 11 12 13 14 15 | + * | 16 17 18 19 20 | + * | 21 22 23 24 25 | + * \ 26 27 28 29 30 / + */ + const IndexType rows = 6; + const IndexType cols = 5; - RealType value = 1; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.setElement( i, j, value++ ); + Matrix m( rows, cols ); - // Check the added elements - EXPECT_EQ( m.getElement( 0, 0 ), 1 ); - EXPECT_EQ( m.getElement( 0, 1 ), 2 ); - EXPECT_EQ( m.getElement( 0, 2 ), 3 ); - EXPECT_EQ( m.getElement( 0, 3 ), 4 ); - EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + RealType value = 1; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.setElement( i, j, value++ ); - EXPECT_EQ( m.getElement( 1, 0 ), 6 ); - EXPECT_EQ( m.getElement( 1, 1 ), 7 ); - EXPECT_EQ( m.getElement( 1, 2 ), 8 ); - EXPECT_EQ( m.getElement( 1, 3 ), 9 ); - EXPECT_EQ( m.getElement( 1, 4 ), 10 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 11 ); - EXPECT_EQ( m.getElement( 2, 1 ), 12 ); - EXPECT_EQ( m.getElement( 2, 2 ), 13 ); - EXPECT_EQ( m.getElement( 2, 3 ), 14 ); - EXPECT_EQ( m.getElement( 2, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 16 ); - EXPECT_EQ( m.getElement( 3, 1 ), 17 ); - EXPECT_EQ( m.getElement( 
3, 2 ), 18 ); - EXPECT_EQ( m.getElement( 3, 3 ), 19 ); - EXPECT_EQ( m.getElement( 3, 4 ), 20 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 21 ); - EXPECT_EQ( m.getElement( 4, 1 ), 22 ); - EXPECT_EQ( m.getElement( 4, 2 ), 23 ); - EXPECT_EQ( m.getElement( 4, 3 ), 24 ); - EXPECT_EQ( m.getElement( 4, 4 ), 25 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 26 ); - EXPECT_EQ( m.getElement( 5, 1 ), 27 ); - EXPECT_EQ( m.getElement( 5, 2 ), 28 ); - EXPECT_EQ( m.getElement( 5, 3 ), 29 ); - EXPECT_EQ( m.getElement( 5, 4 ), 30 ); - - // Add new elements to the old elements with a multiplying factor applied to the old elements. -/* - * The following setup results in the following 6x5 dense matrix: - * - * / 3 6 9 12 15 \ - * | 18 21 24 27 30 | - * | 33 36 39 42 45 | - * | 48 51 54 57 60 | - * | 63 66 69 72 75 | - * \ 78 81 84 87 90 / - */ - RealType newValue = 1; - RealType multiplicator = 2; - for( IndexType i = 0; i < rows; i++ ) - for( IndexType j = 0; j < cols; j++ ) - m.addElement( i, j, newValue++, multiplicator ); - - EXPECT_EQ( m.getElement( 0, 0 ), 3 ); - EXPECT_EQ( m.getElement( 0, 1 ), 6 ); - EXPECT_EQ( m.getElement( 0, 2 ), 9 ); - EXPECT_EQ( m.getElement( 0, 3 ), 12 ); - EXPECT_EQ( m.getElement( 0, 4 ), 15 ); - - EXPECT_EQ( m.getElement( 1, 0 ), 18 ); - EXPECT_EQ( m.getElement( 1, 1 ), 21 ); - EXPECT_EQ( m.getElement( 1, 2 ), 24 ); - EXPECT_EQ( m.getElement( 1, 3 ), 27 ); - EXPECT_EQ( m.getElement( 1, 4 ), 30 ); - - EXPECT_EQ( m.getElement( 2, 0 ), 33 ); - EXPECT_EQ( m.getElement( 2, 1 ), 36 ); - EXPECT_EQ( m.getElement( 2, 2 ), 39 ); - EXPECT_EQ( m.getElement( 2, 3 ), 42 ); - EXPECT_EQ( m.getElement( 2, 4 ), 45 ); - - EXPECT_EQ( m.getElement( 3, 0 ), 48 ); - EXPECT_EQ( m.getElement( 3, 1 ), 51 ); - EXPECT_EQ( m.getElement( 3, 2 ), 54 ); - EXPECT_EQ( m.getElement( 3, 3 ), 57 ); - EXPECT_EQ( m.getElement( 3, 4 ), 60 ); - - EXPECT_EQ( m.getElement( 4, 0 ), 63 ); - EXPECT_EQ( m.getElement( 4, 1 ), 66 ); - EXPECT_EQ( m.getElement( 4, 2 ), 69 ); - EXPECT_EQ( m.getElement( 4, 3 ), 72 
); - EXPECT_EQ( m.getElement( 4, 4 ), 75 ); - - EXPECT_EQ( m.getElement( 5, 0 ), 78 ); - EXPECT_EQ( m.getElement( 5, 1 ), 81 ); - EXPECT_EQ( m.getElement( 5, 2 ), 84 ); - EXPECT_EQ( m.getElement( 5, 3 ), 87 ); - EXPECT_EQ( m.getElement( 5, 4 ), 90 ); + // Check the added elements + EXPECT_EQ( m.getElement( 0, 0 ), 1 ); + EXPECT_EQ( m.getElement( 0, 1 ), 2 ); + EXPECT_EQ( m.getElement( 0, 2 ), 3 ); + EXPECT_EQ( m.getElement( 0, 3 ), 4 ); + EXPECT_EQ( m.getElement( 0, 4 ), 5 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 6 ); + EXPECT_EQ( m.getElement( 1, 1 ), 7 ); + EXPECT_EQ( m.getElement( 1, 2 ), 8 ); + EXPECT_EQ( m.getElement( 1, 3 ), 9 ); + EXPECT_EQ( m.getElement( 1, 4 ), 10 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 11 ); + EXPECT_EQ( m.getElement( 2, 1 ), 12 ); + EXPECT_EQ( m.getElement( 2, 2 ), 13 ); + EXPECT_EQ( m.getElement( 2, 3 ), 14 ); + EXPECT_EQ( m.getElement( 2, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 16 ); + EXPECT_EQ( m.getElement( 3, 1 ), 17 ); + EXPECT_EQ( m.getElement( 3, 2 ), 18 ); + EXPECT_EQ( m.getElement( 3, 3 ), 19 ); + EXPECT_EQ( m.getElement( 3, 4 ), 20 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 21 ); + EXPECT_EQ( m.getElement( 4, 1 ), 22 ); + EXPECT_EQ( m.getElement( 4, 2 ), 23 ); + EXPECT_EQ( m.getElement( 4, 3 ), 24 ); + EXPECT_EQ( m.getElement( 4, 4 ), 25 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 26 ); + EXPECT_EQ( m.getElement( 5, 1 ), 27 ); + EXPECT_EQ( m.getElement( 5, 2 ), 28 ); + EXPECT_EQ( m.getElement( 5, 3 ), 29 ); + EXPECT_EQ( m.getElement( 5, 4 ), 30 ); + + // Add new elements to the old elements with a multiplying factor applied to the old elements. 
+ /* + * The following setup results in the following 6x5 dense matrix: + * + * / 3 6 9 12 15 \ + * | 18 21 24 27 30 | + * | 33 36 39 42 45 | + * | 48 51 54 57 60 | + * | 63 66 69 72 75 | + * \ 78 81 84 87 90 / + */ + RealType newValue = 1; + RealType multiplicator = 2; + for( IndexType i = 0; i < rows; i++ ) + for( IndexType j = 0; j < cols; j++ ) + m.addElement( i, j, newValue++, multiplicator ); + + EXPECT_EQ( m.getElement( 0, 0 ), 3 ); + EXPECT_EQ( m.getElement( 0, 1 ), 6 ); + EXPECT_EQ( m.getElement( 0, 2 ), 9 ); + EXPECT_EQ( m.getElement( 0, 3 ), 12 ); + EXPECT_EQ( m.getElement( 0, 4 ), 15 ); + + EXPECT_EQ( m.getElement( 1, 0 ), 18 ); + EXPECT_EQ( m.getElement( 1, 1 ), 21 ); + EXPECT_EQ( m.getElement( 1, 2 ), 24 ); + EXPECT_EQ( m.getElement( 1, 3 ), 27 ); + EXPECT_EQ( m.getElement( 1, 4 ), 30 ); + + EXPECT_EQ( m.getElement( 2, 0 ), 33 ); + EXPECT_EQ( m.getElement( 2, 1 ), 36 ); + EXPECT_EQ( m.getElement( 2, 2 ), 39 ); + EXPECT_EQ( m.getElement( 2, 3 ), 42 ); + EXPECT_EQ( m.getElement( 2, 4 ), 45 ); + + EXPECT_EQ( m.getElement( 3, 0 ), 48 ); + EXPECT_EQ( m.getElement( 3, 1 ), 51 ); + EXPECT_EQ( m.getElement( 3, 2 ), 54 ); + EXPECT_EQ( m.getElement( 3, 3 ), 57 ); + EXPECT_EQ( m.getElement( 3, 4 ), 60 ); + + EXPECT_EQ( m.getElement( 4, 0 ), 63 ); + EXPECT_EQ( m.getElement( 4, 1 ), 66 ); + EXPECT_EQ( m.getElement( 4, 2 ), 69 ); + EXPECT_EQ( m.getElement( 4, 3 ), 72 ); + EXPECT_EQ( m.getElement( 4, 4 ), 75 ); + + EXPECT_EQ( m.getElement( 5, 0 ), 78 ); + EXPECT_EQ( m.getElement( 5, 1 ), 81 ); + EXPECT_EQ( m.getElement( 5, 2 ), 84 ); + EXPECT_EQ( m.getElement( 5, 3 ), 87 ); + EXPECT_EQ( m.getElement( 5, 4 ), 90 ); } template< typename Matrix > -- GitLab From c722348782a0faeaff65d44a8a834564121fc8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 9 May 2020 14:12:30 +0200 Subject: [PATCH 06/11] Deleted DenseMatrix[View]::getRowVectorProduct and added rows range to vectorProduct method. 
--- src/TNL/Matrices/DenseMatrix.h | 42 +++++++++++----------------- src/TNL/Matrices/DenseMatrix.hpp | 23 +++++---------- src/TNL/Matrices/DenseMatrixView.h | 11 ++++---- src/TNL/Matrices/DenseMatrixView.hpp | 30 +++++++------------- 4 files changed, 39 insertions(+), 67 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h index afaa7ca76..db262a145 100644 --- a/src/TNL/Matrices/DenseMatrix.h +++ b/src/TNL/Matrices/DenseMatrix.h @@ -331,9 +331,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Sets element at given \e row and \e column to given \e value. * - * This method can be called only from the host system (CPU) no matter - * where the matrix is allocated. If the matrix is allocated in GPU device - * this methods transfer values of each matrix element separately and so the + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * @@ -354,9 +355,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Add element at given \e row and \e column to given \e value. * - * This method can be called only from the host system (CPU) no matter - * where the matrix is allocated. If the matrix is allocated in GPU device - * this methods transfer values of each matrix element separately and so the + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. 
If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * @@ -375,9 +377,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Returns value of matrix element at position given by its row and column index. * - * This method can be called only from the host system (CPU) no matter - * where the matrix is allocated. If the matrix is allocated in GPU device - * this methods transfer values of each matrix element separately and so the + * This method can be called from the host system (CPU) no matter + * where the matrix is allocated. If the matrix is allocated on GPU this method + * can be called even from device kernels. If the matrix is allocated in GPU device + * this method is called from CPU, it transfers values of each matrix element separately and so the * performance is very low. For higher performance see. \ref DenseMatrix::getRow * or \ref DenseMatrix::forRows and \ref DenseMatrix::forAllRows. * @@ -518,21 +521,6 @@ class DenseMatrix : public Matrix< Real, Device, Index > template< typename Function > void forAllRows( Function& function ); - /** - * \brief This method computes scalar product of given vector and one - * row of the matrix. - * - * \tparam Vector is type of input vector. It can be \ref Vector, - * \ref VectorView, \ref Array, \ref ArraView or similar container. - * \param row is index of the row used for the scalar product. - * \param vector is the input vector. - * \return result of the matrix row and vector product. - */ - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; - /** * \brief Computes product of matrix and vector. 
* @@ -546,7 +534,11 @@ class DenseMatrix : public Matrix< Real, Device, Index > */ template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + OutVector& outVector, + const RealType& matrixMultiplicator = 1.0, + const RealType& outVectorMultiplicator = 0.0, + const IndexType firstRow = 0, + const IndexType lastRow = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, diff --git a/src/TNL/Matrices/DenseMatrix.hpp b/src/TNL/Matrices/DenseMatrix.hpp index 0bb2b3cb9..c41a466de 100644 --- a/src/TNL/Matrices/DenseMatrix.hpp +++ b/src/TNL/Matrices/DenseMatrix.hpp @@ -416,20 +416,6 @@ forAllRows( Function& function ) this->forRows( 0, this->getRows(), function ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder, - typename RealAllocator > - template< typename Vector > -__cuda_callable__ -typename Vector::RealType -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: -rowVectorProduct( const IndexType row, const Vector& vector ) const -{ - return this->view.rowVectorProduct( row, vector ); -} - template< typename Real, typename Device, typename Index, @@ -439,9 +425,14 @@ template< typename Real, typename OutVector > void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: -vectorProduct( const InVector& inVector, OutVector& outVector ) const +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType& matrixMultiplicator, + const RealType& outVectorMultiplicator, + const IndexType firstRow, + const IndexType lastRow ) const { - this->view.vectorProduct( inVector, outVector ); + this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, firstRow, lastRow ); } template< typename Real, diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index 7ad00e97b..61cf8686a 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ 
b/src/TNL/Matrices/DenseMatrixView.h @@ -141,14 +141,13 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > template< typename Function > void forAllRows( Function& function ); - template< typename Vector > - __cuda_callable__ - typename Vector::RealType rowVectorProduct( const IndexType row, - const Vector& vector ) const; - template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, - OutVector& outVector ) const; + OutVector& outVector, + const RealType& matrixMultiplicator = 1.0, + const RealType& outVectorMultiplicator = 0.0, + const IndexType firstRow = 0, + IndexType lastRow = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 83e06f9f6..57deb41a4 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -367,23 +367,6 @@ forAllRows( Function& function ) this->forRows( 0, this->getRows(), function ); } -template< typename Real, - typename Device, - typename Index, - bool RowMajorOrder > - template< typename Vector > -__cuda_callable__ -typename Vector::RealType -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: -rowVectorProduct( const IndexType row, const Vector& vector ) const -{ - RealType sum( 0.0 ); - // TODO: Fix this - //for( IndexType column = 0; column < this->getColumns(); column++ ) - // sum += this->getElementFast( row, column ) * vector[ column ]; - return sum; -} - template< typename Real, typename Device, typename Index, @@ -392,7 +375,12 @@ template< typename Real, typename OutVector > void DenseMatrixView< Real, Device, Index, RowMajorOrder >:: -vectorProduct( const InVector& inVector, OutVector& outVector ) const +vectorProduct( const InVector& inVector, + OutVector& outVector, + const RealType& matrixMultiplicator, + const RealType& outVectorMultiplicator, + const IndexType firstRow, + IndexType lastRow ) const { 
TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." ); @@ -400,13 +388,15 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); const auto valuesView = this->values.getConstView(); + if( lastRow == 0 ) + lastRow = this->getRows(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { return valuesView[ offset ] * inVectorView[ column ]; }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { - outVectorView[ row ] = value; + outVectorView[ row ] = matrixMultiplicator * value + outVectorMultiplicator * outVectorView[ row ]; }; - this->segments.segmentsReduction( 0, this->getRows(), fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); + this->segments.segmentsReduction( firstRow, lastRow, fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); } template< typename Real, -- GitLab From 861e594d39fb8208a748fd7512960395b3c8da0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 9 May 2020 14:23:09 +0200 Subject: [PATCH 07/11] Fixed documentation of DenseMatrix::vectorProduct. --- src/TNL/Matrices/DenseMatrix.h | 15 +++++++++++++-- src/TNL/Matrices/DenseMatrix.hpp | 6 +++--- src/TNL/Matrices/DenseMatrixView.h | 4 ++-- src/TNL/Matrices/DenseMatrixView.hpp | 10 +++++----- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h index db262a145..e6595f3a2 100644 --- a/src/TNL/Matrices/DenseMatrix.h +++ b/src/TNL/Matrices/DenseMatrix.h @@ -524,6 +524,10 @@ class DenseMatrix : public Matrix< Real, Device, Index > /** * \brief Computes product of matrix and vector. 
* + * More precisely, it computes: + * + * outVector = matrixMultiplicator * ( *this ) * inVector + outVectorMultiplicator * outVector. + * * \tparam InVector is type of input vector. It can be \ref Vector, * \ref VectorView, \ref Array, \ref ArraView or similar container. * \tparam OutVector is type of output vector. It can be \ref Vector, @@ -531,14 +535,21 @@ class DenseMatrix : public Matrix< Real, Device, Index > * * \param inVector is input vector. * \param outVector is output vector. + * \param matrixMultiplicator is a factor by which the matrix is multiplied. It is one by default. + * \param outVectorMultiplicator is a factor by which the outVector is multiplied before added + * to the result of matrix-vector product. It is zero by default. + * \param begin is the beginning of the rows range for which the vector product + * is computed. It is zero by default. + * \param end is the end of the rows range for which the vector product + * is computed. It is number if the matrix rows by default. 
*/ template< typename InVector, typename OutVector > void vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator = 1.0, const RealType& outVectorMultiplicator = 0.0, - const IndexType firstRow = 0, - const IndexType lastRow = 0 ) const; + const IndexType begin = 0, + const IndexType end = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, diff --git a/src/TNL/Matrices/DenseMatrix.hpp b/src/TNL/Matrices/DenseMatrix.hpp index c41a466de..da6957a9c 100644 --- a/src/TNL/Matrices/DenseMatrix.hpp +++ b/src/TNL/Matrices/DenseMatrix.hpp @@ -429,10 +429,10 @@ vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, const RealType& outVectorMultiplicator, - const IndexType firstRow, - const IndexType lastRow ) const + const IndexType begin, + const IndexType end ) const { - this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, firstRow, lastRow ); + this->view.vectorProduct( inVector, outVector, matrixMultiplicator, outVectorMultiplicator, begin, end ); } template< typename Real, diff --git a/src/TNL/Matrices/DenseMatrixView.h b/src/TNL/Matrices/DenseMatrixView.h index 61cf8686a..65bcaeb42 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -146,8 +146,8 @@ class DenseMatrixView : public MatrixView< Real, Device, Index > OutVector& outVector, const RealType& matrixMultiplicator = 1.0, const RealType& outVectorMultiplicator = 0.0, - const IndexType firstRow = 0, - IndexType lastRow = 0 ) const; + const IndexType begin = 0, + IndexType end = 0 ) const; template< typename Matrix > void addMatrix( const Matrix& matrix, diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index 57deb41a4..c406f5254 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -379,8 +379,8 @@ vectorProduct( const InVector& inVector, 
OutVector& outVector, const RealType& matrixMultiplicator, const RealType& outVectorMultiplicator, - const IndexType firstRow, - IndexType lastRow ) const + const IndexType begin, + IndexType end ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns count differs with input vector size." ); TNL_ASSERT_EQ( this->getRows(), outVector.getSize(), "Matrix rows count differs with output vector size." ); @@ -388,15 +388,15 @@ vectorProduct( const InVector& inVector, const auto inVectorView = inVector.getConstView(); auto outVectorView = outVector.getView(); const auto valuesView = this->values.getConstView(); - if( lastRow == 0 ) - lastRow = this->getRows(); + if( end == 0 ) + end = this->getRows(); auto fetch = [=] __cuda_callable__ ( IndexType row, IndexType column, IndexType offset, bool& compute ) -> RealType { return valuesView[ offset ] * inVectorView[ column ]; }; auto keeper = [=] __cuda_callable__ ( IndexType row, const RealType& value ) mutable { outVectorView[ row ] = matrixMultiplicator * value + outVectorMultiplicator * outVectorView[ row ]; }; - this->segments.segmentsReduction( firstRow, lastRow, fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); + this->segments.segmentsReduction( begin, end, fetch, std::plus<>{}, keeper, ( RealType ) 0.0 ); } template< typename Real, -- GitLab From a715fb4838cc3e2f6441d83a60892b920ce70b7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sat, 9 May 2020 20:10:03 +0200 Subject: [PATCH 08/11] Update of DenseMatrix::setElement example. 
--- .../DenseMatrixExample_setElement.cpp | 20 ++++++++++++++++--- src/TNL/Matrices/DenseMatrix.h | 2 +- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp b/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp index 0b5498adf..4f92496f8 100644 --- a/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp +++ b/Documentation/Examples/Matrices/DenseMatrixExample_setElement.cpp @@ -1,15 +1,29 @@ #include +#include #include #include +#include +#include template< typename Device > void setElements() { - TNL::Matrices::DenseMatrix< double, Device > matrix( 5, 5 ); + TNL::Pointers::SharedPointer< TNL::Matrices::DenseMatrix< double, Device > > matrix( 5, 5 ); for( int i = 0; i < 5; i++ ) - matrix.setElement( i, i, i ); + matrix->setElement( i, i, i ); + + std::cout << "Matrix set from the host:" << std::endl; + std::cout << *matrix << std::endl; + + auto f = [=] __cuda_callable__ ( int i ) mutable { + matrix->setElement( i, i, -i ); + }; + TNL::Pointers::synchronizeSmartPointersOnDevice< Device >(); + TNL::Algorithms::ParallelFor< Device >::exec( 0, 5, f ); + + std::cout << "Matrix set from its native device:" << std::endl; + std::cout << *matrix << std::endl; - std::cout << matrix << std::endl; } int main( int argc, char* argv[] ) diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h index e6595f3a2..81a6e26a8 100644 --- a/src/TNL/Matrices/DenseMatrix.h +++ b/src/TNL/Matrices/DenseMatrix.h @@ -366,7 +366,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \param column is columns index of the element. * \param value is the value the element will be set to. * \param thisElementMultiplicator is multiplicator the original matrix element - * value is multiplied by before addition of given e value. + * value is multiplied by before addition of given \e value. 
*/ __cuda_callable__ void addElement( const IndexType row, -- GitLab From 6ad896e3f2c5e6960a9bc00476e8ad0ad6bd45c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 10 May 2020 14:24:51 +0200 Subject: [PATCH 09/11] Replacing bool RowMajorOrder with ElementsOrganization enum. --- .../Examples/Matrices/CMakeLists.txt | 15 +- src/TNL/Containers/Segments/BiEllpack.h | 18 +- src/TNL/Containers/Segments/BiEllpack.hpp | 134 +++++------ .../Segments/BiEllpackSegmentView.h | 5 +- src/TNL/Containers/Segments/BiEllpackView.h | 5 +- src/TNL/Containers/Segments/BiEllpackView.hpp | 134 +++++------ src/TNL/Containers/Segments/CSR.h | 2 +- src/TNL/Containers/Segments/CSRView.h | 2 +- src/TNL/Containers/Segments/ChunkedEllpack.h | 18 +- .../Containers/Segments/ChunkedEllpack.hpp | 114 ++++----- .../Segments/ChunkedEllpackSegmentView.h | 6 +- .../Containers/Segments/ChunkedEllpackView.h | 5 +- .../Segments/ChunkedEllpackView.hpp | 122 +++++----- .../Segments/ElementsOrganization.h | 32 +++ src/TNL/Containers/Segments/Ellpack.h | 14 +- src/TNL/Containers/Segments/Ellpack.hpp | 118 ++++----- src/TNL/Containers/Segments/EllpackView.h | 9 +- src/TNL/Containers/Segments/EllpackView.hpp | 102 ++++---- src/TNL/Containers/Segments/SegmentView.h | 8 +- src/TNL/Containers/Segments/SlicedEllpack.h | 16 +- src/TNL/Containers/Segments/SlicedEllpack.hpp | 110 ++++----- .../Containers/Segments/SlicedEllpackView.h | 9 +- .../Containers/Segments/SlicedEllpackView.hpp | 102 ++++---- .../Containers/Segments/details/BiEllpack.h | 10 +- .../Segments/details/ChunkedEllpack.h | 14 +- src/TNL/Matrices/DenseMatrix.h | 36 +-- src/TNL/Matrices/DenseMatrix.hpp | 210 ++++++++-------- src/TNL/Matrices/DenseMatrixView.h | 118 +++++++-- src/TNL/Matrices/DenseMatrixView.hpp | 147 ++++++------ src/TNL/Matrices/DistributedSpMV.h | 2 +- src/TNL/Matrices/Matrix.h | 3 + src/TNL/Matrices/MatrixInfo.h | 10 +- src/TNL/Matrices/Multidiagonal.h | 30 +-- src/TNL/Matrices/Multidiagonal.hpp | 224 
+++++++++--------- src/TNL/Matrices/MultidiagonalMatrixView.h | 24 +- src/TNL/Matrices/MultidiagonalMatrixView.hpp | 176 +++++++------- src/TNL/Matrices/SparseMatrix.h | 4 +- src/TNL/Matrices/SparseMatrix.hpp | 6 +- src/TNL/Matrices/Tridiagonal.h | 30 +-- src/TNL/Matrices/Tridiagonal.hpp | 212 ++++++++--------- src/TNL/Matrices/TridiagonalMatrixView.h | 24 +- src/TNL/Matrices/TridiagonalMatrixView.hpp | 162 ++++++------- .../Matrices/BinarySparseMatrixTest_Ellpack.h | 4 +- .../BinarySparseMatrixTest_SlicedEllpack.h | 4 +- src/UnitTests/Matrices/DenseMatrixCopyTest.h | 8 +- src/UnitTests/Matrices/DenseMatrixTest.h | 17 +- .../Matrices/MultidiagonalMatrixTest.h | 24 +- .../Matrices/SparseMatrixTest_BiEllpack.h | 4 +- .../SparseMatrixTest_ChunkedEllpack.h | 4 +- .../Matrices/SparseMatrixTest_Ellpack.h | 4 +- .../Matrices/SparseMatrixTest_SlicedEllpack.h | 4 +- .../Matrices/TridiagonalMatrixTest.h | 29 ++- 52 files changed, 1390 insertions(+), 1254 deletions(-) create mode 100644 src/TNL/Containers/Segments/ElementsOrganization.h diff --git a/Documentation/Examples/Matrices/CMakeLists.txt b/Documentation/Examples/Matrices/CMakeLists.txt index e0d7a6f42..710524aaa 100644 --- a/Documentation/Examples/Matrices/CMakeLists.txt +++ b/Documentation/Examples/Matrices/CMakeLists.txt @@ -64,6 +64,11 @@ IF( BUILD_CUDA ) ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out OUTPUT DenseMatrixExample_forAllRows.out ) + CUDA_ADD_EXECUTABLE( DenseMatrixViewExample_constructor_cuda DenseMatrixViewExample_constructor.cu ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor_cuda > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out + OUTPUT DenseMatrixViewExample_constructor.out ) + ELSE() ADD_EXECUTABLE( DenseMatrixExample_Constructor_init_list DenseMatrixExample_Constructor_init_list.cpp ) ADD_CUSTOM_COMMAND( COMMAND DenseMatrixExample_Constructor_init_list > @@ -130,11 +135,12 @@ ELSE() 
${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixExample_forAllRows.out OUTPUT DenseMatrixExample_forAllRows.out ) -ENDIF() - - - + ADD_EXECUTABLE( DenseMatrixViewExample_constructor DenseMatrixViewExample_constructor.cpp ) + ADD_CUSTOM_COMMAND( COMMAND DenseMatrixViewExample_constructor > + ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/DenseMatrixViewExample_constructor.out + OUTPUT DenseMatrixViewExample_constructor.out ) +ENDIF() ADD_CUSTOM_TARGET( RunMatricesExamples ALL DEPENDS DenseMatrixExample_Constructor_init_list.out @@ -150,5 +156,6 @@ ADD_CUSTOM_TARGET( RunMatricesExamples ALL DEPENDS DenseMatrixExample_allRowsReduction.out DenseMatrixExample_forRows.out DenseMatrixExample_forAllRows.out + DenseMatrixViewExample_constructor.out ) diff --git a/src/TNL/Containers/Segments/BiEllpack.h b/src/TNL/Containers/Segments/BiEllpack.h index 5f16011c5..c367bfa5b 100644 --- a/src/TNL/Containers/Segments/BiEllpack.h +++ b/src/TNL/Containers/Segments/BiEllpack.h @@ -22,7 +22,7 @@ namespace TNL { template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), int WarpSize = 32 > class BiEllpack { @@ -31,12 +31,12 @@ class BiEllpack using DeviceType = Device; using IndexType = std::remove_const_t< Index >; using OffsetsHolder = Containers::Vector< Index, DeviceType, IndexType, IndexAllocator >; - static constexpr bool getRowMajorOrder() { return RowMajorOrder; } - using ViewType = BiEllpackView< Device, Index, RowMajorOrder >; + static constexpr bool getOrganization() { return Organization; } + using ViewType = BiEllpackView< Device, Index, Organization >; template< typename Device_, typename Index_ > - using ViewTemplate = BiEllpackView< Device_, Index_, RowMajorOrder >; - using ConstViewType = BiEllpackView< 
Device, std::add_const_t< IndexType >, RowMajorOrder >; - using SegmentViewType = BiEllpackSegmentView< IndexType, RowMajorOrder >; + using ViewTemplate = BiEllpackView< Device_, Index_, Organization >; + using ConstViewType = BiEllpackView< Device, std::add_const_t< IndexType >, Organization >; + using SegmentViewType = BiEllpackSegmentView< IndexType, Organization >; BiEllpack() = default; @@ -109,8 +109,8 @@ class BiEllpack BiEllpack& operator=( const BiEllpack& source ) = default; - template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > - BiEllpack& operator=( const BiEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, WarpSize >& source ); + template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_ > + BiEllpack& operator=( const BiEllpack< Device_, Index_, IndexAllocator_, Organization_, WarpSize >& source ); void save( File& file ) const; @@ -163,7 +163,7 @@ class BiEllpack return 0; }; - template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int WarpSize_ > + template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_, int WarpSize_ > friend class BiEllpack; }; diff --git a/src/TNL/Containers/Segments/BiEllpack.hpp b/src/TNL/Containers/Segments/BiEllpack.hpp index 032543d1a..36995d1ef 100644 --- a/src/TNL/Containers/Segments/BiEllpack.hpp +++ b/src/TNL/Containers/Segments/BiEllpack.hpp @@ -23,9 +23,9 @@ namespace TNL { template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: BiEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) { this->setSegmentsSizes( sizes ); @@ -34,9 +34,9 @@ BiEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) 
template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: BiEllpack( const BiEllpack& biEllpack ) : size( biEllpack.size ), storageSize( biEllpack.storageSize ), @@ -49,9 +49,9 @@ BiEllpack( const BiEllpack& biEllpack ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: BiEllpack( const BiEllpack&& biEllpack ) : size( biEllpack.size ), storageSize( biEllpack.storageSize ), @@ -64,10 +64,10 @@ BiEllpack( const BiEllpack&& biEllpack ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > String -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getSerializationType() { return "BiEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; @@ -76,10 +76,10 @@ getSerializationType() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > String -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getSegmentsType() { return ViewType::getSegmentsType(); @@ -88,10 +88,10 @@ getSegmentsType() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -typename BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >::ViewType -BiEllpack< Device, Index, IndexAllocator, 
RowMajorOrder, WarpSize >:: +typename BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >::ViewType +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getView() { return ViewType( size, storageSize, virtualRows, rowPermArray.getView(), groupPointers.getView() ); @@ -100,9 +100,9 @@ getView() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -auto BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +auto BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getConstView() const -> const ConstViewType { return ConstViewType( size, storageSize, virtualRows, rowPermArray.getConstView(), groupPointers.getConstView() ); @@ -111,9 +111,9 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -auto BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +auto BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getSegmentsCount() const -> IndexType { return this->size; @@ -122,10 +122,10 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename SizesHolder > -void BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +void BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: performRowBubbleSort( const SizesHolder& segmentsSizes ) { if( segmentsSizes.getSize() == 0 ) @@ -190,10 +190,10 @@ performRowBubbleSort( const SizesHolder& segmentsSizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename SizesHolder > -void BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, 
WarpSize >:: +void BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: computeColumnSizes( const SizesHolder& segmentsSizes ) { IndexType numberOfStrips = this->virtualRows / getWarpSize(); @@ -237,10 +237,10 @@ computeColumnSizes( const SizesHolder& segmentsSizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename SizesHolder > -void BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +void BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: verifyRowPerm( const SizesHolder& segmentsSizes ) { bool ok = true; @@ -284,10 +284,10 @@ verifyRowPerm( const SizesHolder& segmentsSizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename SizesHolder > -void BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +void BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: verifyRowLengths( const SizesHolder& segmentsSizes ) { bool ok = true; @@ -300,7 +300,7 @@ verifyRowLengths( const SizesHolder& segmentsSizes ) const IndexType begin = this->groupPointers.getElement( groupBegin ) * getWarpSize() + rowStripPerm * stripLength; IndexType elementPtr = begin; IndexType rowLength = 0; - const IndexType groupsCount = details::BiEllpack< Index, Device, RowMajorOrder, WarpSize >::getActiveGroupsCount( this->rowPermArray.getConstView(), segmentIdx ); + const IndexType groupsCount = details::BiEllpack< Index, Device, Organization, WarpSize >::getActiveGroupsCount( this->rowPermArray.getConstView(), segmentIdx ); for( IndexType group = 0; group < groupsCount; group++ ) { for( IndexType i = 0; i < this->getGroupLength( strip, group ); i++ ) @@ -324,11 +324,11 @@ verifyRowLengths( const SizesHolder& segmentsSizes ) template< typename Device, typename Index, typename IndexAllocator, - bool 
RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename SizesHolder > void -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: setSegmentsSizes( const SizesHolder& segmentsSizes ) { if( std::is_same< DeviceType, Devices::Host >::value ) @@ -354,7 +354,7 @@ setSegmentsSizes( const SizesHolder& segmentsSizes ) } else { - BiEllpack< Devices::Host, Index, typename Allocators::Default< Devices::Host >::template Allocator< IndexType >, RowMajorOrder > hostSegments; + BiEllpack< Devices::Host, Index, typename Allocators::Default< Devices::Host >::template Allocator< IndexType >, Organization > hostSegments; Containers::Vector< IndexType, Devices::Host, IndexType > hostSegmentsSizes; hostSegmentsSizes = segmentsSizes; hostSegments.setSegmentsSizes( hostSegmentsSizes ); @@ -365,10 +365,10 @@ setSegmentsSizes( const SizesHolder& segmentsSizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > void -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: reset() { this->size = 0; @@ -381,12 +381,12 @@ reset() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -auto BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +auto BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSize( + return details::BiEllpack< IndexType, DeviceType, Organization >::getSegmentSize( rowPermArray.getConstView(), groupPointers.getConstView(), segmentIdx ); @@ -395,9 +395,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType 
template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -__cuda_callable__ auto BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +__cuda_callable__ auto BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getSize() const -> IndexType { return this->size; @@ -406,9 +406,9 @@ getSize() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -__cuda_callable__ auto BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +__cuda_callable__ auto BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getStorageSize() const -> IndexType { return this->storageSize; @@ -417,12 +417,12 @@ getStorageSize() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -__cuda_callable__ auto BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +__cuda_callable__ auto BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getGlobalIndex( const IndexType segmentIdx, const IndexType localIdx ) const -> IndexType { - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndex( + return details::BiEllpack< IndexType, DeviceType, Organization >::getGlobalIndex( rowPermArray.getConstView(), groupPointers.getConstView(), segmentIdx, @@ -432,9 +432,9 @@ getGlobalIndex( const IndexType segmentIdx, const IndexType localIdx ) const -> template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -__cuda_callable__ auto BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +__cuda_callable__ auto BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getSegmentView( const 
IndexType segmentIdx ) const -> SegmentViewType { } @@ -442,11 +442,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename Function, typename... Args > void -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { this->getConstView().forSegments( first, last, f, args... ); @@ -455,11 +455,11 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename Function, typename... Args > void -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -468,11 +468,11 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... 
); @@ -481,11 +481,11 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); @@ -494,12 +494,12 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > - template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >& -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: -operator=( const BiEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, WarpSize >& source ) + template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_ > +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >& +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: +operator=( const BiEllpack< Device_, Index_, IndexAllocator_, Organization_, WarpSize >& source ) { this->size = source.size; this->storageSize = source.storageSize; @@ -512,10 +512,10 @@ operator=( const BiEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Wa template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > void -BiEllpack< Device, Index, 
IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: save( File& file ) const { file.save( &this->size ); @@ -528,10 +528,10 @@ save( File& file ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > void -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: load( File& file ) { file.load( &this->size ); @@ -544,10 +544,10 @@ load( File& file ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > void -BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: printStructure( std::ostream& str ) const { this->view.printStructure( str ); @@ -556,20 +556,20 @@ printStructure( std::ostream& str ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -auto BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +auto BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getStripLength( const IndexType stripIdx ) const -> IndexType { - return details::BiEllpack< Index, Device, RowMajorOrder, WarpSize >::getStripLength( this->groupPointers.getConstView(), stripIdx ); + return details::BiEllpack< Index, Device, Organization, WarpSize >::getStripLength( this->groupPointers.getConstView(), stripIdx ); } template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -auto BiEllpack< Device, Index, IndexAllocator, RowMajorOrder, WarpSize >:: +auto BiEllpack< Device, Index, IndexAllocator, Organization, WarpSize >:: getGroupLength( const IndexType strip, const IndexType 
group ) const -> IndexType { return this->groupPointers.getElement( strip * ( getLogWarpSize() + 1 ) + group + 1 ) diff --git a/src/TNL/Containers/Segments/BiEllpackSegmentView.h b/src/TNL/Containers/Segments/BiEllpackSegmentView.h index 5f5e72049..0c05947c5 100644 --- a/src/TNL/Containers/Segments/BiEllpackSegmentView.h +++ b/src/TNL/Containers/Segments/BiEllpackSegmentView.h @@ -11,6 +11,7 @@ #pragma once #include +#include #include namespace TNL { @@ -18,7 +19,7 @@ namespace TNL { namespace Segments { template< typename Index, - bool RowMajorOrder = false, + ElementsOrganization Organization, int WarpSize = 32 > class BiEllpackSegmentView { @@ -68,7 +69,7 @@ class BiEllpackSegmentView groupHeight /= 2; } TNL_ASSERT_LE( groupIdx, TNL::log2( getWarpSize() - inStripIdx + 1 ), "Local index exceeds segment bounds." ); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { //std::cerr << " offset = " << offset << " inStripIdx = " << inStripIdx << " localIdx = " << localIdx // << " return = " << offset + inStripIdx * groupsWidth[ groupIdx ] + localIdx << std::endl; diff --git a/src/TNL/Containers/Segments/BiEllpackView.h b/src/TNL/Containers/Segments/BiEllpackView.h index e4807bef8..b132e5e37 100644 --- a/src/TNL/Containers/Segments/BiEllpackView.h +++ b/src/TNL/Containers/Segments/BiEllpackView.h @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -23,7 +24,7 @@ namespace TNL { template< typename Device, typename Index, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), int WarpSize = 32 > class BiEllpackView { @@ -37,7 +38,7 @@ class BiEllpackView template< typename Device_, typename Index_ > using ViewTemplate = BiEllpackView< Device_, Index_ >; using ConstViewType = BiEllpackView< Device, std::add_const_t< Index > >; - using SegmentViewType = BiEllpackSegmentView< IndexType, RowMajorOrder >; + using 
SegmentViewType = BiEllpackSegmentView< IndexType, Organization >; __cuda_callable__ BiEllpackView() = default; diff --git a/src/TNL/Containers/Segments/BiEllpackView.hpp b/src/TNL/Containers/Segments/BiEllpackView.hpp index 1cb5ce7c0..0f396b7a4 100644 --- a/src/TNL/Containers/Segments/BiEllpackView.hpp +++ b/src/TNL/Containers/Segments/BiEllpackView.hpp @@ -22,10 +22,10 @@ namespace TNL { template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > __cuda_callable__ -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: BiEllpackView( const IndexType size, const IndexType storageSize, const IndexType virtualRows, @@ -41,10 +41,10 @@ BiEllpackView( const IndexType size, template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > __cuda_callable__ -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: BiEllpackView( const IndexType size, const IndexType storageSize, const IndexType virtualRows, @@ -60,10 +60,10 @@ BiEllpackView( const IndexType size, template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > __cuda_callable__ -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: BiEllpackView( const BiEllpackView& bi_ellpack_view ) : size( bi_ellpack_view.size ), storageSize( bi_ellpack_view.storageSize ), @@ -75,10 +75,10 @@ BiEllpackView( const BiEllpackView& bi_ellpack_view ) template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > __cuda_callable__ -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: BiEllpackView( const BiEllpackView&& bi_ellpack_view ) : size( bi_ellpack_view.size ), 
storageSize( bi_ellpack_view.storageSize ), @@ -90,10 +90,10 @@ BiEllpackView( const BiEllpackView&& bi_ellpack_view ) template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > String -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: getSerializationType() { return "BiEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; @@ -101,10 +101,10 @@ getSerializationType() template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > String -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: getSegmentsType() { return "BiEllpack"; @@ -112,11 +112,11 @@ getSegmentsType() template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > __cuda_callable__ -typename BiEllpackView< Device, Index, RowMajorOrder, WarpSize >::ViewType -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +typename BiEllpackView< Device, Index, Organization, WarpSize >::ViewType +BiEllpackView< Device, Index, Organization, WarpSize >:: getView() { return ViewType( size, storageSize, virtualRows, rowPermArray.getView(), groupPointers.getView() ); @@ -124,9 +124,9 @@ getView() template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -__cuda_callable__ auto BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getConstView() const -> const ConstViewType { return ConstViewType( size, storageSize, virtualRows, rowPermArray.getConstView(), groupPointers.getConstView() ); @@ -134,9 +134,9 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int 
WarpSize > -__cuda_callable__ auto BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getSegmentsCount() const -> IndexType { return this->size; @@ -144,25 +144,25 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -__cuda_callable__ auto BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { if( std::is_same< DeviceType, Devices::Host >::value ) - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder, WarpSize >::getSegmentSizeDirect( + return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSizeDirect( rowPermArray, groupPointers, segmentIdx ); if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder, WarpSize >::getSegmentSizeDirect( + return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSizeDirect( rowPermArray, groupPointers, segmentIdx ); #else - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder, WarpSize >::getSegmentSize( + return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentSize( rowPermArray, groupPointers, segmentIdx ); @@ -172,9 +172,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -__cuda_callable__ auto BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getSize() const -> IndexType { return this->size; @@ -182,9 +182,9 @@ getSize() const -> IndexType template< typename Device, typename 
Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -__cuda_callable__ auto BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getStorageSize() const -> IndexType { return this->storageSize; @@ -192,13 +192,13 @@ getStorageSize() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -__cuda_callable__ auto BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +__cuda_callable__ auto BiEllpackView< Device, Index, Organization, WarpSize >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { if( std::is_same< DeviceType, Devices::Host >::value ) - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder, WarpSize >::getGlobalIndexDirect( + return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndexDirect( rowPermArray, groupPointers, segmentIdx, @@ -206,13 +206,13 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder, WarpSize >::getGlobalIndexDirect( + return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndexDirect( rowPermArray, groupPointers, segmentIdx, localIdx ); #else - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder, WarpSize >::getGlobalIndex( + return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getGlobalIndex( rowPermArray, groupPointers, segmentIdx, @@ -223,27 +223,27 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > __cuda_callable__ auto -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: 
+BiEllpackView< Device, Index, Organization, WarpSize >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { if( std::is_same< DeviceType, Devices::Host >::value ) - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder, WarpSize >::getSegmentViewDirect( + return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentViewDirect( rowPermArray, groupPointers, segmentIdx ); if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder, WarpSize >::getSegmentViewDirect( + return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentViewDirect( rowPermArray, groupPointers, segmentIdx ); #else - return details::BiEllpack< IndexType, DeviceType, RowMajorOrder, WarpSize >::getSegmentView( + return details::BiEllpack< IndexType, DeviceType, Organization, WarpSize >::getSegmentView( rowPermArray, groupPointers, segmentIdx ); @@ -253,11 +253,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename Function, typename... Args > void -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto segmentsPermutationView = this->rowPermArray.getConstView(); @@ -266,7 +266,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... 
args ) const const IndexType strip = segmentIdx / getWarpSize(); const IndexType firstGroupInStrip = strip * ( getLogWarpSize() + 1 ); const IndexType rowStripPerm = segmentsPermutationView[ segmentIdx ] - strip * getWarpSize(); - const IndexType groupsCount = details::BiEllpack< IndexType, DeviceType, RowMajorOrder, getWarpSize() >::getActiveGroupsCountDirect( segmentsPermutationView, segmentIdx ); + const IndexType groupsCount = details::BiEllpack< IndexType, DeviceType, Organization, getWarpSize() >::getActiveGroupsCountDirect( segmentsPermutationView, segmentIdx ); IndexType groupHeight = getWarpSize(); //printf( "segmentIdx = %d strip = %d firstGroupInStrip = %d rowStripPerm = %d groupsCount = %d \n", segmentIdx, strip, firstGroupInStrip, rowStripPerm, groupsCount ); bool compute( true ); @@ -281,7 +281,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const const IndexType groupWidth = groupSize / groupHeight; for( IndexType i = 0; i < groupWidth; i++ ) { - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { f( segmentIdx, localIdx, groupOffset + rowStripPerm * groupWidth + i, compute ); } @@ -303,11 +303,11 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename Function, typename... Args > void -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -315,11 +315,11 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... 
Args > void -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType; @@ -329,7 +329,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio const IndexType stripIdx = segmentIdx / getWarpSize(); const IndexType groupIdx = stripIdx * ( getLogWarpSize() + 1 ); const IndexType inStripIdx = rowPermArray[ segmentIdx ] - stripIdx * getWarpSize(); - const IndexType groupsCount = details::BiEllpack< IndexType, DeviceType, RowMajorOrder, getWarpSize() >::getActiveGroupsCount( rowPermArray, segmentIdx ); + const IndexType groupsCount = details::BiEllpack< IndexType, DeviceType, Organization, getWarpSize() >::getActiveGroupsCount( rowPermArray, segmentIdx ); IndexType globalIdx = groupPointers[ groupIdx ]; IndexType groupHeight = getWarpSize(); IndexType localIdx( 0 ); @@ -337,10 +337,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio bool compute( true ); for( IndexType group = 0; group < groupsCount && compute; group++ ) { - const IndexType groupSize = details::BiEllpack< IndexType, DeviceType, RowMajorOrder, getWarpSize() >::getGroupSize( groupPointers, stripIdx, group ); + const IndexType groupSize = details::BiEllpack< IndexType, DeviceType, Organization, getWarpSize() >::getGroupSize( groupPointers, stripIdx, group ); IndexType groupWidth = groupSize / groupHeight; const IndexType globalIdxBack = globalIdx; - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) globalIdx += inStripIdx * groupWidth; else globalIdx += inStripIdx; @@ -351,7 +351,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio // << " localIdx = " << localIdx << " globalIdx = " << globalIdx // << " fetch = 
" << details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) << std::endl; aux = reduction( aux, details::FetchLambdaAdapter< IndexType, Fetch >::call( fetch, segmentIdx, localIdx++, globalIdx, compute ) ); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) globalIdx ++; else globalIdx += groupHeight; @@ -370,7 +370,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio const IndexType cudaBlocks = roundUpDivision( stripsCount * getWarpSize(), cudaBlockSize.x ); const IndexType cudaGrids = roundUpDivision( cudaBlocks, Cuda::getMaxGridSize() ); IndexType sharedMemory = 0; - if( ! RowMajorOrder ) + if( ! Organization ) sharedMemory = cudaBlockSize.x * sizeof( RealType ); for( IndexType gridIdx = 0; gridIdx < cudaGrids; gridIdx++ ) @@ -390,11 +390,11 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); @@ -402,10 +402,10 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >& -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >& +BiEllpackView< Device, Index, Organization, WarpSize >:: operator=( const BiEllpackView& source ) { this->size = source.size; @@ -418,10 +418,10 @@ operator=( const BiEllpackView& source ) template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > void -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: save( File& file ) const { file.save( &this->size ); @@ -433,10 +433,10 @@ save( File& file ) const template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > void -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: printStructure( std::ostream& str ) const { const IndexType stripsCount = roundUpDivision( this->getSize(), getWarpSize() ); @@ -459,7 +459,7 @@ printStructure( std::ostream& str ) const #ifdef HAVE_CUDA template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename Fetch, typename Reduction, @@ -469,7 +469,7 @@ template< typename Device, typename... 
Args > __device__ void -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: segmentsReductionKernelWithAllParameters( IndexType gridIdx, IndexType first, IndexType last, @@ -487,7 +487,7 @@ segmentsReductionKernelWithAllParameters( IndexType gridIdx, const IndexType strip = segmentIdx / getWarpSize(); const IndexType firstGroupInStrip = strip * ( getLogWarpSize() + 1 ); const IndexType rowStripPerm = rowPermArray[ segmentIdx ] - strip * getWarpSize(); - const IndexType groupsCount = details::BiEllpack< IndexType, DeviceType, RowMajorOrder, getWarpSize() >::getActiveGroupsCountDirect( rowPermArray, segmentIdx ); + const IndexType groupsCount = details::BiEllpack< IndexType, DeviceType, Organization, getWarpSize() >::getActiveGroupsCountDirect( rowPermArray, segmentIdx ); IndexType groupHeight = getWarpSize(); bool compute( true ); IndexType localIdx( 0 ); @@ -501,7 +501,7 @@ segmentsReductionKernelWithAllParameters( IndexType gridIdx, const IndexType groupWidth = groupSize / groupHeight; for( IndexType i = 0; i < groupWidth; i++ ) { - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) result = reduction( result, fetch( segmentIdx, localIdx, groupOffset + rowStripPerm * groupWidth + i, compute ) ); else result = reduction( result, fetch( segmentIdx, localIdx, groupOffset + rowStripPerm + i * groupHeight, compute ) ); @@ -515,7 +515,7 @@ segmentsReductionKernelWithAllParameters( IndexType gridIdx, template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int WarpSize > template< typename Fetch, typename Reduction, @@ -525,7 +525,7 @@ template< typename Device, typename... 
Args > __device__ void -BiEllpackView< Device, Index, RowMajorOrder, WarpSize >:: +BiEllpackView< Device, Index, Organization, WarpSize >:: segmentsReductionKernel( IndexType gridIdx, IndexType first, IndexType last, @@ -557,7 +557,7 @@ segmentsReductionKernel( IndexType gridIdx, __syncthreads(); bool compute( true ); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { for( IndexType group = 0; group < getLogWarpSize() + 1; group++ ) { diff --git a/src/TNL/Containers/Segments/CSR.h b/src/TNL/Containers/Segments/CSR.h index c5c0ce68f..9bcd45d08 100644 --- a/src/TNL/Containers/Segments/CSR.h +++ b/src/TNL/Containers/Segments/CSR.h @@ -35,7 +35,7 @@ class CSR using ViewTemplate = CSRView< Device_, Index_ >; using ViewType = CSRView< Device, Index >; using ConstViewType = CSRView< Device, std::add_const_t< IndexType > >; - using SegmentViewType = SegmentView< IndexType, true >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; CSR(); diff --git a/src/TNL/Containers/Segments/CSRView.h b/src/TNL/Containers/Segments/CSRView.h index b00b012d4..b01e6c66d 100644 --- a/src/TNL/Containers/Segments/CSRView.h +++ b/src/TNL/Containers/Segments/CSRView.h @@ -33,7 +33,7 @@ class CSRView template< typename Device_, typename Index_ > using ViewTemplate = CSRView< Device_, Index_ >; using ConstViewType = CSRView< Device, std::add_const_t< Index > >; - using SegmentViewType = SegmentView< IndexType, true >; + using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; __cuda_callable__ CSRView(); diff --git a/src/TNL/Containers/Segments/ChunkedEllpack.h b/src/TNL/Containers/Segments/ChunkedEllpack.h index dd4805887..0eb18f325 100644 --- a/src/TNL/Containers/Segments/ChunkedEllpack.h +++ b/src/TNL/Containers/Segments/ChunkedEllpack.h @@ -22,7 +22,7 @@ namespace TNL { template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, - bool RowMajorOrder = std::is_same< Device, 
Devices::Host >::value > + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() > class ChunkedEllpack { public: @@ -30,12 +30,12 @@ class ChunkedEllpack using DeviceType = Device; using IndexType = std::remove_const_t< Index >; using OffsetsHolder = Containers::Vector< Index, DeviceType, IndexType, IndexAllocator >; - static constexpr bool getRowMajorOrder() { return RowMajorOrder; } - using ViewType = ChunkedEllpackView< Device, Index, RowMajorOrder >; + static constexpr bool getOrganization() { return Organization; } + using ViewType = ChunkedEllpackView< Device, Index, Organization >; template< typename Device_, typename Index_ > - using ViewTemplate = ChunkedEllpackView< Device_, Index_, RowMajorOrder >; - using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< IndexType >, RowMajorOrder >; - using SegmentViewType = ChunkedEllpackSegmentView< IndexType, RowMajorOrder >; + using ViewTemplate = ChunkedEllpackView< Device_, Index_, Organization >; + using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< IndexType >, Organization >; + using SegmentViewType = ChunkedEllpackSegmentView< IndexType, Organization >; using ChunkedEllpackSliceInfoType = details::ChunkedEllpackSliceInfo< IndexType >; //TODO: using ChunkedEllpackSliceInfoAllocator = typename IndexAllocatorType::retype< ChunkedEllpackSliceInfoType >; using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >; @@ -112,8 +112,8 @@ class ChunkedEllpack ChunkedEllpack& operator=( const ChunkedEllpack& source ) = default; - template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > - ChunkedEllpack& operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ >& source ); + template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_ > + ChunkedEllpack& 
operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, Organization_ >& source ); void save( File& file ) const; @@ -157,7 +157,7 @@ class ChunkedEllpack IndexType numberOfSlices = 0; - template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > + template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_ > friend class ChunkedEllpack; }; diff --git a/src/TNL/Containers/Segments/ChunkedEllpack.hpp b/src/TNL/Containers/Segments/ChunkedEllpack.hpp index 005b22a78..b1af00439 100644 --- a/src/TNL/Containers/Segments/ChunkedEllpack.hpp +++ b/src/TNL/Containers/Segments/ChunkedEllpack.hpp @@ -22,8 +22,8 @@ namespace TNL { template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: ChunkedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) { this->setSegmentsSizes( sizes ); @@ -32,8 +32,8 @@ ChunkedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: ChunkedEllpack( const ChunkedEllpack& chunkedEllpack ) : size( chunkedEllpack.size ), storageSize( chunkedEllpack.storageSize ), @@ -51,8 +51,8 @@ ChunkedEllpack( const ChunkedEllpack& chunkedEllpack ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: ChunkedEllpack( const ChunkedEllpack&& chunkedEllpack ) : size( chunkedEllpack.size ), storageSize( 
chunkedEllpack.storageSize ), @@ -70,9 +70,9 @@ ChunkedEllpack( const ChunkedEllpack&& chunkedEllpack ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > String -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: getSerializationType() { return "ChunkedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; @@ -81,9 +81,9 @@ getSerializationType() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > String -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: getSegmentsType() { return ViewType::getSegmentsType(); @@ -92,9 +92,9 @@ getSegmentsType() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -typename ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >::ViewType -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +typename ChunkedEllpack< Device, Index, IndexAllocator, Organization >::ViewType +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: getView() { return ViewType( size, storageSize, chunksInSlice, desiredChunkSize, @@ -109,8 +109,8 @@ getView() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -auto ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +auto ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: getConstView() const -> const ConstViewType { return ConstViewType( size, storageSize, chunksInSlice, desiredChunkSize, @@ -125,10 +125,10 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization 
Organization > template< typename SegmentsSizes > void -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: resolveSliceSizes( SegmentsSizes& segmentsSizes ) { /**** @@ -166,10 +166,10 @@ resolveSliceSizes( SegmentsSizes& segmentsSizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename SegmentsSizes > bool -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: setSlice( SegmentsSizes& rowLengths, const IndexType sliceIndex, IndexType& elementsToAllocation ) @@ -254,10 +254,10 @@ setSlice( SegmentsSizes& rowLengths, template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename SizesHolder > void -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: setSegmentsSizes( const SizesHolder& segmentsSizes ) { if( std::is_same< DeviceType, Devices::Host >::value ) @@ -292,7 +292,7 @@ setSegmentsSizes( const SizesHolder& segmentsSizes ) } else { - ChunkedEllpack< Devices::Host, Index, typename Allocators::Default< Devices::Host >::template Allocator< Index >, RowMajorOrder > hostSegments; + ChunkedEllpack< Devices::Host, Index, typename Allocators::Default< Devices::Host >::template Allocator< Index >, Organization > hostSegments; Containers::Vector< IndexType, Devices::Host, IndexType > hostSegmentsSizes; hostSegmentsSizes = segmentsSizes; hostSegments.setSegmentsSizes( hostSegmentsSizes ); @@ -303,9 +303,9 @@ setSegmentsSizes( const SizesHolder& segmentsSizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > void -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: 
+ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: reset() { this->size = 0; @@ -321,8 +321,8 @@ reset() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -__cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: getSegmentsCount() const -> IndexType { return this->size; @@ -331,11 +331,11 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -auto ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +auto ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSize( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getSegmentSize( rowToSliceMapping.getView(), slices.getView(), rowToChunkMapping.getView(), @@ -345,8 +345,8 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -__cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: getSize() const -> IndexType { return this->size; @@ -355,8 +355,8 @@ getSize() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -__cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: getStorageSize() const -> IndexType { return this->storageSize; @@ 
-365,11 +365,11 @@ getStorageSize() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -__cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndex( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getGlobalIndex( rowToSliceMapping, slices, rowToChunkMapping, @@ -381,8 +381,8 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > -__cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { } @@ -390,10 +390,10 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function, typename... Args > void -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { this->getConstView().forSegments( first, last, f, args... ); @@ -402,10 +402,10 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function, typename... 
Args > void -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -414,10 +414,10 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... ); @@ -426,10 +426,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); @@ -438,11 +438,11 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > - template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >& -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: -operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ >& source ) + ElementsOrganization Organization > + template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_ > +ChunkedEllpack< Device, Index, IndexAllocator, Organization >& +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: +operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, Organization_ >& source ) { this->size = source.size; this->storageSize = source.storageSize; @@ -460,9 +460,9 @@ operator=( const ChunkedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > void -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: save( File& file ) const { file.save( &this->size ); @@ -480,9 +480,9 @@ save( File& file ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > void -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, Index, IndexAllocator, Organization >:: load( File& file ) { file.load( &this->size ); @@ -500,9 +500,9 @@ load( File& file ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder > + ElementsOrganization Organization > void -ChunkedEllpack< Device, Index, IndexAllocator, RowMajorOrder >:: +ChunkedEllpack< Device, 
Index, IndexAllocator, Organization >:: printStructure( std::ostream& str ) { this->getView().printStructure( str ); diff --git a/src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h b/src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h index 93da55927..65a3cc5bd 100644 --- a/src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h +++ b/src/TNL/Containers/Segments/ChunkedEllpackSegmentView.h @@ -15,11 +15,11 @@ namespace TNL { namespace Segments { template< typename Index, - bool RowMajorOrder = false > + ElementsOrganization Organization > class ChunkedEllpackSegmentView; template< typename Index > -class ChunkedEllpackSegmentView< Index, false > +class ChunkedEllpackSegmentView< Index, ColumnMajorOrder > { public: @@ -55,7 +55,7 @@ class ChunkedEllpackSegmentView< Index, false > }; template< typename Index > -class ChunkedEllpackSegmentView< Index, true > +class ChunkedEllpackSegmentView< Index, RowMajorOrder > { public: diff --git a/src/TNL/Containers/Segments/ChunkedEllpackView.h b/src/TNL/Containers/Segments/ChunkedEllpackView.h index a840447b9..fe1dd6a2a 100644 --- a/src/TNL/Containers/Segments/ChunkedEllpackView.h +++ b/src/TNL/Containers/Segments/ChunkedEllpackView.h @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -23,7 +24,7 @@ namespace TNL { template< typename Device, typename Index, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() > class ChunkedEllpackView { public: @@ -36,7 +37,7 @@ class ChunkedEllpackView template< typename Device_, typename Index_ > using ViewTemplate = ChunkedEllpackView< Device_, Index_ >; using ConstViewType = ChunkedEllpackView< Device, std::add_const_t< Index > >; - using SegmentViewType = ChunkedEllpackSegmentView< IndexType, RowMajorOrder >; + using SegmentViewType = ChunkedEllpackSegmentView< IndexType, Organization >; using ChunkedEllpackSliceInfoType = 
details::ChunkedEllpackSliceInfo< IndexType >; using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >; using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >; diff --git a/src/TNL/Containers/Segments/ChunkedEllpackView.hpp b/src/TNL/Containers/Segments/ChunkedEllpackView.hpp index 19ae6f672..7ca3fb905 100644 --- a/src/TNL/Containers/Segments/ChunkedEllpackView.hpp +++ b/src/TNL/Containers/Segments/ChunkedEllpackView.hpp @@ -22,9 +22,9 @@ namespace TNL { template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: ChunkedEllpackView( const IndexType size, const IndexType storageSize, const IndexType chunksInSlice, @@ -50,9 +50,9 @@ ChunkedEllpackView( const IndexType size, template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: ChunkedEllpackView( const IndexType size, const IndexType storageSize, const IndexType chunksInSlice, @@ -78,9 +78,9 @@ ChunkedEllpackView( const IndexType size, template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: ChunkedEllpackView( const ChunkedEllpackView& chunked_ellpack_view ) : size( chunked_ellpack_view.size ), storageSize( chunked_ellpack_view.storageSize ), @@ -97,9 +97,9 @@ ChunkedEllpackView( const ChunkedEllpackView& chunked_ellpack_view ) template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > 
__cuda_callable__ -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: ChunkedEllpackView( const ChunkedEllpackView&& chunked_ellpack_view ) : size( chunked_ellpack_view.size ), storageSize( chunked_ellpack_view.storageSize ), @@ -116,9 +116,9 @@ ChunkedEllpackView( const ChunkedEllpackView&& chunked_ellpack_view ) template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > String -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: getSerializationType() { return "ChunkedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; @@ -126,9 +126,9 @@ getSerializationType() template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > String -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: getSegmentsType() { return "ChunkedEllpack"; @@ -136,10 +136,10 @@ getSegmentsType() template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ -typename ChunkedEllpackView< Device, Index, RowMajorOrder >::ViewType -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +typename ChunkedEllpackView< Device, Index, Organization >::ViewType +ChunkedEllpackView< Device, Index, Organization >:: getView() { return ViewType( size, chunksInSlice, desiredChunkSize, @@ -153,8 +153,8 @@ getView() template< typename Device, typename Index, - bool RowMajorOrder > -__cuda_callable__ auto ChunkedEllpackView< Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpackView< Device, Index, Organization >:: getConstView() const -> const ConstViewType { return ConstViewType( size, chunksInSlice, desiredChunkSize, @@ -168,8 +168,8 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, - bool 
RowMajorOrder > -__cuda_callable__ auto ChunkedEllpackView< Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpackView< Device, Index, Organization >:: getSegmentsCount() const -> IndexType { return this->size; @@ -177,12 +177,12 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder > -__cuda_callable__ auto ChunkedEllpackView< Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpackView< Device, Index, Organization >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { if( std::is_same< DeviceType, Devices::Host >::value ) - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSizeDirect( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getSegmentSizeDirect( rowToSliceMapping, slices, rowToChunkMapping, @@ -190,13 +190,13 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSizeDirect( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getSegmentSizeDirect( rowToSliceMapping, slices, rowToChunkMapping, segmentIdx ); #else - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentSize( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getSegmentSize( rowToSliceMapping, slices, rowToChunkMapping, @@ -207,8 +207,8 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder > -__cuda_callable__ auto ChunkedEllpackView< Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpackView< Device, Index, Organization >:: getSize() const -> IndexType { return this->size; @@ -216,8 +216,8 @@ 
getSize() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder > -__cuda_callable__ auto ChunkedEllpackView< Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpackView< Device, Index, Organization >:: getStorageSize() const -> IndexType { return this->storageSize; @@ -225,12 +225,12 @@ getStorageSize() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder > -__cuda_callable__ auto ChunkedEllpackView< Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +__cuda_callable__ auto ChunkedEllpackView< Device, Index, Organization >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { if( std::is_same< DeviceType, Devices::Host >::value ) - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndexDirect( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getGlobalIndexDirect( rowToSliceMapping, slices, rowToChunkMapping, @@ -240,7 +240,7 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndexDirect( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getGlobalIndexDirect( rowToSliceMapping, slices, rowToChunkMapping, @@ -248,7 +248,7 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp segmentIdx, localIdx ); #else - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getGlobalIndex( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getGlobalIndex( rowToSliceMapping, slices, rowToChunkMapping, @@ -261,14 +261,14 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp template< typename Device, typename Index, - bool RowMajorOrder > + 
ElementsOrganization Organization > __cuda_callable__ auto -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { if( std::is_same< DeviceType, Devices::Host >::value ) - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentViewDirect( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getSegmentViewDirect( rowToSliceMapping, slices, rowToChunkMapping, @@ -277,14 +277,14 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType if( std::is_same< DeviceType, Devices::Cuda >::value ) { #ifdef __CUDA_ARCH__ - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentViewDirect( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getSegmentViewDirect( rowToSliceMapping, slices, rowToChunkMapping, chunksInSlice, segmentIdx ); #else - return details::ChunkedEllpack< IndexType, DeviceType, RowMajorOrder >::getSegmentView( + return details::ChunkedEllpack< IndexType, DeviceType, Organization >::getSegmentView( rowToSliceMapping, slices, rowToChunkMapping, @@ -296,10 +296,10 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function, typename... Args > void -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const IndexType chunksInSlice = this->chunksInSlice; @@ -322,7 +322,7 @@ forSegments( IndexType first, IndexType last, Function& f, Args... 
args ) const const IndexType segmentSize = segmentChunksCount * chunkSize; bool compute( true ); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { IndexType begin = sliceOffset + firstChunkOfSegment * chunkSize; IndexType end = begin + segmentSize; @@ -349,10 +349,10 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function, typename... Args > void -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -360,10 +360,10 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const { using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType; @@ -388,7 +388,7 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio const IndexType segmentSize = segmentChunksCount * chunkSize; RealType aux( zero ); bool compute( true ); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { IndexType begin = sliceOffset + firstChunkOfSegment * chunkSize; IndexType end = begin + segmentSize; @@ -434,10 +434,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); @@ -445,9 +445,9 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, - bool RowMajorOrder > -ChunkedEllpackView< Device, Index, RowMajorOrder >& -ChunkedEllpackView< Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +ChunkedEllpackView< Device, Index, Organization >& +ChunkedEllpackView< Device, Index, Organization >:: operator=( const ChunkedEllpackView& view ) { this->size = view.size; @@ -465,9 +465,9 @@ operator=( const ChunkedEllpackView& view ) template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > void -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: save( File& file ) const { file.save( &this->size ); @@ -484,9 +484,9 @@ save( File& file ) const template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > void -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: printStructure( std::ostream& str ) const { //const IndexType numberOfSlices = this->getNumberOfSlices(); @@ -507,7 +507,7 @@ printStructure( std::ostream& str ) const #ifdef HAVE_CUDA template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduction, typename ResultKeeper, @@ -515,7 +515,7 @@ template< typename Device, typename... 
Args > __device__ void -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: segmentsReductionKernelWithAllParameters( IndexType gridIdx, IndexType first, IndexType last, @@ -553,7 +553,7 @@ segmentsReductionKernelWithAllParameters( IndexType gridIdx, IndexType localIdx = ( threadIdx.x - firstChunkOfSegment ) * chunkSize; bool compute( true ); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { IndexType begin = sliceOffset + threadIdx.x * chunkSize; // threadIdx.x = chunkIdx within the slice IndexType end = begin + chunkSize; @@ -585,7 +585,7 @@ segmentsReductionKernelWithAllParameters( IndexType gridIdx, template< typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduction, typename ResultKeeper, @@ -593,7 +593,7 @@ template< typename Device, typename... Args > __device__ void -ChunkedEllpackView< Device, Index, RowMajorOrder >:: +ChunkedEllpackView< Device, Index, Organization >:: segmentsReductionKernel( IndexType gridIdx, IndexType first, IndexType last, @@ -625,7 +625,7 @@ segmentsReductionKernel( IndexType gridIdx, const IndexType chunkIdx = sliceIdx * chunksInSlice + threadIdx.x; bool compute( true ); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { IndexType begin = sliceOffset + threadIdx.x * chunkSize; // threadIdx.x = chunkIdx within the slice IndexType end = begin + chunkSize; diff --git a/src/TNL/Containers/Segments/ElementsOrganization.h b/src/TNL/Containers/Segments/ElementsOrganization.h new file mode 100644 index 000000000..3c4086cd2 --- /dev/null +++ b/src/TNL/Containers/Segments/ElementsOrganization.h @@ -0,0 +1,32 @@ +/*************************************************************************** + ElementsOrganization.h - description + ------------------- + begin : May 9, 2020 + copyright : (C) 2020 by Tomas Oberhuber + email : tomas.oberhuber@fjfi.cvut.cz + 
***************************************************************************/ + +/* See Copyright Notice in tnl/Copyright */ + +#pragma once + +namespace TNL { + namespace Containers { + namespace Segments { + +enum ElementsOrganization { ColumnMajorOrder = 0, RowMajorOrder }; + +template< typename Device > +struct DefaultElementsOrganization +{ + static constexpr ElementsOrganization getOrganization() { + if( std::is_same< Device, Devices::Host >::value ) + return RowMajorOrder; + else + return ColumnMajorOrder; + }; +}; + + } // namespace Segments + } // namespace Containers +} // namespace TNL diff --git a/src/TNL/Containers/Segments/Ellpack.h b/src/TNL/Containers/Segments/Ellpack.h index 63ca556a4..f5c8490de 100644 --- a/src/TNL/Containers/Segments/Ellpack.h +++ b/src/TNL/Containers/Segments/Ellpack.h @@ -21,7 +21,7 @@ namespace TNL { template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + ElementsOrganization Organization = Segments::DefaultElementsOrganization< Device >::getOrganization(), int Alignment = 32 > class Ellpack { @@ -30,14 +30,14 @@ class Ellpack using DeviceType = Device; using IndexType = std::remove_const_t< Index >; static constexpr int getAlignment() { return Alignment; } - static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + static constexpr bool getOrganization() { return Organization; } using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; using SegmentsSizes = OffsetsHolder; template< typename Device_, typename Index_ > - using ViewTemplate = EllpackView< Device_, Index_, RowMajorOrder, Alignment >; - using ViewType = EllpackView< Device, Index, RowMajorOrder, Alignment >; + using ViewTemplate = EllpackView< Device_, Index_, Organization, Alignment >; + using ViewType = EllpackView< Device, Index, Organization, Alignment >; using ConstViewType = 
typename ViewType::ConstViewType; - using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + using SegmentViewType = SegmentView< IndexType, Organization >; Ellpack(); @@ -115,8 +115,8 @@ class Ellpack Ellpack& operator=( const Ellpack& source ) = default; - template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int Alignment_ > - Ellpack& operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alignment_ >& source ); + template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_, int Alignment_ > + Ellpack& operator=( const Ellpack< Device_, Index_, IndexAllocator_, Organization_, Alignment_ >& source ); void save( File& file ) const; diff --git a/src/TNL/Containers/Segments/Ellpack.hpp b/src/TNL/Containers/Segments/Ellpack.hpp index e4e2180ad..922a75765 100644 --- a/src/TNL/Containers/Segments/Ellpack.hpp +++ b/src/TNL/Containers/Segments/Ellpack.hpp @@ -22,9 +22,9 @@ namespace TNL { template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: Ellpack() : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { @@ -33,9 +33,9 @@ Ellpack() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: Ellpack( const SegmentsSizes& segmentsSizes ) : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { @@ -45,9 +45,9 @@ Ellpack( const SegmentsSizes& segmentsSizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -Ellpack< Device, Index, 
IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: Ellpack( const IndexType segmentsCount, const IndexType segmentSize ) : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { @@ -57,9 +57,9 @@ Ellpack( const IndexType segmentsCount, const IndexType segmentSize ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: Ellpack( const Ellpack& ellpack ) : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { @@ -68,9 +68,9 @@ Ellpack( const Ellpack& ellpack ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: Ellpack( const Ellpack&& ellpack ) : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { @@ -79,10 +79,10 @@ Ellpack( const Ellpack&& ellpack ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > String -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: getSerializationType() { return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; @@ -91,10 +91,10 @@ getSerializationType() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > String -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: 
getSegmentsType() { return ViewType::getSegmentsType(); @@ -103,10 +103,10 @@ getSegmentsType() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > auto -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: getView() -> ViewType { return ViewType( segmentSize, size, alignedSize ); @@ -115,10 +115,10 @@ getView() -> ViewType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > auto -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: getConstView() const -> const ConstViewType { return ConstViewType( segmentSize, size, alignedSize ); @@ -127,16 +127,16 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > template< typename SizesHolder > void -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: setSegmentsSizes( const SizesHolder& sizes ) { this->segmentSize = max( sizes ); this->size = sizes.getSize(); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) this->alignedSize = this->size; else this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); @@ -145,10 +145,10 @@ setSegmentsSizes( const SizesHolder& sizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > void -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: reset() { this->segmentSize = 0; @@ -159,15 +159,15 @@ reset() 
template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > void -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ) { this->segmentSize = segmentSize; this->size = segmentsCount; - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) this->alignedSize = this->size; else this->alignedSize = roundUpDivision( size, this->getAlignment() ) * this->getAlignment(); @@ -177,9 +177,9 @@ setSegmentsSizes( const IndexType segmentsCount, const IndexType segmentSize ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: getSegmentsCount() const -> IndexType { return this->size; @@ -188,9 +188,9 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { return this->segmentSize; @@ -199,9 +199,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: 
getSize() const -> IndexType { return this->size * this->segmentSize; @@ -211,9 +211,9 @@ getSize() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: getStorageSize() const -> IndexType { return this->alignedSize * this->segmentSize; @@ -222,12 +222,12 @@ getStorageSize() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return segmentIdx * this->segmentSize + localIdx; else return segmentIdx + this->alignedSize * localIdx; @@ -236,10 +236,10 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > __cuda_callable__ -void Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +void Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } @@ -247,12 +247,12 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, RowMajorOrder, 
Alignment >:: +__cuda_callable__ auto Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 ); else return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize ); @@ -261,11 +261,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > template< typename Function, typename... Args > void -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { this->getConstView().forSegments( first, last, f, args... ); @@ -274,11 +274,11 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > template< typename Function, typename... Args > void -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -287,11 +287,11 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... 
Args > void -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... ); @@ -300,11 +300,11 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); @@ -313,12 +313,12 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > - template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_, int Alignment_ > -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >& -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: -operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alignment_ >& source ) + template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_, int Alignment_ > +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >& +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: +operator=( const Ellpack< Device_, Index_, IndexAllocator_, Organization_, Alignment_ >& source ) { this->segmentSize = source.segmentSize; this->size = source.size; @@ -329,10 +329,10 @@ operator=( const Ellpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, Alig template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > void -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: save( File& file ) const { file.save( &segmentSize ); @@ -343,10 +343,10 @@ save( File& file ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > void -Ellpack< Device, Index, IndexAllocator, RowMajorOrder, Alignment >:: +Ellpack< Device, Index, IndexAllocator, Organization, Alignment >:: load( File& file ) { file.load( &segmentSize ); diff --git a/src/TNL/Containers/Segments/EllpackView.h b/src/TNL/Containers/Segments/EllpackView.h index 846e75cf4..3559949d8 100644 --- 
a/src/TNL/Containers/Segments/EllpackView.h +++ b/src/TNL/Containers/Segments/EllpackView.h @@ -14,6 +14,7 @@ #include #include +#include namespace TNL { @@ -22,7 +23,7 @@ namespace TNL { template< typename Device, typename Index, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + ElementsOrganization Organization = Segments::DefaultElementsOrganization< Device >::getOrganization(), int Alignment = 32 > class EllpackView { @@ -31,14 +32,14 @@ class EllpackView using DeviceType = Device; using IndexType = std::remove_const_t< Index >; static constexpr int getAlignment() { return Alignment; } - static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + static constexpr bool getOrganization() { return Organization; } using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; using SegmentsSizes = OffsetsHolder; template< typename Device_, typename Index_ > - using ViewTemplate = EllpackView< Device_, Index_, RowMajorOrder, Alignment >; + using ViewTemplate = EllpackView< Device_, Index_, Organization, Alignment >; using ViewType = EllpackView; using ConstViewType = ViewType; - using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + using SegmentViewType = SegmentView< IndexType, Organization >; __cuda_callable__ EllpackView(); diff --git a/src/TNL/Containers/Segments/EllpackView.hpp b/src/TNL/Containers/Segments/EllpackView.hpp index fa40227f8..0195424e9 100644 --- a/src/TNL/Containers/Segments/EllpackView.hpp +++ b/src/TNL/Containers/Segments/EllpackView.hpp @@ -22,10 +22,10 @@ namespace TNL { template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > __cuda_callable__ -EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView< Device, Index, Organization, Alignment >:: EllpackView() : segmentSize( 0 ), size( 0 ), alignedSize( 0 ) { @@ -33,10 +33,10 @@ EllpackView() template< typename Device, typename Index, - bool RowMajorOrder, + 
ElementsOrganization Organization, int Alignment > __cuda_callable__ -EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView< Device, Index, Organization, Alignment >:: EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize ) : segmentSize( segmentSize ), size( size ), alignedSize( alignedSize ) { @@ -44,10 +44,10 @@ EllpackView( IndexType segmentSize, IndexType size, IndexType alignedSize ) template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > __cuda_callable__ -EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView< Device, Index, Organization, Alignment >:: EllpackView( const EllpackView& ellpack ) : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { @@ -55,10 +55,10 @@ EllpackView( const EllpackView& ellpack ) template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > __cuda_callable__ -EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView< Device, Index, Organization, Alignment >:: EllpackView( const EllpackView&& ellpack ) : segmentSize( ellpack.segmentSize ), size( ellpack.size ), alignedSize( ellpack.alignedSize ) { @@ -66,10 +66,10 @@ EllpackView( const EllpackView&& ellpack ) template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > String -EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView< Device, Index, Organization, Alignment >:: getSerializationType() { return "Ellpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; @@ -77,10 +77,10 @@ getSerializationType() template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > String -EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView< Device, Index, Organization, Alignment >:: getSegmentsType() { return 
"Ellpack"; @@ -88,11 +88,11 @@ getSegmentsType() template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > __cuda_callable__ -typename EllpackView< Device, Index, RowMajorOrder, Alignment >::ViewType -EllpackView< Device, Index, RowMajorOrder, Alignment >:: +typename EllpackView< Device, Index, Organization, Alignment >::ViewType +EllpackView< Device, Index, Organization, Alignment >:: getView() { return ViewType( segmentSize, size, alignedSize ); @@ -100,11 +100,11 @@ getView() template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > __cuda_callable__ auto -EllpackView< Device, Index, RowMajorOrder, Alignment >:: +EllpackView< Device, Index, Organization, Alignment >:: getConstView() const -> const ConstViewType { return ConstViewType( segmentSize, size, alignedSize ); @@ -112,9 +112,9 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto EllpackView< Device, Index, RowMajorOrder, Alignment >:: +__cuda_callable__ auto EllpackView< Device, Index, Organization, Alignment >:: getSegmentsCount() const -> IndexType { return this->size; @@ -122,9 +122,9 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto EllpackView< Device, Index, RowMajorOrder, Alignment >:: +__cuda_callable__ auto EllpackView< Device, Index, Organization, Alignment >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { return this->segmentSize; @@ -132,9 +132,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto EllpackView< Device, Index, 
RowMajorOrder, Alignment >:: +__cuda_callable__ auto EllpackView< Device, Index, Organization, Alignment >:: getSize() const -> IndexType { return this->size * this->segmentSize; @@ -143,9 +143,9 @@ getSize() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto EllpackView< Device, Index, RowMajorOrder, Alignment >:: +__cuda_callable__ auto EllpackView< Device, Index, Organization, Alignment >:: getStorageSize() const -> IndexType { return this->alignedSize * this->segmentSize; @@ -153,12 +153,12 @@ getStorageSize() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto EllpackView< Device, Index, RowMajorOrder, Alignment >:: +__cuda_callable__ auto EllpackView< Device, Index, Organization, Alignment >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return segmentIdx * this->segmentSize + localIdx; else return segmentIdx + this->alignedSize * localIdx; @@ -166,21 +166,21 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ void EllpackView< Device, Index, RowMajorOrder, Alignment >:: +__cuda_callable__ void EllpackView< Device, Index, Organization, Alignment >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -__cuda_callable__ auto EllpackView< Device, Index, RowMajorOrder, Alignment >:: +__cuda_callable__ auto EllpackView< Device, Index, Organization, Alignment >:: getSegmentView( const IndexType segmentIdx ) const -> 
SegmentViewType { - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return SegmentViewType( segmentIdx * this->segmentSize, this->segmentSize, 1 ); else return SegmentViewType( segmentIdx, this->segmentSize, this->alignedSize ); @@ -188,13 +188,13 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > template< typename Function, typename... Args > -void EllpackView< Device, Index, RowMajorOrder, Alignment >:: +void EllpackView< Device, Index, Organization, Alignment >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { const IndexType segmentSize = this->segmentSize; auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { @@ -225,10 +225,10 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > template< typename Function, typename... Args > -void EllpackView< Device, Index, RowMajorOrder, Alignment >:: +void EllpackView< Device, Index, Organization, Alignment >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -236,15 +236,15 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > -void EllpackView< Device, Index, RowMajorOrder, Alignment >:: +void EllpackView< Device, Index, Organization, Alignment >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... 
args ) const { //using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType; - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { const IndexType segmentSize = this->segmentSize; auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... args ) mutable { @@ -279,10 +279,10 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > -void EllpackView< Device, Index, RowMajorOrder, Alignment >:: +void EllpackView< Device, Index, Organization, Alignment >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); @@ -290,11 +290,11 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -EllpackView< Device, Index, RowMajorOrder, Alignment >& -EllpackView< Device, Index, RowMajorOrder, Alignment >:: -operator=( const EllpackView< Device, Index, RowMajorOrder, Alignment >& view ) +EllpackView< Device, Index, Organization, Alignment >& +EllpackView< Device, Index, Organization, Alignment >:: +operator=( const EllpackView< Device, Index, Organization, Alignment >& view ) { this->segmentSize = view.segmentSize; this->size = view.size; @@ -304,9 +304,9 @@ operator=( const EllpackView< Device, Index, RowMajorOrder, Alignment >& view ) template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -void EllpackView< Device, Index, RowMajorOrder, Alignment >:: +void EllpackView< Device, Index, Organization, Alignment >:: save( File& file ) const { file.save( &segmentSize ); @@ -316,9 +316,9 @@ save( File& file ) const template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int Alignment > -void EllpackView< Device, Index, RowMajorOrder, Alignment >:: +void EllpackView< Device, Index, Organization, Alignment >:: load( File& file ) { file.load( &segmentSize ); diff --git a/src/TNL/Containers/Segments/SegmentView.h b/src/TNL/Containers/Segments/SegmentView.h index eeb3f9d24..42d525fe0 100644 --- a/src/TNL/Containers/Segments/SegmentView.h +++ b/src/TNL/Containers/Segments/SegmentView.h @@ -10,16 +10,18 @@ #pragma once +#include + namespace TNL { namespace Containers { namespace Segments { template< typename Index, - bool RowMajorOrder = false > + ElementsOrganization Organization > class SegmentView; template< typename Index > -class SegmentView< Index, false > +class SegmentView< Index, ColumnMajorOrder > { public: @@ -54,7 +56,7 @@ class 
SegmentView< Index, false > }; template< typename Index > -class SegmentView< Index, true > +class SegmentView< Index, RowMajorOrder > { public: diff --git a/src/TNL/Containers/Segments/SlicedEllpack.h b/src/TNL/Containers/Segments/SlicedEllpack.h index 19c1b8eb4..c85e45742 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.h +++ b/src/TNL/Containers/Segments/SlicedEllpack.h @@ -22,7 +22,7 @@ namespace TNL { template< typename Device, typename Index, typename IndexAllocator = typename Allocators::Default< Device >::template Allocator< Index >, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), int SliceSize = 32 > class SlicedEllpack { @@ -32,12 +32,12 @@ class SlicedEllpack using IndexType = std::remove_const_t< Index >; using OffsetsHolder = Containers::Vector< Index, DeviceType, IndexType, IndexAllocator >; static constexpr int getSliceSize() { return SliceSize; } - static constexpr bool getRowMajorOrder() { return RowMajorOrder; } - using ViewType = SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >; + static constexpr bool getOrganization() { return Organization; } + using ViewType = SlicedEllpackView< Device, Index, Organization, SliceSize >; template< typename Device_, typename Index_ > - using ViewTemplate = SlicedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >; - using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, RowMajorOrder, SliceSize >; - using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + using ViewTemplate = SlicedEllpackView< Device_, Index_, Organization, SliceSize >; + using ConstViewType = SlicedEllpackView< Device, std::add_const_t< Index >, Organization, SliceSize >; + using SegmentViewType = SegmentView< IndexType, Organization >; SlicedEllpack(); @@ -112,8 +112,8 @@ class SlicedEllpack SlicedEllpack& operator=( const SlicedEllpack& source ) = 
default; - template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > - SlicedEllpack& operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source ); + template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_ > + SlicedEllpack& operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, Organization_, SliceSize >& source ); void save( File& file ) const; diff --git a/src/TNL/Containers/Segments/SlicedEllpack.hpp b/src/TNL/Containers/Segments/SlicedEllpack.hpp index e76e6d430..6a9bc4fe2 100644 --- a/src/TNL/Containers/Segments/SlicedEllpack.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpack.hpp @@ -23,9 +23,9 @@ namespace TNL { template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: SlicedEllpack() : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) { @@ -34,9 +34,9 @@ SlicedEllpack() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) { @@ -46,9 +46,9 @@ SlicedEllpack( const Vector< IndexType, DeviceType, IndexType >& sizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: SlicedEllpack( const 
SlicedEllpack& slicedEllpack ) : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), @@ -59,9 +59,9 @@ SlicedEllpack( const SlicedEllpack& slicedEllpack ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: SlicedEllpack( const SlicedEllpack&& slicedEllpack ) : size( slicedEllpack.size ), alignedSize( slicedEllpack.alignedSize ), segmentsCount( slicedEllpack.segmentsCount ), sliceOffsets( slicedEllpack.sliceOffsets ), @@ -72,10 +72,10 @@ SlicedEllpack( const SlicedEllpack&& slicedEllpack ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > String -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getSerializationType() { return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; @@ -84,10 +84,10 @@ getSerializationType() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > String -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getSegmentsType() { return ViewType::getSegmentsType(); @@ -96,10 +96,10 @@ getSegmentsType() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -typename SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >::ViewType -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, 
SliceSize >:: +typename SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >::ViewType +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getView() { return ViewType( size, alignedSize, segmentsCount, sliceOffsets.getView(), sliceSegmentSizes.getView() ); @@ -108,10 +108,10 @@ getView() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > auto -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getConstView() const -> const ConstViewType { return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() ); @@ -120,11 +120,11 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > template< typename SizesHolder > void -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: setSegmentsSizes( const SizesHolder& sizes ) { this->segmentsCount = sizes.getSize(); @@ -132,7 +132,7 @@ setSegmentsSizes( const SizesHolder& sizes ) this->sliceOffsets.setSize( slicesCount + 1 ); this->sliceOffsets = 0; this->sliceSegmentSizes.setSize( slicesCount ); - Ellpack< DeviceType, IndexType, IndexAllocator, true > ellpack; + Ellpack< DeviceType, IndexType, IndexAllocator, RowMajorOrder > ellpack; ellpack.setSegmentsSizes( slicesCount, SliceSize ); const IndexType _size = sizes.getSize(); @@ -160,10 +160,10 @@ setSegmentsSizes( const SizesHolder& sizes ) template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > void -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: 
+SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: reset() { this->size = 0; @@ -176,9 +176,9 @@ reset() template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -__cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +__cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getSegmentsCount() const -> IndexType { return this->segmentsCount; @@ -187,9 +187,9 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -__cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +__cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { const Index sliceIdx = segmentIdx / SliceSize; @@ -208,9 +208,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -__cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +__cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getSize() const -> IndexType { return this->size; @@ -219,9 +219,9 @@ getSize() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -__cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +__cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getStorageSize() const -> IndexType { return this->alignedSize; @@ -230,9 +230,9 @@ 
getStorageSize() const -> IndexType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -__cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +__cuda_callable__ auto SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { const IndexType sliceIdx = segmentIdx / SliceSize; @@ -253,7 +253,7 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp segmentSize = this->sliceSegmentSizes.getElement( sliceIdx ); #endif } - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return sliceOffset + segmentInSliceIdx * segmentSize + localIdx; else return sliceOffset + segmentInSliceIdx + SliceSize * localIdx; @@ -262,11 +262,11 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > __cuda_callable__ void -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } @@ -274,11 +274,11 @@ getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localI template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > __cuda_callable__ auto -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { const IndexType sliceIdx = segmentIdx / SliceSize; @@ -286,7 +286,7 @@ getSegmentView( const IndexType segmentIdx ) 
const -> SegmentViewType const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ]; const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 ); else return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); @@ -295,11 +295,11 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > template< typename Function, typename... Args > void -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { this->getConstView().forSegments( first, last, f, args... ); @@ -308,11 +308,11 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > template< typename Function, typename... Args > void -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -321,11 +321,11 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... 
Args > void -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->getConstView().segmentsReduction( first, last, fetch, reduction, keeper, zero, args... ); @@ -334,11 +334,11 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... 
); @@ -347,12 +347,12 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > - template< typename Device_, typename Index_, typename IndexAllocator_, bool RowMajorOrder_ > -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >& -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: -operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_, SliceSize >& source ) + template< typename Device_, typename Index_, typename IndexAllocator_, ElementsOrganization Organization_ > +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >& +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: +operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, Organization_, SliceSize >& source ) { this->size = source.size; this->alignedSize = source.alignedSize; @@ -365,10 +365,10 @@ operator=( const SlicedEllpack< Device_, Index_, IndexAllocator_, RowMajorOrder_ template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > void -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: save( File& file ) const { file.save( &size ); @@ -381,10 +381,10 @@ save( File& file ) const template< typename Device, typename Index, typename IndexAllocator, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > void -SlicedEllpack< Device, Index, IndexAllocator, RowMajorOrder, SliceSize >:: +SlicedEllpack< Device, Index, IndexAllocator, Organization, SliceSize >:: load( File& file ) { file.load( &size ); diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.h b/src/TNL/Containers/Segments/SlicedEllpackView.h index 
4ed62ebef..2a0fcb189 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.h +++ b/src/TNL/Containers/Segments/SlicedEllpackView.h @@ -13,6 +13,7 @@ #include #include +#include #include namespace TNL { @@ -21,7 +22,7 @@ namespace TNL { template< typename Device, typename Index, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), int SliceSize = 32 > class SlicedEllpackView { @@ -31,12 +32,12 @@ class SlicedEllpackView using IndexType = std::remove_const_t< Index >; using OffsetsView = typename Containers::VectorView< Index, DeviceType, IndexType >; static constexpr int getSliceSize() { return SliceSize; } - static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + static constexpr bool getOrganization() { return Organization; } template< typename Device_, typename Index_ > - using ViewTemplate = SlicedEllpackView< Device_, Index_, RowMajorOrder, SliceSize >; + using ViewTemplate = SlicedEllpackView< Device_, Index_, Organization, SliceSize >; using ViewType = SlicedEllpackView; using ConstViewType = ViewType; - using SegmentViewType = SegmentView< IndexType, RowMajorOrder >; + using SegmentViewType = SegmentView< IndexType, Organization >; __cuda_callable__ SlicedEllpackView(); diff --git a/src/TNL/Containers/Segments/SlicedEllpackView.hpp b/src/TNL/Containers/Segments/SlicedEllpackView.hpp index 258b87754..7331181ef 100644 --- a/src/TNL/Containers/Segments/SlicedEllpackView.hpp +++ b/src/TNL/Containers/Segments/SlicedEllpackView.hpp @@ -24,10 +24,10 @@ namespace TNL { template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > __cuda_callable__ -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: SlicedEllpackView() : size( 0 ), alignedSize( 0 ), segmentsCount( 0 ) { @@ -35,10 +35,10 @@ 
SlicedEllpackView() template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > __cuda_callable__ -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: SlicedEllpackView( IndexType size, IndexType alignedSize, IndexType segmentsCount, @@ -51,10 +51,10 @@ SlicedEllpackView( IndexType size, template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > __cuda_callable__ -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: SlicedEllpackView( const SlicedEllpackView& slicedEllpackView ) : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ), segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ), @@ -64,10 +64,10 @@ SlicedEllpackView( const SlicedEllpackView& slicedEllpackView ) template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > __cuda_callable__ -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView ) : size( slicedEllpackView.size ), alignedSize( slicedEllpackView.alignedSize ), segmentsCount( slicedEllpackView.segmentsCount ), sliceOffsets( slicedEllpackView.sliceOffsets ), @@ -77,10 +77,10 @@ SlicedEllpackView( const SlicedEllpackView&& slicedEllpackView ) template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > String -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: getSerializationType() { return "SlicedEllpack< [any_device], " + TNL::getSerializationType< IndexType >() + " >"; @@ -88,10 +88,10 @@ 
getSerializationType() template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > String -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: getSegmentsType() { return "SlicedEllpack"; @@ -99,11 +99,11 @@ getSegmentsType() template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > __cuda_callable__ -typename SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >::ViewType -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +typename SlicedEllpackView< Device, Index, Organization, SliceSize >::ViewType +SlicedEllpackView< Device, Index, Organization, SliceSize >:: getView() { return ViewType( size, alignedSize, segmentsCount, sliceOffsets, sliceSegmentSizes ); @@ -111,11 +111,11 @@ getView() template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > __cuda_callable__ auto -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: getConstView() const -> const ConstViewType { return ConstViewType( size, alignedSize, segmentsCount, sliceOffsets.getConstView(), sliceSegmentSizes.getConstView() ); @@ -123,9 +123,9 @@ getConstView() const -> const ConstViewType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -__cuda_callable__ auto SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +__cuda_callable__ auto SlicedEllpackView< Device, Index, Organization, SliceSize >:: getSegmentsCount() const -> IndexType { return this->segmentsCount; @@ -133,9 +133,9 @@ getSegmentsCount() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -__cuda_callable__ auto SlicedEllpackView< Device, 
Index, RowMajorOrder, SliceSize >:: +__cuda_callable__ auto SlicedEllpackView< Device, Index, Organization, SliceSize >:: getSegmentSize( const IndexType segmentIdx ) const -> IndexType { const Index sliceIdx = segmentIdx / SliceSize; @@ -153,9 +153,9 @@ getSegmentSize( const IndexType segmentIdx ) const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -__cuda_callable__ auto SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +__cuda_callable__ auto SlicedEllpackView< Device, Index, Organization, SliceSize >:: getSize() const -> IndexType { return this->size; @@ -163,9 +163,9 @@ getSize() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -__cuda_callable__ auto SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +__cuda_callable__ auto SlicedEllpackView< Device, Index, Organization, SliceSize >:: getStorageSize() const -> IndexType { return this->alignedSize; @@ -173,9 +173,9 @@ getStorageSize() const -> IndexType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -__cuda_callable__ auto SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +__cuda_callable__ auto SlicedEllpackView< Device, Index, Organization, SliceSize >:: getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexType { const IndexType sliceIdx = segmentIdx / SliceSize; @@ -196,7 +196,7 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) const -> IndexTyp segmentSize = this->sliceSegmentSizes.getElement( sliceIdx ); #endif } - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return sliceOffset + segmentInSliceIdx * segmentSize + localIdx; else return sliceOffset + segmentInSliceIdx + SliceSize * localIdx; @@ -204,22 +204,22 @@ getGlobalIndex( const Index segmentIdx, const Index localIdx ) 
const -> IndexTyp template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > __cuda_callable__ void -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: getSegmentAndLocalIndex( const Index globalIdx, Index& segmentIdx, Index& localIdx ) const { } template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > __cuda_callable__ auto -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType { const IndexType sliceIdx = segmentIdx / SliceSize; @@ -227,7 +227,7 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType const IndexType& sliceOffset = this->sliceOffsets[ sliceIdx ]; const IndexType& segmentSize = this->sliceSegmentSizes[ sliceIdx ]; - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return SegmentViewType( sliceOffset + segmentInSliceIdx * segmentSize, segmentSize, 1 ); else return SegmentViewType( sliceOffset + segmentInSliceIdx, segmentSize, SliceSize ); @@ -235,16 +235,16 @@ getSegmentView( const IndexType segmentIdx ) const -> SegmentViewType template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > template< typename Function, typename... Args > void -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: forSegments( IndexType first, IndexType last, Function& f, Args... args ) const { const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... 
args ) mutable { const IndexType sliceIdx = segmentIdx / SliceSize; @@ -278,11 +278,11 @@ forSegments( IndexType first, IndexType last, Function& f, Args... args ) const template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > template< typename Function, typename... Args > void -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: forAll( Function& f, Args... args ) const { this->forSegments( 0, this->getSegmentsCount(), f, args... ); @@ -290,18 +290,18 @@ forAll( Function& f, Args... args ) const template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { using RealType = typename details::FetchLambdaAdapter< Index, Fetch >::ReturnType; //using RealType = decltype( fetch( IndexType(), IndexType(), IndexType(), std::declval< bool& >(), args... ) ); const auto sliceSegmentSizes_view = this->sliceSegmentSizes.getConstView(); const auto sliceOffsets_view = this->sliceOffsets.getConstView(); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { auto l = [=] __cuda_callable__ ( const IndexType segmentIdx, Args... 
args ) mutable { const IndexType sliceIdx = segmentIdx / SliceSize; @@ -339,11 +339,11 @@ segmentsReduction( IndexType first, IndexType last, Fetch& fetch, const Reductio template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > template< typename Fetch, typename Reduction, typename ResultKeeper, typename Real, typename... Args > void -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, const Real& zero, Args... args ) const { this->segmentsReduction( 0, this->getSegmentsCount(), fetch, reduction, keeper, zero, args... ); @@ -351,11 +351,11 @@ allReduction( Fetch& fetch, const Reduction& reduction, ResultKeeper& keeper, co template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: -operator=( const SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& view ) +SlicedEllpackView< Device, Index, Organization, SliceSize >& +SlicedEllpackView< Device, Index, Organization, SliceSize >:: +operator=( const SlicedEllpackView< Device, Index, Organization, SliceSize >& view ) { this->size = view.size; this->alignedSize = view.alignedSize; @@ -367,10 +367,10 @@ operator=( const SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >& v template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, int SliceSize > void -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: save( File& file ) const { file.save( &size ); @@ -382,10 +382,10 @@ save( File& file ) const template< typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, 
int SliceSize > void -SlicedEllpackView< Device, Index, RowMajorOrder, SliceSize >:: +SlicedEllpackView< Device, Index, Organization, SliceSize >:: load( File& file ) { file.load( &size ); diff --git a/src/TNL/Containers/Segments/details/BiEllpack.h b/src/TNL/Containers/Segments/details/BiEllpack.h index fe2701f07..480e7a359 100644 --- a/src/TNL/Containers/Segments/details/BiEllpack.h +++ b/src/TNL/Containers/Segments/details/BiEllpack.h @@ -22,7 +22,7 @@ namespace TNL { template< typename Index, typename Device, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), int WarpSize = 32 > class BiEllpack { @@ -30,12 +30,12 @@ class BiEllpack using DeviceType = Device; using IndexType = Index; - static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + static constexpr bool getOrganization() { return Organization; } using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; using OffsetsHolderView = typename OffsetsHolder::ViewType; using ConstOffsetsHolderView = typename OffsetsHolderView::ConstViewType; using SegmentsSizes = OffsetsHolder; - using SegmentViewType = BiEllpackSegmentView< IndexType, RowMajorOrder >; + using SegmentViewType = BiEllpackSegmentView< IndexType, Organization >; static constexpr int getWarpSize() { return WarpSize; }; @@ -165,7 +165,7 @@ class BiEllpack } else { - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return globalIdx + rowStripPerm * groupWidth + localIdx; else return globalIdx + rowStripPerm + localIdx * groupHeight; @@ -202,7 +202,7 @@ class BiEllpack } else { - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) { return globalIdx + rowStripPerm * groupWidth + localIdx; } diff --git a/src/TNL/Containers/Segments/details/ChunkedEllpack.h b/src/TNL/Containers/Segments/details/ChunkedEllpack.h index 14e181c7e..692b18d5f 100644 --- 
a/src/TNL/Containers/Segments/details/ChunkedEllpack.h +++ b/src/TNL/Containers/Segments/details/ChunkedEllpack.h @@ -55,14 +55,14 @@ struct ChunkedEllpackSliceInfo template< typename Index, typename Device, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() > class ChunkedEllpack { public: using DeviceType = Device; using IndexType = Index; - static constexpr bool getRowMajorOrder() { return RowMajorOrder; } + static constexpr ElementsOrganization getOrganization() { return Organization; } using OffsetsHolder = Containers::Vector< IndexType, DeviceType, IndexType >; using OffsetsHolderView = typename OffsetsHolder::ViewType; using SegmentsSizes = OffsetsHolder; @@ -70,7 +70,7 @@ class ChunkedEllpack using ChunkedEllpackSliceInfoAllocator = typename Allocators::Default< Device >::template Allocator< ChunkedEllpackSliceInfoType >; using ChunkedEllpackSliceInfoContainer = Containers::Array< ChunkedEllpackSliceInfoType, DeviceType, IndexType, ChunkedEllpackSliceInfoAllocator >; using ChunkedEllpackSliceInfoContainerView = typename ChunkedEllpackSliceInfoContainer::ViewType; - using SegmentViewType = ChunkedEllpackSegmentView< IndexType, RowMajorOrder >; + using SegmentViewType = ChunkedEllpackSegmentView< IndexType, Organization >; __cuda_callable__ static IndexType getSegmentSizeDirect( const OffsetsHolderView& segmentsToSlicesMapping, @@ -125,7 +125,7 @@ class ChunkedEllpack const IndexType chunkSize = slices[ sliceIndex ].chunkSize; TNL_ASSERT_LE( localIdx, segmentChunksCount * chunkSize, "" ); - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return sliceOffset + firstChunkOfSegment * chunkSize + localIdx; else { @@ -154,7 +154,7 @@ class ChunkedEllpack const IndexType chunkSize = slices.getElement( sliceIndex ).chunkSize; TNL_ASSERT_LE( localIdx, segmentChunksCount * chunkSize, "" ); - if( RowMajorOrder ) + if( 
Organization == RowMajorOrder ) return sliceOffset + firstChunkOfSegment * chunkSize + localIdx; else { @@ -182,7 +182,7 @@ class ChunkedEllpack const IndexType chunkSize = slices[ sliceIndex ].chunkSize; const IndexType segmentSize = segmentChunksCount * chunkSize; - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return SegmentViewType( sliceOffset + firstChunkOfSegment * chunkSize, segmentSize, chunkSize, @@ -212,7 +212,7 @@ class ChunkedEllpack const IndexType chunkSize = slices.getElement( sliceIndex ).chunkSize; const IndexType segmentSize = segmentChunksCount * chunkSize; - if( RowMajorOrder ) + if( Organization == RowMajorOrder ) return SegmentViewType( sliceOffset + firstChunkOfSegment * chunkSize, segmentSize, chunkSize, diff --git a/src/TNL/Matrices/DenseMatrix.h b/src/TNL/Matrices/DenseMatrix.h index 81a6e26a8..392bdc617 100644 --- a/src/TNL/Matrices/DenseMatrix.h +++ b/src/TNL/Matrices/DenseMatrix.h @@ -26,14 +26,14 @@ namespace Matrices { * \tparam Real is a type of matrix elements. * \tparam Device is a device where the matrix is allocated. * \tparam Index is a type for indexing of the matrix elements. - * \tparam RowMajorOrder tells the ordering of matrix elements. If it is \e true the matrix elements - * are stored in row major order. If it is \e false, the matrix elements are stored in column major order. + * \tparam Organization tells the ordering of matrix elements. It is either RowMajorOrder + * or ColumnMajorOrder. * \tparam RealAllocator is allocator for the matrix elements. 
*/ template< typename Real = double, typename Device = Devices::Host, typename Index = int, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class DenseMatrix : public Matrix< Real, Device, Index > { @@ -41,10 +41,9 @@ class DenseMatrix : public Matrix< Real, Device, Index > using BaseType = Matrix< Real, Device, Index, RealAllocator >; using ValuesVectorType = typename BaseType::ValuesVectorType; using ValuesViewType = typename ValuesVectorType::ViewType; - using SegmentsType = Containers::Segments::Ellpack< Device, Index, typename Allocators::Default< Device >::template Allocator< Index >, RowMajorOrder, 1 >; + using SegmentsType = Containers::Segments::Ellpack< Device, Index, typename Allocators::Default< Device >::template Allocator< Index >, Organization, 1 >; using SegmentViewType = typename SegmentsType::SegmentViewType; - public: /** @@ -62,6 +61,13 @@ class DenseMatrix : public Matrix< Real, Device, Index > */ using IndexType = Index; + /** + * \brief Matrix elements organization getter. + * + * \return matrix elements organization - RowMajorOrder of ColumnMajorOrder. + */ + static constexpr ElementsOrganization getOrganization() { return Organization; }; + /** * \brief The allocator for matrix elements. */ @@ -72,14 +78,14 @@ class DenseMatrix : public Matrix< Real, Device, Index > * * See \ref DenseMatrixView. */ - using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; + using ViewType = DenseMatrixView< Real, Device, Index, Organization >; /** * \brief Matrix view type for constant instances. * * See \ref DenseMatrixView. 
*/ - using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; /** * \brief Type for accessing matrix row. @@ -92,9 +98,9 @@ class DenseMatrix : public Matrix< Real, Device, Index > template< typename _Real = Real, typename _Device = Device, typename _Index = Index, - bool _RowMajorOrder = RowMajorOrder, + ElementsOrganization _Organization = Organization, typename _RealAllocator = RealAllocator > - using Self = DenseMatrix< _Real, _Device, _Index, _RowMajorOrder, _RealAllocator >; + using Self = DenseMatrix< _Real, _Device, _Index, _Organization, _RealAllocator >; /** * \brief Constructor without parameters. @@ -587,8 +593,8 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \return reference to this matrix. */ template< typename RHSReal, typename RHSDevice, typename RHSIndex, - bool RHSRowMajorOrder, typename RHSRealAllocator > - DenseMatrix& operator=( const DenseMatrix< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix ); + ElementsOrganization RHSOrganization, typename RHSRealAllocator > + DenseMatrix& operator=( const DenseMatrix< RHSReal, RHSDevice, RHSIndex, RHSOrganization, RHSRealAllocator >& matrix ); /** * \brief Assignment operator for other (sparse) types of matrices. @@ -606,7 +612,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \return \e true if the RHS matrix is equal, \e false otherwise. */ template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > - bool operator==( const DenseMatrix< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; + bool operator==( const DenseMatrix< Real_, Device_, Index_, Organization >& matrix ) const; /** * \brief Comparison operator with another dense matrix. 
@@ -615,7 +621,7 @@ class DenseMatrix : public Matrix< Real, Device, Index > * \return \e false if the RHS matrix is equal, \e true otherwise. */ template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > - bool operator!=( const DenseMatrix< Real_, Device_, Index_, RowMajorOrder >& matrix ) const; + bool operator!=( const DenseMatrix< Real_, Device_, Index_, Organization >& matrix ) const; /** * \brief Method for saving the matrix to the file with given filename. @@ -673,9 +679,9 @@ class DenseMatrix : public Matrix< Real, Device, Index > template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -std::ostream& operator<< ( std::ostream& str, const DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix ); +std::ostream& operator<< ( std::ostream& str, const DenseMatrix< Real, Device, Index, Organization, RealAllocator >& matrix ); } // namespace Matrices } // namespace TNL diff --git a/src/TNL/Matrices/DenseMatrix.hpp b/src/TNL/Matrices/DenseMatrix.hpp index da6957a9c..2b2fcc996 100644 --- a/src/TNL/Matrices/DenseMatrix.hpp +++ b/src/TNL/Matrices/DenseMatrix.hpp @@ -20,18 +20,18 @@ namespace Matrices { template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::DenseMatrix() +DenseMatrix< Real, Device, Index, Organization, RealAllocator >::DenseMatrix() { } template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: DenseMatrix( const IndexType rows, const IndexType columns ) { this->setDimensions( rows, columns ); @@ -40,10 +40,10 @@ DenseMatrix( const IndexType rows, const 
IndexType columns ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Value > -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: DenseMatrix( std::initializer_list< std::initializer_list< Value > > data ) { this->setElements( data ); @@ -52,11 +52,11 @@ DenseMatrix( std::initializer_list< std::initializer_list< Value > > data ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Value > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: setElements( std::initializer_list< std::initializer_list< Value > > data ) { IndexType rows = data.size(); @@ -93,10 +93,10 @@ setElements( std::initializer_list< std::initializer_list< Value > > data ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > auto -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getView() -> ViewType { return ViewType( this->getRows(), @@ -107,10 +107,10 @@ getView() -> ViewType template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > auto -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getConstView() const -> ConstViewType { return ConstViewType( this->getRows(), @@ -121,10 +121,10 @@ getConstView() const -> ConstViewType template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > String 
-DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getSerializationType() { return ViewType::getSerializationType(); @@ -133,10 +133,10 @@ getSerializationType() template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > String -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getSerializationTypeVirtual() const { return this->getSerializationType(); @@ -145,10 +145,10 @@ getSerializationTypeVirtual() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: setDimensions( const IndexType rows, const IndexType columns ) { @@ -162,11 +162,11 @@ setDimensions( const IndexType rows, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Matrix_ > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: setLike( const Matrix_& matrix ) { this->setDimensions( matrix.getRows(), matrix.getColumns() ); @@ -175,11 +175,11 @@ setLike( const Matrix_& matrix ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename RowCapacitiesVector > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: setRowCapacities( const RowCapacitiesVector& rowCapacities ) { TNL_ASSERT_EQ( rowCapacities.getSize(), this->getRows(), "" ); @@ -189,11 +189,11 
@@ setRowCapacities( const RowCapacitiesVector& rowCapacities ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename RowLengthsVector > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getCompressedRowLengths( RowLengthsVector& rowLengths ) const { this->view.getCompressedRowLengths( rowLengths ); @@ -202,10 +202,10 @@ getCompressedRowLengths( RowLengthsVector& rowLengths ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > Index -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getElementsCount() const { return this->getRows() * this->getColumns(); @@ -214,10 +214,10 @@ getElementsCount() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > Index -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getNonzeroElementsCount() const { return this->view.getNonzeroElementsCount(); @@ -226,10 +226,10 @@ getNonzeroElementsCount() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); @@ -239,10 +239,10 @@ reset() template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, 
Device, Index, Organization, RealAllocator >:: setValue( const Real& value ) { this->view.setValue( value ); @@ -251,10 +251,10 @@ setValue( const Real& value ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ auto -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getRow( const IndexType& rowIdx ) const -> const RowView { return this->view.getRow( rowIdx ); @@ -263,10 +263,10 @@ getRow( const IndexType& rowIdx ) const -> const RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ auto -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getRow( const IndexType& rowIdx ) -> RowView { return this->view.getRow( rowIdx ); @@ -275,10 +275,10 @@ getRow( const IndexType& rowIdx ) -> RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ -Real& DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row, +Real& DenseMatrix< Real, Device, Index, Organization, RealAllocator >::operator()( const IndexType row, const IndexType column ) { return this->view.operator()( row, column ); @@ -287,10 +287,10 @@ Real& DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::operator template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ -const Real& DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::operator()( const IndexType row, +const Real& DenseMatrix< Real, Device, Index, Organization, RealAllocator >::operator()( const 
IndexType row, const IndexType column ) const { return this->view.operator()( row, column ); @@ -299,10 +299,10 @@ const Real& DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::op template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) @@ -313,10 +313,10 @@ setElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: addElement( const IndexType row, const IndexType column, const RealType& value, @@ -328,10 +328,10 @@ addElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ Real -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const { @@ -341,11 +341,11 @@ getElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const { 
this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); @@ -354,11 +354,11 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& redu template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); @@ -367,11 +367,11 @@ allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchRea template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Function > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { this->view.forRows( first, last, function ); @@ -380,11 +380,11 @@ forRows( IndexType first, IndexType last, Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Function > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { this->view.forRows( first, last, function ); @@ -393,11 +393,11 @@ forRows( IndexType first, IndexType last, Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Function 
> void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: forAllRows( Function& function ) const { this->forRows( 0, this->getRows(), function ); @@ -406,11 +406,11 @@ forAllRows( Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Function > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: forAllRows( Function& function ) { this->forRows( 0, this->getRows(), function ); @@ -419,12 +419,12 @@ forAllRows( Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename InVector, typename OutVector > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, @@ -438,11 +438,11 @@ vectorProduct( const InVector& inVector, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Matrix > void -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: addMatrix( const Matrix& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) @@ -463,7 +463,7 @@ addMatrix( const Matrix& matrix, #ifdef HAVE_CUDA template< typename Real, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename Matrix1, typename Matrix2, @@ -564,10 +564,10 @@ __global__ void DenseMatrixProductKernel( DenseMatrix< Real, Devices::Cuda, Inde template< 
typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Matrix1, typename Matrix2, int tileDim > -void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::getMatrixProduct( const Matrix1& matrix1, +void DenseMatrix< Real, Device, Index, Organization, RealAllocator >::getMatrixProduct( const Matrix1& matrix1, const Matrix2& matrix2, const RealType& matrix1Multiplicator, const RealType& matrix2Multiplicator ) @@ -655,7 +655,7 @@ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::getMatrix template< typename Real, typename Index, typename Matrix, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, int tileDim, int tileRowBlockSize > @@ -725,7 +725,7 @@ __global__ void DenseTranspositionAlignedKernel( DenseMatrix< Real, Devices::Cud template< typename Real, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename Matrix, int tileDim, @@ -808,10 +808,10 @@ __global__ void DenseTranspositionNonAlignedKernel( DenseMatrix< Real, Devices:: template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Matrix, int tileDim > -void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Matrix& matrix, +void DenseMatrix< Real, Device, Index, Organization, RealAllocator >::getTransposition( const Matrix& matrix, const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getColumns() == matrix.getRows() && @@ -901,10 +901,10 @@ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransp template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Vector1, typename Vector2 > -void DenseMatrix< Real, Device, Index, 
RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b, +void DenseMatrix< Real, Device, Index, Organization, RealAllocator >::performSORIteration( const Vector1& b, const IndexType row, Vector2& x, const RealType& omega ) const @@ -923,11 +923,11 @@ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::performSO template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >& -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: -operator=( const DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix ) +DenseMatrix< Real, Device, Index, Organization, RealAllocator >& +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: +operator=( const DenseMatrix< Real, Device, Index, Organization, RealAllocator >& matrix ) { setLike( matrix ); this->values = matrix.values; @@ -937,21 +937,21 @@ operator=( const DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename RHSReal, typename RHSDevice, typename RHSIndex, - bool RHSRowMajorOrder, typename RHSRealAllocator > -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >& -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: -operator=( const DenseMatrix< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RHSRealAllocator >& matrix ) + ElementsOrganization RHSOrganization, typename RHSRealAllocator > +DenseMatrix< Real, Device, Index, Organization, RealAllocator >& +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: +operator=( const DenseMatrix< RHSReal, RHSDevice, RHSIndex, RHSOrganization, RHSRealAllocator >& matrix ) { - using RHSMatrix = DenseMatrix< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, 
RHSRealAllocator >; + using RHSMatrix = DenseMatrix< RHSReal, RHSDevice, RHSIndex, RHSOrganization, RHSRealAllocator >; using RHSIndexType = typename RHSMatrix::IndexType; using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; this->setLike( matrix ); - if( RowMajorOrder == RHSRowMajorOrder ) + if( Organization == RHSOrganization ) { this->values = matrix.getValues(); return *this; @@ -1010,11 +1010,11 @@ operator=( const DenseMatrix< RHSReal, RHSDevice, RHSIndex, RHSRowMajorOrder, RH template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename RHSMatrix > -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >& -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >& +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: operator=( const RHSMatrix& matrix ) { using RHSIndexType = typename RHSMatrix::IndexType; @@ -1100,12 +1100,12 @@ operator=( const RHSMatrix& matrix ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > bool -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: -operator==( const DenseMatrix< Real_, Device_, Index_, RowMajorOrder >& matrix ) const +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: +operator==( const DenseMatrix< Real_, Device_, Index_, Organization >& matrix ) const { return( this->getRows() == matrix.getRows() && this->getColumns() == matrix.getColumns() && @@ -1115,12 +1115,12 @@ operator==( const DenseMatrix< Real_, Device_, Index_, RowMajorOrder >& matrix ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, 
typename RealAllocator > template< typename Real_, typename Device_, typename Index_, typename RealAllocator_ > bool -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: -operator!=( const DenseMatrix< Real_, Device_, Index_, RowMajorOrder >& matrix ) const +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: +operator!=( const DenseMatrix< Real_, Device_, Index_, Organization >& matrix ) const { return ! ( *this == matrix ); } @@ -1128,9 +1128,9 @@ operator!=( const DenseMatrix< Real_, Device_, Index_, RowMajorOrder >& matrix ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const +void DenseMatrix< Real, Device, Index, Organization, RealAllocator >::save( const String& fileName ) const { this->view.save( fileName ); } @@ -1138,9 +1138,9 @@ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::save( con template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName ) +void DenseMatrix< Real, Device, Index, Organization, RealAllocator >::load( const String& fileName ) { Object::load( fileName ); } @@ -1148,9 +1148,9 @@ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::load( con template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const +void DenseMatrix< Real, Device, Index, Organization, RealAllocator >::save( File& file ) const { this->view.save( file ); } @@ -1158,9 +1158,9 @@ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator 
>::save( Fil template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file ) +void DenseMatrix< Real, Device, Index, Organization, RealAllocator >::load( File& file ) { Matrix< Real, Device, Index >::load( file ); this->segments.load( file ); @@ -1170,9 +1170,9 @@ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::load( Fil template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::print( std::ostream& str ) const +void DenseMatrix< Real, Device, Index, Organization, RealAllocator >::print( std::ostream& str ) const { this->view.print( str ); } @@ -1180,11 +1180,11 @@ void DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::print( st template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ Index -DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >:: +DenseMatrix< Real, Device, Index, Organization, RealAllocator >:: getElementIndex( const IndexType row, const IndexType column ) const { return this->segments.getGlobalIndex( row, column ); @@ -1193,9 +1193,9 @@ getElementIndex( const IndexType row, const IndexType column ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -std::ostream& operator<< ( std::ostream& str, const DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix ) +std::ostream& operator<< ( std::ostream& str, const DenseMatrix< Real, Device, Index, Organization, RealAllocator >& matrix ) { matrix.print( str ); return str; diff --git a/src/TNL/Matrices/DenseMatrixView.h 
b/src/TNL/Matrices/DenseMatrixView.h index 65bcaeb42..e09253cb2 100644 --- a/src/TNL/Matrices/DenseMatrixView.h +++ b/src/TNL/Matrices/DenseMatrixView.h @@ -14,64 +14,138 @@ #include #include #include +#include #include namespace TNL { namespace Matrices { +/** + * \brief Implementation of dense matrix view. + * + * It serves as an accessor to \ref DenseMatrix for example when passing the + * matrix to lambda functions. DenseMatrix view can be also created in CUDA kernels. + * + * \tparam Real is a type of matrix elements. + * \tparam Device is a device where the matrix is allocated. + * \tparam Index is a type for indexing of the matrix elements. + * \tparam Organization tells the ordering of matrix elements. It is either RowMajorOrder + * or ColumnMajorOrder. + * + * See \ref DenseMatrix. + */ template< typename Real = double, typename Device = Devices::Host, typename Index = int, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() > class DenseMatrixView : public MatrixView< Real, Device, Index > { - private: - // convenient template alias for controlling the selection of copy-assignment operator - template< typename Device2 > - using Enabler = std::enable_if< !
std::is_same< Device2, Device >::value >; - - // friend class will be needed for templated assignment operators - //template< typename Real2, typename Device2, typename Index2 > - //friend class Dense; + protected: + using BaseType = Matrix< Real, Device, Index >; + using ValuesVectorType = typename BaseType::ValuesVectorType; + using SegmentsType = Containers::Segments::Ellpack< Device, Index, typename Allocators::Default< Device >::template Allocator< Index >, Organization, 1 >; + using SegmentsViewType = typename SegmentsType::ViewType; + using SegmentViewType = typename SegmentsType::SegmentViewType; public: + + /** + * \brief The type of matrix elements. + */ using RealType = Real; + + /** + * \brief The device where the matrix is allocated. + */ using DeviceType = Device; + + /** + * \brief The type used for matrix elements indexing. + */ using IndexType = Index; - using BaseType = Matrix< Real, Device, Index >; - using ValuesVectorType = typename BaseType::ValuesVectorType; - using ValuesViewType = typename ValuesVectorType::ViewType; - using SegmentsType = Containers::Segments::Ellpack< DeviceType, IndexType, typename Allocators::Default< Device >::template Allocator< IndexType >, RowMajorOrder, 1 >; - using SegmentsViewType = typename SegmentsType::ViewType; - using SegmentViewType = typename SegmentsType::SegmentViewType; - using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; - using ViewType = DenseMatrixView< Real, Device, Index, RowMajorOrder >; - using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + /** + * \brief Matrix elements organization getter. + * + * \return matrix elements organization - RowMajorOrder or ColumnMajorOrder. + */ + static constexpr ElementsOrganization getOrganization() { return Organization; }; + + /** + * \brief Matrix elements container view type. + * + * Use this for embedding of the matrix elements values.
+ */ + using ValuesViewType = typename ValuesVectorType::ViewType; - // TODO: remove this - using CompressedRowLengthsVector = typename Matrix< Real, Device, Index >::CompressedRowLengthsVector; - using ConstCompressedRowLengthsVectorView = typename Matrix< RealType, DeviceType, IndexType >::ConstCompressedRowLengthsVectorView; + /** + * \brief Matrix view type. + * + * See \ref DenseMatrixView. + */ + using ViewType = DenseMatrixView< Real, Device, Index, Organization >; + + /** + * \brief Matrix view type for constant instances. + * + * See \ref DenseMatrixView. + */ + using ConstViewType = DenseMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; + + /** + * \brief Type for accessing matrix row. + */ + using RowView = DenseMatrixRowView< SegmentViewType, ValuesViewType >; + /** + * \brief Helper type for getting self type or its modifications. + */ template< typename _Real = Real, typename _Device = Device, typename _Index = Index > using Self = DenseMatrixView< _Real, _Device, _Index >; + /** + * \brief Constructor without parameters. + */ __cuda_callable__ DenseMatrixView(); + /** + * \brief Constructor with matrix dimensions and values. + * + * Organization of matrix elements values in \e values is expected to follow the \e Organization template parameter. + * + * \param rows number of matrix rows. + * \param columns number of matrix columns. + * \param values is vector view with matrix elements values. + */ __cuda_callable__ DenseMatrixView( const IndexType rows, const IndexType columns, const ValuesViewType& values ); + /** + * \brief Copy constructor. + * + * \param matrix is the source matrix view. + */ __cuda_callable__ - DenseMatrixView( const DenseMatrixView& m ) = default; + DenseMatrixView( const DenseMatrixView& matrix ) = default; + /** + * \brief Returns a modifiable dense matrix view. + * + * \return dense matrix view. + */ __cuda_callable__ ViewType getView(); + /** + * \brief Returns a non-modifiable dense matrix view. + * + * \return dense matrix view.
+ */ __cuda_callable__ ConstViewType getConstView() const; diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index c406f5254..2ba34e549 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -22,9 +22,9 @@ namespace Matrices { template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: DenseMatrixView() { } @@ -32,9 +32,9 @@ DenseMatrixView() template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: DenseMatrixView( const IndexType rows, const IndexType columns, const ValuesViewType& values ) @@ -47,25 +47,24 @@ DenseMatrixView( const IndexType rows, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getView() -> ViewType { return ViewType( this->getRows(), this->getColumns(), - this->getValues().getView(), - this->columnIndexes.getView() ); + this->getValues().getView() ); } template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getConstView() const -> ConstViewType { return ConstViewType( this->getRows(), @@ -77,23 +76,23 @@ getConstView() const -> ConstViewType template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > String -DenseMatrixView< Real, 
Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getSerializationType() { return String( "Matrices::DenseMatrix< " ) + TNL::getSerializationType< RealType >() + ", [any_device], " + TNL::getSerializationType< IndexType >() + ", " + - ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; + ( Organization ? "true" : "false" ) + ", [any_allocator] >"; } template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > String -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getSerializationTypeVirtual() const { return this->getSerializationType(); @@ -102,10 +101,10 @@ getSerializationTypeVirtual() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Vector > void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getCompressedRowLengths( Vector& rowLengths ) const { rowLengths.setSize( this->getRows() ); @@ -123,9 +122,9 @@ getCompressedRowLengths( Vector& rowLengths ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Index -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getRowLength( const IndexType row ) const { return this->getColumns(); @@ -134,9 +133,9 @@ getRowLength( const IndexType row ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Index -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getMaxRowLength() const { return this->getColumns(); @@ -145,9 +144,9 @@ getMaxRowLength() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + 
ElementsOrganization Organization > Index -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getElementsCount() const { return this->getRows() * this->getColumns(); @@ -156,9 +155,9 @@ getElementsCount() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Index -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getNonzeroElementsCount() const { const auto values_view = this->values.getConstView(); @@ -171,9 +170,9 @@ getNonzeroElementsCount() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: setValue( const Real& value ) { this->values = value; @@ -182,9 +181,9 @@ setValue( const Real& value ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getRow( const IndexType& rowIdx ) const -> const RowView { TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." ); @@ -194,9 +193,9 @@ getRow( const IndexType& rowIdx ) const -> const RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getRow( const IndexType& rowIdx ) -> RowView { TNL_ASSERT_LT( rowIdx, this->getRows(), "Row index is larger than number of matrix rows." 
); @@ -206,9 +205,9 @@ getRow( const IndexType& rowIdx ) -> RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ -Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row, +Real& DenseMatrixView< Real, Device, Index, Organization >::operator()( const IndexType row, const IndexType column ) { TNL_ASSERT_GE( row, 0, "Row index must be non-negative." ); @@ -222,9 +221,9 @@ Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const I template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ -const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( const IndexType row, +const Real& DenseMatrixView< Real, Device, Index, Organization >::operator()( const IndexType row, const IndexType column ) const { TNL_ASSERT_GE( row, 0, "Row index must be non-negative." 
); @@ -238,9 +237,9 @@ const Real& DenseMatrixView< Real, Device, Index, RowMajorOrder >::operator()( c template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: setElement( const IndexType row, const IndexType column, const RealType& value ) @@ -251,9 +250,9 @@ setElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: addElement( const IndexType row, const IndexType column, const RealType& value, @@ -271,9 +270,9 @@ addElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ Real -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: getElement( const IndexType row, const IndexType column ) const { @@ -283,10 +282,10 @@ getElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduce, typename Keep, typename FetchValue > void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchValue& zero ) const { const auto values_view = this->values.getConstView(); @@ -300,10 +299,10 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, const Reduce& redu template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename 
Fetch, typename Reduce, typename Keep, typename FetchReal > void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); @@ -312,10 +311,10 @@ allRowsReduction( Fetch& fetch, const Reduce& reduce, Keep& keep, const FetchRea template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); @@ -329,10 +328,10 @@ forRows( IndexType first, IndexType last, Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); @@ -346,10 +345,10 @@ forRows( IndexType first, IndexType last, Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: forAllRows( Function& function ) const { this->forRows( 0, this->getRows(), function ); @@ -358,10 +357,10 @@ forAllRows( Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -DenseMatrixView< Real, Device, 
Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: forAllRows( Function& function ) { this->forRows( 0, this->getRows(), function ); @@ -370,11 +369,11 @@ forAllRows( Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename InVector, typename OutVector > void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: vectorProduct( const InVector& inVector, OutVector& outVector, const RealType& matrixMultiplicator, @@ -402,10 +401,10 @@ vectorProduct( const InVector& inVector, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Matrix > void -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: +DenseMatrixView< Real, Device, Index, Organization >:: addMatrix( const Matrix& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) @@ -426,9 +425,9 @@ addMatrix( const Matrix& matrix, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Matrix1, typename Matrix2, int tileDim > -void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( const Matrix1& matrix1, +void DenseMatrixView< Real, Device, Index, Organization >::getMatrixProduct( const Matrix1& matrix1, const Matrix2& matrix2, const RealType& matrix1Multiplicator, const RealType& matrix2Multiplicator ) @@ -516,9 +515,9 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getMatrixProduct( co template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Matrix, int tileDim > -void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( const Matrix& matrix, +void DenseMatrixView< Real, Device, Index, 
Organization >::getTransposition( const Matrix& matrix, const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getColumns() == matrix.getRows() && @@ -608,9 +607,9 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::getTransposition( co template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Vector1, typename Vector2 > -void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration( const Vector1& b, +void DenseMatrixView< Real, Device, Index, Organization >::performSORIteration( const Vector1& b, const IndexType row, Vector2& x, const RealType& omega ) const @@ -630,9 +629,9 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::performSORIteration( template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -DenseMatrixView< Real, Device, Index, RowMajorOrder >& -DenseMatrixView< Real, Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +DenseMatrixView< Real, Device, Index, Organization >& +DenseMatrixView< Real, Device, Index, Organization >:: operator=( const DenseMatrixView& matrix ) { MatrixView< Real, Device, Index >::operator=( matrix ); @@ -643,8 +642,8 @@ operator=( const DenseMatrixView& matrix ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& fileName ) const + ElementsOrganization Organization > +void DenseMatrixView< Real, Device, Index, Organization >::save( const String& fileName ) const { Object::save( fileName ); } @@ -652,8 +651,8 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( const String& template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const + ElementsOrganization Organization > +void DenseMatrixView< Real, Device, Index, Organization 
>::save( File& file ) const { MatrixView< Real, Device, Index >::save( file ); this->segments.save( file ); @@ -662,8 +661,8 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) c template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const + ElementsOrganization Organization > +void DenseMatrixView< Real, Device, Index, Organization >::print( std::ostream& str ) const { for( IndexType row = 0; row < this->getRows(); row++ ) { @@ -681,9 +680,9 @@ void DenseMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ -Index DenseMatrixView< Real, Device, Index, RowMajorOrder >::getElementIndex( const IndexType row, +Index DenseMatrixView< Real, Device, Index, Organization >::getElementIndex( const IndexType row, const IndexType column ) const { return this->segments.getGlobalIndex( row, column ); diff --git a/src/TNL/Matrices/DistributedSpMV.h b/src/TNL/Matrices/DistributedSpMV.h index 55527834c..4785a6790 100644 --- a/src/TNL/Matrices/DistributedSpMV.h +++ b/src/TNL/Matrices/DistributedSpMV.h @@ -218,7 +218,7 @@ public: protected: // communication pattern - Matrices::DenseMatrix< IndexType, Devices::Host, int, true, Allocators::Host< IndexType > > commPatternStarts, commPatternEnds; + Matrices::DenseMatrix< IndexType, Devices::Host, int, Containers::Segments::RowMajorOrder, Allocators::Host< IndexType > > commPatternStarts, commPatternEnds; // span of rows with only block-diagonal entries std::pair< IndexType, IndexType > localOnlySpan; diff --git a/src/TNL/Matrices/Matrix.h b/src/TNL/Matrices/Matrix.h index ba2172168..4b954d314 100644 --- a/src/TNL/Matrices/Matrix.h +++ b/src/TNL/Matrices/Matrix.h @@ -16,6 +16,7 @@ #include #include #include +#include namespace TNL { /** @@ -23,6 
+24,8 @@ namespace TNL { */ namespace Matrices { +using Containers::Segments::ElementsOrganization; + template< typename Real = double, typename Device = Devices::Host, typename Index = int, diff --git a/src/TNL/Matrices/MatrixInfo.h b/src/TNL/Matrices/MatrixInfo.h index e91e8a404..1d825a769 100644 --- a/src/TNL/Matrices/MatrixInfo.h +++ b/src/TNL/Matrices/MatrixInfo.h @@ -37,8 +37,8 @@ struct MatrixInfo template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -struct MatrixInfo< DenseMatrixView< Real, Device, Index, RowMajorOrder > > + ElementsOrganization Organization > +struct MatrixInfo< DenseMatrixView< Real, Device, Index, Organization > > { static String getDensity() { return String( "dense" ); }; }; @@ -46,10 +46,10 @@ struct MatrixInfo< DenseMatrixView< Real, Device, Index, RowMajorOrder > > template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -struct MatrixInfo< DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator > > -: public MatrixInfo< typename DenseMatrix< Real, Device, Index, RowMajorOrder, RealAllocator >::ViewType > +struct MatrixInfo< DenseMatrix< Real, Device, Index, Organization, RealAllocator > > +: public MatrixInfo< typename DenseMatrix< Real, Device, Index, Organization, RealAllocator >::ViewType > { }; diff --git a/src/TNL/Matrices/Multidiagonal.h b/src/TNL/Matrices/Multidiagonal.h index 3b92d1db1..9823a7b61 100644 --- a/src/TNL/Matrices/Multidiagonal.h +++ b/src/TNL/Matrices/Multidiagonal.h @@ -23,7 +23,7 @@ namespace Matrices { template< typename Real = double, typename Device = Devices::Host, typename Index = int, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real >, typename IndexAllocator = 
typename Allocators::Default< Device >::template Allocator< Index > > class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > @@ -37,12 +37,12 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > using BaseType = Matrix< Real, Device, Index, RealAllocator >; using ValuesVectorType = typename BaseType::ValuesVectorType; using ValuesViewType = typename ValuesVectorType::ViewType; - using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, Organization >; using DiagonalsShiftsType = Containers::Vector< IndexType, DeviceType, IndexType, IndexAllocatorType >; using DiagonalsShiftsView = typename DiagonalsShiftsType::ViewType; using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >; - using ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >; - using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using ViewType = MultidiagonalMatrixView< Real, Device, Index, Organization >; + using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >; using HostDiagonalsShiftsView = typename HostDiagonalsShiftsType::ViewType; @@ -58,7 +58,7 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > typename _Index = Index > using Self = Multidiagonal< _Real, _Device, _Index >; - static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; + static constexpr ElementsOrganization getOrganization() { return Organization; }; Multidiagonal(); @@ -100,18 +100,18 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > IndexType getMaxRowLength() const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename 
RealAllocator_ > - void setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ); + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + void setLike( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m ); IndexType getNumberOfNonzeroMatrixElements() const; void reset(); - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > - bool operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + bool operator == ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > - bool operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + bool operator != ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const; __cuda_callable__ RowView getRow( const IndexType& rowIdx ); @@ -161,8 +161,8 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > - void addMatrix( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + void addMatrix( const Multidiagonal< Real_, Device_, Index_, Organization_, 
RealAllocator_ >& matrix, const RealType& matrixMultiplicator = 1.0, const RealType& thisMatrixMultiplicator = 1.0 ); @@ -184,10 +184,10 @@ class Multidiagonal : public Matrix< Real, Device, Index, RealAllocator > template< typename Real_, typename Device_, typename Index_, - bool RowMajorOrder_, + ElementsOrganization Organization_, typename RealAllocator_, typename IndexAllocator_ > - Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >& matrix ); + Multidiagonal& operator=( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ); void save( File& file ) const; diff --git a/src/TNL/Matrices/Multidiagonal.hpp b/src/TNL/Matrices/Multidiagonal.hpp index e8eb66751..be6ec58c5 100644 --- a/src/TNL/Matrices/Multidiagonal.hpp +++ b/src/TNL/Matrices/Multidiagonal.hpp @@ -24,10 +24,10 @@ class MultidiagonalDeviceDependentCode; template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: Multidiagonal() { } @@ -35,11 +35,11 @@ Multidiagonal() template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Vector > -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: Multidiagonal( const IndexType rows, const IndexType columns, const Vector& diagonalsShifts ) @@ -51,11 +51,11 @@ Multidiagonal( const IndexType rows, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, 
typename IndexAllocator > auto -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getView() const -> ViewType { // TODO: fix when getConstView works @@ -68,11 +68,11 @@ getView() const -> ViewType /*template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > auto -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getConstView() const -> ConstViewType { return ConstViewType( this->values.getConstView(), indexer ); @@ -81,27 +81,27 @@ getConstView() const -> ConstViewType template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > String -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getSerializationType() { return String( "Matrices::Multidiagonal< " ) + TNL::getSerializationType< RealType >() + ", [any_device], " + TNL::getSerializationType< IndexType >() + ", " + - ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator], [any_allocator] >"; + ( Organization ? 
"true" : "false" ) + ", [any_allocator], [any_allocator] >"; } template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > String -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getSerializationTypeVirtual() const { return this->getSerializationType(); @@ -110,12 +110,12 @@ getSerializationTypeVirtual() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Vector > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: setDimensions( const IndexType rows, const IndexType columns, const Vector& diagonalsShifts ) @@ -136,12 +136,12 @@ setDimensions( const IndexType rows, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > // template< typename Vector > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths ) { if( max( rowLengths ) > 3 ) @@ -163,11 +163,11 @@ setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > const Index& -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator 
>:: getDiagonalsCount() const { return this->view.getDiagonalsCount(); @@ -176,11 +176,11 @@ getDiagonalsCount() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > auto -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getDiagonalsShifts() const -> const DiagonalsShiftsType& { return this->diagonalsShifts; @@ -189,12 +189,12 @@ getDiagonalsShifts() const -> const DiagonalsShiftsType& template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Vector > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getCompressedRowLengths( Vector& rowLengths ) const { return this->view.getCompressedRowLengths( rowLengths ); @@ -203,11 +203,11 @@ getCompressedRowLengths( Vector& rowLengths ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > Index -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getNonemptyRowsCount() const { return this->indexer.getNonemptyRowsCount(); @@ -216,11 +216,11 @@ getNonemptyRowsCount() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > Index -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator 
>:: getRowLength( const IndexType row ) const { return this->view.getRowLength( row ); @@ -229,11 +229,11 @@ getRowLength( const IndexType row ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > Index -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getMaxRowLength() const { return this->view.getMaxRowLength(); @@ -242,13 +242,13 @@ getMaxRowLength() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: -setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ) +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +setLike( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m ) { this->setDimensions( m.getRows(), m.getColumns(), m.getDiagonalsShifts() ); } @@ -256,11 +256,11 @@ setLike( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > Index -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getNumberOfNonzeroMatrixElements() const { return this->view.getNumberOfNonzeroMatrixElements(); @@ 
-269,11 +269,11 @@ getNumberOfNonzeroMatrixElements() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); @@ -282,15 +282,15 @@ reset() template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > bool -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: -operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +operator == ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const { - if( RowMajorOrder == RowMajorOrder_ ) + if( Organization == Organization_ ) return this->values == matrix.values; else { @@ -301,13 +301,13 @@ operator == ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealA template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > bool -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: 
-operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +operator != ( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const { return ! this->operator==( matrix ); } @@ -315,11 +315,11 @@ operator != ( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealA template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: setValue( const RealType& v ) { this->view.setValue( v ); @@ -328,12 +328,12 @@ setValue( const RealType& v ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > __cuda_callable__ auto -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getRow( const IndexType& rowIdx ) const -> const RowView { return this->view.getRow( rowIdx ); @@ -342,12 +342,12 @@ getRow( const IndexType& rowIdx ) const -> const RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > __cuda_callable__ auto -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getRow( const IndexType& rowIdx ) -> RowView { return this->view.getRow( rowIdx ); @@ -356,11 +356,11 @@ getRow( const IndexType& rowIdx ) -> RowView template< typename Real, typename Device, typename 
Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { this->view.setElement( row, column, value ); @@ -369,11 +369,11 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: addElement( const IndexType row, const IndexType column, const RealType& value, @@ -385,11 +385,11 @@ addElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > Real -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getElement( const IndexType row, const IndexType column ) const { return this->view.getElement( row, column ); @@ -398,12 +398,12 @@ getElement( const IndexType row, const IndexType column ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: rowsReduction( IndexType first, 
IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); @@ -412,12 +412,12 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); @@ -426,12 +426,12 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Function > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { this->view.forRows( first, last, function ); @@ -440,12 +440,12 @@ forRows( IndexType first, IndexType last, Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Function > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { this->view.forRows( first, 
last, function ); @@ -454,12 +454,12 @@ forRows( IndexType first, IndexType last, Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Function > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: forAllRows( Function& function ) const { this->view.forRows( 0, this->getRows(), function ); @@ -468,12 +468,12 @@ forAllRows( Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Function > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: forAllRows( Function& function ) { this->view.forRows( 0, this->getRows(), function ); @@ -482,13 +482,13 @@ forAllRows( Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Vector > __cuda_callable__ typename Vector::RealType -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { return this->view.rowVectorProduct(); @@ -497,13 +497,13 @@ rowVectorProduct( const IndexType row, const Vector& vector ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename InVector, typename OutVector > void -Multidiagonal< Real, 
Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { this->view.vectorProduct( inVector, outVector ); @@ -512,13 +512,13 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: -addMatrix( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +addMatrix( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { @@ -556,11 +556,11 @@ __global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2 template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Real2, typename Index2 > -void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix, +void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::getTransposition( const Multidiagonal< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getRows() == matrix.getRows(), @@ -606,12 +606,12 @@ void Multidiagonal< Real, Device, 
Index, RowMajorOrder, RealAllocator, IndexAllo template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > template< typename Vector1, typename Vector2 > __cuda_callable__ -void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::performSORIteration( const Vector1& b, +void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::performSORIteration( const Vector1& b, const IndexType row, Vector2& x, const RealType& omega ) const @@ -629,11 +629,11 @@ void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllo template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::operator=( const Multidiagonal& matrix ) +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::operator=( const Multidiagonal& matrix ) { this->setLike( matrix ); this->values = matrix.values; @@ -644,15 +644,15 @@ Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_, typename IndexAllocator_ > -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: -operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, 
RealAllocator_, IndexAllocator_ >& matrix ) + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_, typename IndexAllocator_ > +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: +operator=( const Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >& matrix ) { - using RHSMatrix = Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_, IndexAllocator_ >; + using RHSMatrix = Multidiagonal< Real_, Device_, Index_, Organization_, RealAllocator_, IndexAllocator_ >; using RHSIndexType = typename RHSMatrix::IndexType; using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; @@ -660,7 +660,7 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo using RHSIndexAllocatorType = typename RHSMatrix::IndexAllocatorType; this->setLike( matrix ); - if( RowMajorOrder == RowMajorOrder_ ) + if( Organization == Organization_ ) this->values = matrix.getValues(); else { @@ -719,10 +719,10 @@ operator=( const Multidiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllo template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::save( File& file ) const +void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::save( File& file ) const { Matrix< Real, Device, Index >::save( file ); file << diagonalsShifts; @@ -731,10 +731,10 @@ void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllo template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename 
IndexAllocator > -void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::load( File& file ) +void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::load( File& file ) { Matrix< Real, Device, Index >::load( file ); file >> this->diagonalsShifts; @@ -750,10 +750,10 @@ void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllo template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::save( const String& fileName ) const +void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::save( const String& fileName ) const { Object::save( fileName ); } @@ -761,10 +761,10 @@ void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllo template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > -void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >::load( const String& fileName ) +void Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >::load( const String& fileName ) { Object::load( fileName ); } @@ -772,11 +772,11 @@ void Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllo template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > void -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: print( std::ostream& str ) const { this->view.print( str ); @@ -785,11 +785,11 @@ print( std::ostream& str ) const template< typename Real, 
typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > auto -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getIndexer() const -> const IndexerType& { return this->indexer; @@ -798,11 +798,11 @@ getIndexer() const -> const IndexerType& template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > auto -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getIndexer() -> IndexerType& { return this->indexer; @@ -811,11 +811,11 @@ getIndexer() -> IndexerType& template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > __cuda_callable__ -Index Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Index Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getElementIndex( const IndexType row, const IndexType column ) const { IndexType localIdx = column - row; @@ -831,12 +831,12 @@ getElementIndex( const IndexType row, const IndexType column ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator, typename IndexAllocator > __cuda_callable__ Index -Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >:: +Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >:: getPaddingIndex() const { return this->view.getPaddingIndex(); @@ -884,7 +884,7 @@ class MultidiagonalDeviceDependentCode< Devices::Host > typename Index, typename InVector, 
typename OutVector > - static void vectorProduct( const Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& matrix, + static void vectorProduct( const Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { @@ -937,7 +937,7 @@ class MultidiagonalDeviceDependentCode< Devices::Cuda > typename Index, typename InVector, typename OutVector > - static void vectorProduct( const Multidiagonal< Real, Device, Index, RowMajorOrder, RealAllocator, IndexAllocator >& matrix, + static void vectorProduct( const Multidiagonal< Real, Device, Index, Organization, RealAllocator, IndexAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.h b/src/TNL/Matrices/MultidiagonalMatrixView.h index 97ff94f85..a3ebfe375 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.h +++ b/src/TNL/Matrices/MultidiagonalMatrixView.h @@ -22,7 +22,7 @@ namespace Matrices { template< typename Real = double, typename Device = Devices::Host, typename Index = int, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() > class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > { public: @@ -34,10 +34,10 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > using DiagonalsShiftsView = Containers::VectorView< IndexType, DeviceType, IndexType >; //using HostDiagonalsShiftsType = Containers::Vector< IndexType, Devices::Host, IndexType >; using HostDiagonalsShiftsView = Containers::VectorView< IndexType, Devices::Host, IndexType >; - using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using IndexerType = details::MultidiagonalMatrixIndexer< IndexType, Organization >; using ValuesViewType = typename BaseType::ValuesView; - using 
ViewType = MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >; - using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using ViewType = MultidiagonalMatrixView< Real, Device, Index, Organization >; + using ConstViewType = MultidiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; using RowView = MultidiagonalMatrixRowView< ValuesViewType, IndexerType, DiagonalsShiftsView >; // TODO: remove this - it is here only for compatibility with original matrix implementation @@ -48,8 +48,8 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > template< typename _Real = Real, typename _Device = Device, typename _Index = Index, - bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value > - using Self = MultidiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >; + ElementsOrganization Organization_ = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() > + using Self = MultidiagonalMatrixView< _Real, _Device, _Index, Organization_ >; MultidiagonalMatrixView(); @@ -81,11 +81,11 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > IndexType getNumberOfNonzeroMatrixElements() const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > - bool operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > + bool operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > - bool operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > + bool operator 
!= ( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const; __cuda_callable__ RowView getRow( const IndexType& rowIdx ); @@ -137,8 +137,8 @@ class MultidiagonalMatrixView : public MatrixView< Real, Device, Index > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > - void addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > + void addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix, const RealType& matrixMultiplicator = 1.0, const RealType& thisMatrixMultiplicator = 1.0 ); diff --git a/src/TNL/Matrices/MultidiagonalMatrixView.hpp b/src/TNL/Matrices/MultidiagonalMatrixView.hpp index ecfe1c1d8..8d772b6b9 100644 --- a/src/TNL/Matrices/MultidiagonalMatrixView.hpp +++ b/src/TNL/Matrices/MultidiagonalMatrixView.hpp @@ -20,8 +20,8 @@ namespace Matrices { template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +MultidiagonalMatrixView< Real, Device, Index, Organization >:: MultidiagonalMatrixView() { } @@ -29,8 +29,8 @@ MultidiagonalMatrixView() template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +MultidiagonalMatrixView< Real, Device, Index, Organization >:: MultidiagonalMatrixView( const ValuesViewType& values, const DiagonalsShiftsView& diagonalsShifts, const HostDiagonalsShiftsView& hostDiagonalsShifts, @@ -45,9 +45,9 @@ MultidiagonalMatrixView( const ValuesViewType& values, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > auto 
-MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getView() -> ViewType { return ViewType( const_cast< MultidiagonalMatrixView* >( this )->values.getView(), @@ -59,9 +59,9 @@ getView() -> ViewType template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > auto -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getConstView() const -> ConstViewType { return ConstViewType( this->values.getConstView(), @@ -73,23 +73,23 @@ getConstView() const -> ConstViewType template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > String -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getSerializationType() { return String( "Matrices::Multidiagonal< " ) + TNL::getSerializationType< RealType >() + ", [any_device], " + TNL::getSerializationType< IndexType >() + ", " + - ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; + ( Organization ? 
"true" : "false" ) + ", [any_allocator] >"; } template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > String -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getSerializationTypeVirtual() const { return this->getSerializationType(); @@ -98,10 +98,10 @@ getSerializationTypeVirtual() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ const Index& -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getDiagonalsCount() const { return this->diagonalsShifts.getSize(); @@ -110,10 +110,10 @@ getDiagonalsCount() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Vector > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getCompressedRowLengths( Vector& rowLengths ) const { rowLengths.setSize( this->getRows() ); @@ -134,9 +134,9 @@ getCompressedRowLengths( Vector& rowLengths ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Index -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getNonemptyRowsCount() const { return this->indexer.getNonemptyRowsCount(); @@ -145,9 +145,9 @@ getNonemptyRowsCount() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Index -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getRowLength( const IndexType row ) const { return this->diagonalsShifts.getSize(); @@ -156,9 
+156,9 @@ getRowLength( const IndexType row ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Index -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getMaxRowLength() const { return this->diagonalsShifts.getSize(); @@ -167,9 +167,9 @@ getMaxRowLength() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Index -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getNumberOfNonzeroMatrixElements() const { const auto values_view = this->values.getConstView(); @@ -182,13 +182,13 @@ getNumberOfNonzeroMatrixElements() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + ElementsOrganization Organization > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > bool -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +MultidiagonalMatrixView< Real, Device, Index, Organization >:: +operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const { - if( RowMajorOrder == RowMajorOrder_ ) + if( Organization == Organization_ ) return this->values == matrix.values; else { @@ -199,11 +199,11 @@ operator == ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrd template< typename Real, typename Device, typename Index, - bool RowMajorOrder > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + ElementsOrganization Organization > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization 
Organization_ > bool -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +MultidiagonalMatrixView< Real, Device, Index, Organization >:: +operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const { return ! this->operator==( matrix ); } @@ -211,9 +211,9 @@ operator != ( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrd template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: setValue( const RealType& v ) { // we dont do this->values = v here because it would set even elements 'outside' the matrix @@ -228,10 +228,10 @@ setValue( const RealType& v ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getRow( const IndexType& rowIdx ) const -> const RowView { return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer ); @@ -240,10 +240,10 @@ getRow( const IndexType& rowIdx ) const -> const RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getRow( const IndexType& rowIdx ) -> RowView { return RowView( rowIdx, this->diagonalsShifts.getView(), this->values.getView(), this->indexer ); @@ -252,9 +252,9 @@ getRow( const IndexType& rowIdx ) -> RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + 
ElementsOrganization Organization > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { TNL_ASSERT_GE( row, 0, "" ); @@ -279,9 +279,9 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: addElement( const IndexType row, const IndexType column, const RealType& value, @@ -310,9 +310,9 @@ addElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Real -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getElement( const IndexType row, const IndexType column ) const { TNL_ASSERT_GE( row, 0, "" ); @@ -329,9 +329,9 @@ getElement( const IndexType row, const IndexType column ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >& -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +MultidiagonalMatrixView< Real, Device, Index, Organization >& +MultidiagonalMatrixView< Real, Device, Index, Organization >:: operator=( const MultidiagonalMatrixView& view ) { MatrixView< Real, Device, Index >::operator=( view ); @@ -344,10 +344,10 @@ operator=( const MultidiagonalMatrixView& view ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -MultidiagonalMatrixView< Real, Device, 
Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const { using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); @@ -373,10 +373,10 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); @@ -385,10 +385,10 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); @@ -411,10 +411,10 @@ forRows( IndexType first, IndexType last, Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); @@ -437,10 +437,10 @@ forRows( IndexType first, IndexType last, Function& function ) template< typename 
Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: forAllRows( Function& function ) const { this->forRows( 0, this->indxer.getNonEmptyRowsCount(), function ); @@ -449,10 +449,10 @@ forAllRows( Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: forAllRows( Function& function ) { this->forRows( 0, this->indexer.getNonemptyRowsCount(), function ); @@ -461,11 +461,11 @@ forAllRows( Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Vector > __cuda_callable__ typename Vector::RealType -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { } @@ -473,11 +473,11 @@ rowVectorProduct( const IndexType row, const Vector& vector ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename InVector, typename OutVector > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." 
); @@ -500,18 +500,18 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + ElementsOrganization Organization > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, +MultidiagonalMatrixView< Real, Device, Index, Organization >:: +addMatrix( const MultidiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." ); TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." ); - /*if( RowMajorOrder == RowMajorOrder_ ) + /*if( Organization == Organization_ ) { if( thisMatrixMultiplicator == 1.0 ) this->values += matrixMultiplicator * matrix.getValues(); @@ -572,10 +572,10 @@ __global__ void MultidiagonalTranspositionCudaKernel( const Multidiagonal< Real2 template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Real2, typename Index2 > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getTransposition( const MultidiagonalMatrixView< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator ) { @@ -622,11 +622,11 @@ getTransposition( const MultidiagonalMatrixView< Real2, Device, Index2 >& matrix template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Vector1, typename Vector2 > __cuda_callable__ void -MultidiagonalMatrixView< 
Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: performSORIteration( const Vector1& b, const IndexType row, Vector2& x, @@ -644,8 +644,8 @@ performSORIteration( const Vector1& b, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const + ElementsOrganization Organization > +void MultidiagonalMatrixView< Real, Device, Index, Organization >::save( File& file ) const { MatrixView< Real, Device, Index >::save( file ); } @@ -653,9 +653,9 @@ void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > void -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: save( const String& fileName ) const { Object::save( fileName ); @@ -664,8 +664,8 @@ save( const String& fileName ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const + ElementsOrganization Organization > +void MultidiagonalMatrixView< Real, Device, Index, Organization >::print( std::ostream& str ) const { for( IndexType rowIdx = 0; rowIdx < this->getRows(); rowIdx++ ) { @@ -687,10 +687,10 @@ void MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std:: template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getIndexer() const -> const IndexerType& { return this->indexer; @@ -699,10 +699,10 @@ getIndexer() const -> const IndexerType& template< typename Real, typename 
Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getIndexer() -> IndexerType& { return this->indexer; @@ -711,10 +711,10 @@ getIndexer() -> IndexerType& template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ Index -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getElementIndex( const IndexType row, const IndexType localIdx ) const { return this->indexer.getGlobalIndex( row, localIdx ); @@ -723,10 +723,10 @@ getElementIndex( const IndexType row, const IndexType localIdx ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ Index -MultidiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +MultidiagonalMatrixView< Real, Device, Index, Organization >:: getPaddingIndex() const { return -1; diff --git a/src/TNL/Matrices/SparseMatrix.h b/src/TNL/Matrices/SparseMatrix.h index cd58b1f4b..598643137 100644 --- a/src/TNL/Matrices/SparseMatrix.h +++ b/src/TNL/Matrices/SparseMatrix.h @@ -232,8 +232,8 @@ class SparseMatrix : public Matrix< Real, Device, Index, RealAllocator > /** * \brief Assignment of dense matrix */ - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ > - SparseMatrix& operator=( const DenseMatrix< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ); + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization, typename RealAllocator_ > + SparseMatrix& operator=( const DenseMatrix< Real_, Device_, Index_, Organization, RealAllocator_ >& matrix ); /** diff --git a/src/TNL/Matrices/SparseMatrix.hpp 
b/src/TNL/Matrices/SparseMatrix.hpp index 2f12d5236..d13537cef 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -649,12 +649,12 @@ template< typename Real, template< typename, typename, typename > class Segments, typename RealAllocator, typename IndexAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization, typename RealAllocator_ > SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >& SparseMatrix< Real, Device, Index, MatrixType, Segments, RealAllocator, IndexAllocator >:: -operator=( const DenseMatrix< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >& matrix ) +operator=( const DenseMatrix< Real_, Device_, Index_, Organization, RealAllocator_ >& matrix ) { - using RHSMatrix = DenseMatrix< Real_, Device_, Index_, RowMajorOrder, RealAllocator_ >; + using RHSMatrix = DenseMatrix< Real_, Device_, Index_, Organization, RealAllocator_ >; using RHSIndexType = typename RHSMatrix::IndexType; using RHSRealType = typename RHSMatrix::RealType; using RHSDeviceType = typename RHSMatrix::DeviceType; diff --git a/src/TNL/Matrices/Tridiagonal.h b/src/TNL/Matrices/Tridiagonal.h index 029793681..25472aa3c 100644 --- a/src/TNL/Matrices/Tridiagonal.h +++ b/src/TNL/Matrices/Tridiagonal.h @@ -23,7 +23,7 @@ namespace Matrices { template< typename Real = double, typename Device = Devices::Host, typename Index = int, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value, + ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization(), typename RealAllocator = typename Allocators::Default< Device >::template Allocator< Real > > class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > { @@ -33,11 +33,11 @@ class Tridiagonal : public Matrix< Real, Device, Index, 
RealAllocator > using IndexType = Index; using RealAllocatorType = RealAllocator; using BaseType = Matrix< Real, Device, Index, RealAllocator >; - using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using IndexerType = details::TridiagonalMatrixIndexer< IndexType, Organization >; using ValuesVectorType = typename BaseType::ValuesVectorType; using ValuesViewType = typename ValuesVectorType::ViewType; - using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; - using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using ViewType = TridiagonalMatrixView< Real, Device, Index, Organization >; + using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; // TODO: remove this - it is here only for compatibility with original matrix implementation @@ -50,7 +50,7 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > typename _Index = Index > using Self = Tridiagonal< _Real, _Device, _Index >; - static constexpr bool getRowMajorOrder() { return RowMajorOrder; }; + static constexpr ElementsOrganization getOrganization() { return Organization; }; Tridiagonal(); @@ -78,18 +78,18 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > IndexType getMaxRowLength() const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > - void setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ); + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + void setLike( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m ); IndexType getNumberOfNonzeroMatrixElements() const; void reset(); - template< typename Real_, typename 
Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > - bool operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + bool operator == ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > - bool operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const; + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + bool operator != ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const; __cuda_callable__ RowView getRow( const IndexType& rowIdx ); @@ -139,8 +139,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > - void addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + void addMatrix( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix, const RealType& matrixMultiplicator = 1.0, const RealType& thisMatrixMultiplicator = 1.0 ); @@ -159,8 +159,8 @@ class Tridiagonal : public Matrix< Real, Device, Index, RealAllocator > Tridiagonal& operator=( const Tridiagonal& matrix ); // cross-device copy assignment - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > - Tridiagonal& operator=( const Tridiagonal< Real_, Device_, 
Index_, RowMajorOrder_, RealAllocator_ >& matrix ); + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > + Tridiagonal& operator=( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ); void save( File& file ) const; diff --git a/src/TNL/Matrices/Tridiagonal.hpp b/src/TNL/Matrices/Tridiagonal.hpp index 3ddabc985..8a1804e9d 100644 --- a/src/TNL/Matrices/Tridiagonal.hpp +++ b/src/TNL/Matrices/Tridiagonal.hpp @@ -24,9 +24,9 @@ class TridiagonalDeviceDependentCode; template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: Tridiagonal() { } @@ -34,9 +34,9 @@ Tridiagonal() template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: Tridiagonal( const IndexType rows, const IndexType columns ) { this->setDimensions( rows, columns ); @@ -45,10 +45,10 @@ Tridiagonal( const IndexType rows, const IndexType columns ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > auto -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getView() const -> ViewType { // TODO: fix when getConstView works @@ -58,10 +58,10 @@ getView() const -> ViewType /*template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > auto -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, 
Index, Organization, RealAllocator >:: getConstView() const -> ConstViewType { return ConstViewType( this->values.getConstView(), indexer ); @@ -70,25 +70,25 @@ getConstView() const -> ConstViewType template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > String -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getSerializationType() { return String( "Matrices::Tridiagonal< " ) + TNL::getSerializationType< RealType >() + ", [any_device], " + TNL::getSerializationType< IndexType >() + ", " + - ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; + ( Organization ? "true" : "false" ) + ", [any_allocator] >"; } template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > String -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getSerializationTypeVirtual() const { return this->getSerializationType(); @@ -97,10 +97,10 @@ getSerializationTypeVirtual() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: setDimensions( const IndexType rows, const IndexType columns ) { Matrix< Real, Device, Index >::setDimensions( rows, columns ); @@ -113,11 +113,11 @@ setDimensions( const IndexType rows, const IndexType columns ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > // template< typename Vector > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, 
RealAllocator >:: setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths ) { if( max( rowLengths ) > 3 ) @@ -139,11 +139,11 @@ setCompressedRowLengths( const ConstCompressedRowLengthsVectorView rowLengths ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Vector > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getCompressedRowLengths( Vector& rowLengths ) const { return this->view.getCompressedRowLengths( rowLengths ); @@ -152,10 +152,10 @@ getCompressedRowLengths( Vector& rowLengths ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > Index -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getRowLength( const IndexType row ) const { return this->view.getRowLength( row ); @@ -164,10 +164,10 @@ getRowLength( const IndexType row ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > Index -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getMaxRowLength() const { return this->view.getMaxRowLength(); @@ -176,12 +176,12 @@ getMaxRowLength() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: 
-setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& m ) +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +setLike( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& m ) { this->setDimensions( m.getRows(), m.getColumns() ); } @@ -189,10 +189,10 @@ setLike( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocato template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > Index -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getNumberOfNonzeroMatrixElements() const { return this->view.getNumberOfNonzeroMatrixElements(); @@ -201,10 +201,10 @@ getNumberOfNonzeroMatrixElements() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: reset() { Matrix< Real, Device, Index >::reset(); @@ -213,14 +213,14 @@ reset() template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > bool -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +operator == ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const { - if( RowMajorOrder == RowMajorOrder_ ) + if( 
Organization == Organization_ ) return this->values == matrix.values; else { @@ -231,12 +231,12 @@ operator == ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAll template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > bool -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) const +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +operator != ( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) const { return ! this->operator==( matrix ); } @@ -244,10 +244,10 @@ operator != ( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAll template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: setValue( const RealType& v ) { this->view.setValue( v ); @@ -256,11 +256,11 @@ setValue( const RealType& v ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ auto -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getRow( const IndexType& rowIdx ) const -> const RowView { return this->view.getRow( rowIdx ); @@ -269,11 +269,11 @@ getRow( const IndexType& rowIdx ) const -> const RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder, 
+ ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ auto -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getRow( const IndexType& rowIdx ) -> RowView { return this->view.getRow( rowIdx ); @@ -282,10 +282,10 @@ getRow( const IndexType& rowIdx ) -> RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { this->view.setElement( row, column, value ); @@ -294,10 +294,10 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: addElement( const IndexType row, const IndexType column, const RealType& value, @@ -309,10 +309,10 @@ addElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > Real -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getElement( const IndexType row, const IndexType column ) const { return this->view.getElement( row, column ); @@ -321,11 +321,11 @@ getElement( const IndexType row, const IndexType column ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > 
void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->view.rowsReduction( first, last, fetch, reduce, keep, zero ); @@ -334,11 +334,11 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->view.rowsReduction( 0, this->getRows(), fetch, reduce, keep, zero ); @@ -347,11 +347,11 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Function > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) const { this->view.forRows( first, last, function ); @@ -360,11 +360,11 @@ forRows( IndexType first, IndexType last, Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Function > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: forRows( IndexType first, IndexType last, Function& function ) { this->view.forRows( first, last, 
function ); @@ -373,11 +373,11 @@ forRows( IndexType first, IndexType last, Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Function > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: forAllRows( Function& function ) const { this->view.forRows( 0, this->getRows(), function ); @@ -386,11 +386,11 @@ forAllRows( Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Function > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: forAllRows( Function& function ) { this->view.forRows( 0, this->getRows(), function ); @@ -399,12 +399,12 @@ forAllRows( Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Vector > __cuda_callable__ typename Vector::RealType -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { return this->view.rowVectorProduct(); @@ -413,12 +413,12 @@ rowVectorProduct( const IndexType row, const Vector& vector ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename InVector, typename OutVector > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { this->view.vectorProduct( 
inVector, outVector ); @@ -427,12 +427,12 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -addMatrix( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix, +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +addMatrix( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { @@ -470,10 +470,10 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Real2, typename Index2 > -void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, +void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::getTransposition( const Tridiagonal< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator ) { TNL_ASSERT( this->getRows() == matrix.getRows(), @@ -519,11 +519,11 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::getTransp template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > template< typename Vector1, typename Vector2 > __cuda_callable__ -void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::performSORIteration( const Vector1& b, +void 
Tridiagonal< Real, Device, Index, Organization, RealAllocator >::performSORIteration( const Vector1& b, const IndexType row, Vector2& x, const RealType& omega ) const @@ -541,10 +541,10 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::performSO template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( const Tridiagonal& matrix ) +Tridiagonal< Real, Device, Index, Organization, RealAllocator >& +Tridiagonal< Real, Device, Index, Organization, RealAllocator >::operator=( const Tridiagonal& matrix ) { this->setLike( matrix ); this->values = matrix.values; @@ -555,12 +555,12 @@ Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::operator=( con template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_, typename RealAllocator_ > -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: -operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAllocator_ >& matrix ) + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_, typename RealAllocator_ > +Tridiagonal< Real, Device, Index, Organization, RealAllocator >& +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: +operator=( const Tridiagonal< Real_, Device_, Index_, Organization_, RealAllocator_ >& matrix ) { static_assert( std::is_same< Device, Devices::Host >::value || std::is_same< Device, Devices::Cuda >::value, "unknown device" ); @@ -568,7 +568,7 @@ operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca "unknown 
device" ); this->setLike( matrix ); - if( RowMajorOrder == RowMajorOrder_ ) + if( Organization == Organization_ ) this->values = matrix.getValues(); else { @@ -582,7 +582,7 @@ operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca } else { - Tridiagonal< Real, Device, Index, RowMajorOrder_ > auxMatrix; + Tridiagonal< Real, Device, Index, Organization_ > auxMatrix; auxMatrix = matrix; const auto matrix_view = auxMatrix.getView(); auto f = [=] __cuda_callable__ ( const IndexType& rowIdx, const IndexType& localIdx, const IndexType& column, Real& value ) mutable { @@ -597,9 +597,9 @@ operator=( const Tridiagonal< Real_, Device_, Index_, RowMajorOrder_, RealAlloca template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( File& file ) const +void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::save( File& file ) const { Matrix< Real, Device, Index >::save( file ); } @@ -607,9 +607,9 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( Fil template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( File& file ) +void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::load( File& file ) { Matrix< Real, Device, Index >::load( file ); this->indexer.setDimensions( this->getRows(), this->getColumns() ); @@ -619,9 +619,9 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( Fil template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( const String& fileName ) const +void Tridiagonal< Real, 
Device, Index, Organization, RealAllocator >::save( const String& fileName ) const { Object::save( fileName ); } @@ -629,9 +629,9 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::save( con template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > -void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( const String& fileName ) +void Tridiagonal< Real, Device, Index, Organization, RealAllocator >::load( const String& fileName ) { Object::load( fileName ); } @@ -639,10 +639,10 @@ void Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >::load( con template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > void -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: print( std::ostream& str ) const { this->view.print( str ); @@ -651,10 +651,10 @@ print( std::ostream& str ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > auto -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getIndexer() const -> const IndexerType& { return this->indexer; @@ -663,10 +663,10 @@ getIndexer() const -> const IndexerType& template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > auto -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getIndexer() -> IndexerType& { return this->indexer; @@ -675,11 +675,11 @@ getIndexer() -> IndexerType& template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization 
Organization, typename RealAllocator > __cuda_callable__ Index -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getElementIndex( const IndexType row, const IndexType column ) const { IndexType localIdx = column - row; @@ -695,11 +695,11 @@ getElementIndex( const IndexType row, const IndexType column ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder, + ElementsOrganization Organization, typename RealAllocator > __cuda_callable__ Index -Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >:: +Tridiagonal< Real, Device, Index, Organization, RealAllocator >:: getPaddingIndex() const { return this->view.getPaddingIndex(); @@ -747,7 +747,7 @@ class TridiagonalDeviceDependentCode< Devices::Host > typename Index, typename InVector, typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, + static void vectorProduct( const Tridiagonal< Real, Device, Index, Organization, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { @@ -800,7 +800,7 @@ class TridiagonalDeviceDependentCode< Devices::Cuda > typename Index, typename InVector, typename OutVector > - static void vectorProduct( const Tridiagonal< Real, Device, Index, RowMajorOrder, RealAllocator >& matrix, + static void vectorProduct( const Tridiagonal< Real, Device, Index, Organization, RealAllocator >& matrix, const InVector& inVector, OutVector& outVector ) { diff --git a/src/TNL/Matrices/TridiagonalMatrixView.h b/src/TNL/Matrices/TridiagonalMatrixView.h index 82b76c73f..9496136dc 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.h +++ b/src/TNL/Matrices/TridiagonalMatrixView.h @@ -22,7 +22,7 @@ namespace Matrices { template< typename Real = double, typename Device = Devices::Host, typename Index = int, - bool RowMajorOrder = std::is_same< Device, Devices::Host >::value > + 
ElementsOrganization Organization = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() > class TridiagonalMatrixView : public MatrixView< Real, Device, Index > { public: @@ -30,10 +30,10 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > using DeviceType = Device; using IndexType = Index; using BaseType = MatrixView< Real, Device, Index >; - using IndexerType = details::TridiagonalMatrixIndexer< IndexType, RowMajorOrder >; + using IndexerType = details::TridiagonalMatrixIndexer< IndexType, Organization >; using ValuesViewType = typename BaseType::ValuesView; - using ViewType = TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >; - using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, RowMajorOrder >; + using ViewType = TridiagonalMatrixView< Real, Device, Index, Organization >; + using ConstViewType = TridiagonalMatrixView< typename std::add_const< Real >::type, Device, Index, Organization >; using RowView = TridiagonalMatrixRowView< ValuesViewType, IndexerType >; // TODO: remove this - it is here only for compatibility with original matrix implementation @@ -44,8 +44,8 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > template< typename _Real = Real, typename _Device = Device, typename _Index = Index, - bool RowMajorOrder_ = std::is_same< Device, Devices::Host >::value > - using Self = TridiagonalMatrixView< _Real, _Device, _Index, RowMajorOrder_ >; + ElementsOrganization Organization_ = Containers::Segments::DefaultElementsOrganization< Device >::getOrganization() > + using Self = TridiagonalMatrixView< _Real, _Device, _Index, Organization_ >; TridiagonalMatrixView(); @@ -69,11 +69,11 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > IndexType getNumberOfNonzeroMatrixElements() const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > - bool operator == ( const 
TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > + bool operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > - bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const; + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > + bool operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const; __cuda_callable__ RowView getRow( const IndexType& rowIdx ); @@ -123,8 +123,8 @@ class TridiagonalMatrixView : public MatrixView< Real, Device, Index > void vectorProduct( const InVector& inVector, OutVector& outVector ) const; - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > - void addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > + void addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix, const RealType& matrixMultiplicator = 1.0, const RealType& thisMatrixMultiplicator = 1.0 ); diff --git a/src/TNL/Matrices/TridiagonalMatrixView.hpp b/src/TNL/Matrices/TridiagonalMatrixView.hpp index 99e3e87d4..092e63cbc 100644 --- a/src/TNL/Matrices/TridiagonalMatrixView.hpp +++ b/src/TNL/Matrices/TridiagonalMatrixView.hpp @@ -20,8 +20,8 @@ namespace Matrices { template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +TridiagonalMatrixView< Real, Device, Index, Organization >:: TridiagonalMatrixView() { } @@ -29,8 +29,8 @@ TridiagonalMatrixView() template< typename Real, 
typename Device, typename Index, - bool RowMajorOrder > -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: + ElementsOrganization Organization > +TridiagonalMatrixView< Real, Device, Index, Organization >:: TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer ) : MatrixView< Real, Device, Index >( indexer.getRows(), indexer.getColumns(), values ), indexer( indexer ) { @@ -39,9 +39,9 @@ TridiagonalMatrixView( const ValuesViewType& values, const IndexerType& indexer template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > auto -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getView() -> ViewType { return ViewType( this->values.getView(), indexer ); @@ -50,9 +50,9 @@ getView() -> ViewType template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > auto -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getConstView() const -> ConstViewType { return ConstViewType( this->values.getConstView(), indexer ); @@ -61,23 +61,23 @@ getConstView() const -> ConstViewType template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > String -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getSerializationType() { return String( "Matrices::Tridiagonal< " ) + TNL::getSerializationType< RealType >() + ", [any_device], " + TNL::getSerializationType< IndexType >() + ", " + - ( RowMajorOrder ? "true" : "false" ) + ", [any_allocator] >"; + ( Organization ? 
"true" : "false" ) + ", [any_allocator] >"; } template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > String -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getSerializationTypeVirtual() const { return this->getSerializationType(); @@ -86,10 +86,10 @@ getSerializationTypeVirtual() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Vector > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getCompressedRowLengths( Vector& rowLengths ) const { rowLengths.setSize( this->getRows() ); @@ -111,9 +111,9 @@ getCompressedRowLengths( Vector& rowLengths ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Index -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getRowLength( const IndexType row ) const { return this->indexer.getRowSize( row ); @@ -122,9 +122,9 @@ getRowLength( const IndexType row ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Index -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getMaxRowLength() const { return 3; @@ -133,9 +133,9 @@ getMaxRowLength() const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Index -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getNumberOfNonzeroMatrixElements() const { const auto values_view = this->values.getConstView(); @@ -148,13 +148,13 @@ getNumberOfNonzeroMatrixElements() 
const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + ElementsOrganization Organization > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > bool -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +TridiagonalMatrixView< Real, Device, Index, Organization >:: +operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const { - if( RowMajorOrder == RowMajorOrder_ ) + if( Organization == Organization_ ) return this->values == matrix.values; else { @@ -165,11 +165,11 @@ operator == ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder template< typename Real, typename Device, typename Index, - bool RowMajorOrder > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + ElementsOrganization Organization > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > bool -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix ) const +TridiagonalMatrixView< Real, Device, Index, Organization >:: +operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix ) const { return ! 
this->operator==( matrix ); } @@ -177,9 +177,9 @@ operator != ( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: setValue( const RealType& v ) { this->values = v; @@ -188,10 +188,10 @@ setValue( const RealType& v ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getRow( const IndexType& rowIdx ) const -> const RowView { return RowView( rowIdx, this->values.getView(), this->indexer ); @@ -200,10 +200,10 @@ getRow( const IndexType& rowIdx ) const -> const RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getRow( const IndexType& rowIdx ) -> RowView { return RowView( rowIdx, this->values.getView(), this->indexer ); @@ -212,9 +212,9 @@ getRow( const IndexType& rowIdx ) -> RowView template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: setElement( const IndexType row, const IndexType column, const RealType& value ) { TNL_ASSERT_GE( row, 0, "" ); @@ -233,9 +233,9 @@ setElement( const IndexType row, const IndexType column, const RealType& value ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > void 
-TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: addElement( const IndexType row, const IndexType column, const RealType& value, @@ -258,9 +258,9 @@ addElement( const IndexType row, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > Real -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getElement( const IndexType row, const IndexType column ) const { TNL_ASSERT_GE( row, 0, "" ); @@ -276,10 +276,10 @@ getElement( const IndexType row, const IndexType column ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero_ ) const { using Real_ = decltype( fetch( IndexType(), IndexType(), RealType() ) ); @@ -320,10 +320,10 @@ rowsReduction( IndexType first, IndexType last, Fetch& fetch, Reduce& reduce, Ke template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Fetch, typename Reduce, typename Keep, typename FetchReal > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zero ) const { this->rowsReduction( 0, this->indexer.getNonemptyRowsCount(), fetch, reduce, keep, zero ); @@ -332,10 +332,10 @@ allRowsReduction( Fetch& fetch, Reduce& reduce, Keep& keep, const FetchReal& zer template< typename Real, typename Device, typename Index, - bool 
RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: forRows( IndexType first, IndexType last, Function& function ) const { const auto values_view = this->values.getConstView(); @@ -367,10 +367,10 @@ forRows( IndexType first, IndexType last, Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: forRows( IndexType first, IndexType last, Function& function ) { auto values_view = this->values.getView(); @@ -401,10 +401,10 @@ forRows( IndexType first, IndexType last, Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: forAllRows( Function& function ) const { this->forRows( 0, this->indxer.getNonEmptyRowsCount(), function ); @@ -413,10 +413,10 @@ forAllRows( Function& function ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Function > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: forAllRows( Function& function ) { this->forRows( 0, this->indexer.getNonemptyRowsCount(), function ); @@ -425,11 +425,11 @@ forAllRows( Function& function ) template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Vector > __cuda_callable__ typename Vector::RealType 
-TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: rowVectorProduct( const IndexType row, const Vector& vector ) const { } @@ -437,11 +437,11 @@ rowVectorProduct( const IndexType row, const Vector& vector ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename InVector, typename OutVector > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: vectorProduct( const InVector& inVector, OutVector& outVector ) const { TNL_ASSERT_EQ( this->getColumns(), inVector.getSize(), "Matrix columns do not fit with input vector." ); @@ -464,18 +464,18 @@ vectorProduct( const InVector& inVector, OutVector& outVector ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > - template< typename Real_, typename Device_, typename Index_, bool RowMajorOrder_ > + ElementsOrganization Organization > + template< typename Real_, typename Device_, typename Index_, ElementsOrganization Organization_ > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: -addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, RowMajorOrder_ >& matrix, +TridiagonalMatrixView< Real, Device, Index, Organization >:: +addMatrix( const TridiagonalMatrixView< Real_, Device_, Index_, Organization_ >& matrix, const RealType& matrixMultiplicator, const RealType& thisMatrixMultiplicator ) { TNL_ASSERT_EQ( this->getRows(), matrix.getRows(), "Matrices rows are not equal." ); TNL_ASSERT_EQ( this->getColumns(), matrix.getColumns(), "Matrices columns are not equal." 
); - if( RowMajorOrder == RowMajorOrder_ ) + if( Organization == Organization_ ) { if( thisMatrixMultiplicator == 1.0 ) this->values += matrixMultiplicator * matrix.getValues(); @@ -536,10 +536,10 @@ __global__ void TridiagonalTranspositionCudaKernel( const Tridiagonal< Real2, De template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Real2, typename Index2 > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix, const RealType& matrixMultiplicator ) { @@ -586,11 +586,11 @@ getTransposition( const TridiagonalMatrixView< Real2, Device, Index2 >& matrix, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > template< typename Vector1, typename Vector2 > __cuda_callable__ void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: performSORIteration( const Vector1& b, const IndexType row, Vector2& x, @@ -608,8 +608,8 @@ performSORIteration( const Vector1& b, template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& file ) const + ElementsOrganization Organization > +void TridiagonalMatrixView< Real, Device, Index, Organization >::save( File& file ) const { MatrixView< Real, Device, Index >::save( file ); } @@ -617,9 +617,9 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::save( File& fi template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > void -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: save( const String& fileName ) const { Object::save( 
fileName ); @@ -628,8 +628,8 @@ save( const String& fileName ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > -void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::ostream& str ) const + ElementsOrganization Organization > +void TridiagonalMatrixView< Real, Device, Index, Organization >::print( std::ostream& str ) const { for( IndexType row = 0; row < this->getRows(); row++ ) { @@ -648,10 +648,10 @@ void TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >::print( std::os template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getIndexer() const -> const IndexerType& { return this->indexer; @@ -660,10 +660,10 @@ getIndexer() const -> const IndexerType& template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ auto -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getIndexer() -> IndexerType& { return this->indexer; @@ -672,10 +672,10 @@ getIndexer() -> IndexerType& template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ Index -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: +TridiagonalMatrixView< Real, Device, Index, Organization >:: getElementIndex( const IndexType row, const IndexType column ) const { IndexType localIdx = column - row; @@ -691,10 +691,10 @@ getElementIndex( const IndexType row, const IndexType column ) const template< typename Real, typename Device, typename Index, - bool RowMajorOrder > + ElementsOrganization Organization > __cuda_callable__ Index -TridiagonalMatrixView< Real, Device, Index, RowMajorOrder >:: 
+TridiagonalMatrixView< Real, Device, Index, Organization >:: getPaddingIndex() const { return -1; diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h index 3c0a65cfd..e532c9ff1 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_Ellpack.h @@ -29,13 +29,13 @@ protected: //// // Row-major format is used for the host system template< typename Device, typename Index, typename IndexAlocator > -using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >; +using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, TNL::Containers::Segments::RowMajorOrder, 32 >; //// // Column-major format is used for GPUs template< typename Device, typename Index, typename IndexAllocator > -using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >; +using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, TNL::Containers::Segments::ColumnMajorOrder, 32 >; // types for which MatrixTest is instantiated using EllpackMatrixTypes = ::testing::Types diff --git a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h index 98c5f65ae..e332bc010 100644 --- a/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/BinarySparseMatrixTest_SlicedEllpack.h @@ -29,13 +29,13 @@ protected: //// // Row-major format is used for the host system template< typename Device, typename Index, typename IndexAllocator > -using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >; +using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, TNL::Containers::Segments::RowMajorOrder, 32 >; //// // Column-major format is 
used for GPUs template< typename Device, typename Index, typename IndexAllocator > -using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >; +using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, TNL::Containers::Segments::ColumnMajorOrder, 32 >; // types for which MatrixTest is instantiated using SlicedEllpackMatrixTypes = ::testing::Types diff --git a/src/UnitTests/Matrices/DenseMatrixCopyTest.h b/src/UnitTests/Matrices/DenseMatrixCopyTest.h index d041de1db..d311d774d 100644 --- a/src/UnitTests/Matrices/DenseMatrixCopyTest.h +++ b/src/UnitTests/Matrices/DenseMatrixCopyTest.h @@ -33,10 +33,10 @@ using E_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL: using E_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, EllpackSegments >; using SE_host = TNL::Matrices::SparseMatrix< int, TNL::Devices::Host, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; using SE_cuda = TNL::Matrices::SparseMatrix< int, TNL::Devices::Cuda, int, TNL::Matrices::GeneralMatrix, SlicedEllpackSegments >; -using Dense_host = TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, false >; -using Dense_host_RowMajorOrder = TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, true >; -using Dense_cuda = TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, false >; -using Dense_cuda_RowMajorOrder = TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, true >; +using Dense_host = TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, TNL::Containers::Segments::ColumnMajorOrder >; +using Dense_host_RowMajorOrder = TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, TNL::Containers::Segments::RowMajorOrder >; +using Dense_cuda = TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, TNL::Containers::Segments::ColumnMajorOrder >; +using Dense_cuda_RowMajorOrder = 
TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, TNL::Containers::Segments::RowMajorOrder >; #ifdef HAVE_GTEST diff --git a/src/UnitTests/Matrices/DenseMatrixTest.h b/src/UnitTests/Matrices/DenseMatrixTest.h index 5441f8834..ceed58546 100644 --- a/src/UnitTests/Matrices/DenseMatrixTest.h +++ b/src/UnitTests/Matrices/DenseMatrixTest.h @@ -35,14 +35,15 @@ static const char* TEST_FILE_NAME = "test_DenseMatrixTest.tnl"; void test_GetSerializationType() { - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( 
"Matrices::DenseMatrix< int, [any_device], int, false, [any_allocator] >" ) ); + using namespace TNL::Containers::Segments; + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::DenseMatrix< int, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::DenseMatrix< int, [any_device], int, false, [any_allocator] >" ) ); } template< typename Matrix > diff --git a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h index 49bcfa11c..8ee8c7ffb 100644 --- 
a/src/UnitTests/Matrices/MultidiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/MultidiagonalMatrixTest.h @@ -34,14 +34,15 @@ static const char* TEST_FILE_NAME = "test_MultidiagonalMatrixTest.tnl"; void test_GetSerializationType() { - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], 
[any_allocator] >" ) ); + using namespace TNL::Containers::Segments; + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, true, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< float, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Multidiagonal< int, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Multidiagonal< int, [any_device], int, false, [any_allocator], [any_allocator] >" ) ); } template< typename Matrix > @@ -1147,10 +1148,11 @@ void 
test_AssignmentOperator() using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; using DiagonalsShiftsType = typename Matrix::DiagonalsShiftsType; - constexpr bool rowMajorOrder = Matrix::getRowMajorOrder(); + constexpr TNL::Containers::Segments::ElementsOrganization organization = Matrix::getOrganization(); - using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >; - using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >; + using MultidiagonalHost = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Host, IndexType, organization >; + using MultidiagonalCuda = TNL::Matrices::Multidiagonal< RealType, TNL::Devices::Cuda, IndexType, + organization == TNL::Containers::Segments::RowMajorOrder ? TNL::Containers::Segments::ColumnMajorOrder : TNL::Containers::Segments::RowMajorOrder >; const IndexType rows( 10 ), columns( 10 ); DiagonalsShiftsType diagonalsShifts( { -4, -2, 0, 2, 3, 5 } ); diff --git a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h index 03cc3646b..7ea9f337d 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_BiEllpack.h @@ -20,12 +20,12 @@ const char* saveAndLoadFileName = "test_SparseMatrixTest_BiEllpack_segments"; //// // Row-major format is used for the host system template< typename Device, typename Index, typename IndexAllocator > -using RowMajorBiEllpack = TNL::Containers::Segments::BiEllpack< Device, Index, IndexAllocator, true >; +using RowMajorBiEllpack = TNL::Containers::Segments::BiEllpack< Device, Index, IndexAllocator, TNL::Containers::Segments::RowMajorOrder >; //// // Column-major format is used for GPUs template< typename Device, typename Index, typename IndexAllocator > -using ColumnMajorBiEllpack = TNL::Containers::Segments::BiEllpack< Device, Index, IndexAllocator, 
false >; +using ColumnMajorBiEllpack = TNL::Containers::Segments::BiEllpack< Device, Index, IndexAllocator, TNL::Containers::Segments::ColumnMajorOrder >; // types for which MatrixTest is instantiated using MatrixTypes = ::testing::Types diff --git a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h index e7c35dac6..7550edc19 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_ChunkedEllpack.h @@ -20,12 +20,12 @@ const char* saveAndLoadFileName = "test_SparseMatrixTest_ChunkedEllpack_segments //// // Row-major format is used for the host system template< typename Device, typename Index, typename IndexAllocator > -using RowMajorChunkedEllpack = TNL::Containers::Segments::ChunkedEllpack< Device, Index, IndexAllocator, true >; +using RowMajorChunkedEllpack = TNL::Containers::Segments::ChunkedEllpack< Device, Index, IndexAllocator, TNL::Containers::Segments::RowMajorOrder >; //// // Column-major format is used for GPUs template< typename Device, typename Index, typename IndexAllocator > -using ColumnMajorChunkedEllpack = TNL::Containers::Segments::ChunkedEllpack< Device, Index, IndexAllocator, false >; +using ColumnMajorChunkedEllpack = TNL::Containers::Segments::ChunkedEllpack< Device, Index, IndexAllocator, TNL::Containers::Segments::ColumnMajorOrder >; // types for which MatrixTest is instantiated using MatrixTypes = ::testing::Types diff --git a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h index aed4d1ac1..fa66838b8 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_Ellpack.h @@ -20,12 +20,12 @@ const char* saveAndLoadFileName = "test_SparseMatrixTest_Ellpack_segments"; //// // Row-major format is used for the host system template< typename Device, typename Index, typename IndexAlocator > -using RowMajorEllpack = 
TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, true, 32 >; +using RowMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAlocator, TNL::Containers::Segments::RowMajorOrder, 32 >; //// // Column-major format is used for GPUs template< typename Device, typename Index, typename IndexAllocator > -using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, false, 32 >; +using ColumnMajorEllpack = TNL::Containers::Segments::Ellpack< Device, Index, IndexAllocator, TNL::Containers::Segments::ColumnMajorOrder, 32 >; // types for which MatrixTest is instantiated using MatrixTypes = ::testing::Types diff --git a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h index 96115a25c..bb6b14062 100644 --- a/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h +++ b/src/UnitTests/Matrices/SparseMatrixTest_SlicedEllpack.h @@ -21,12 +21,12 @@ const char* saveAndLoadFileName = "test_SparseMatrixTest_SlicedEllpack_segments" //// // Row-major format is used for the host system template< typename Device, typename Index, typename IndexAllocator > -using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, true, 32 >; +using RowMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, TNL::Containers::Segments::RowMajorOrder, 32 >; //// // Column-major format is used for GPUs template< typename Device, typename Index, typename IndexAllocator > -using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, false, 32 >; +using ColumnMajorSlicedEllpack = TNL::Containers::Segments::SlicedEllpack< Device, Index, IndexAllocator, TNL::Containers::Segments::ColumnMajorOrder, 32 >; // types for which MatrixTest is instantiated using MatrixTypes = ::testing::Types diff --git a/src/UnitTests/Matrices/TridiagonalMatrixTest.h 
b/src/UnitTests/Matrices/TridiagonalMatrixTest.h index d9dc06599..bb613a17d 100644 --- a/src/UnitTests/Matrices/TridiagonalMatrixTest.h +++ b/src/UnitTests/Matrices/TridiagonalMatrixTest.h @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -34,14 +35,15 @@ static const char* TEST_FILE_NAME = "test_TridiagonalMatrixTest.tnl"; void test_GetSerializationType() { - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int, true >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) ); - EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int, false >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) ); + using 
namespace TNL::Containers::Segments; + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int, RowMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, true, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Host, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< float, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< float, [any_device], int, false, [any_allocator] >" ) ); + EXPECT_EQ( ( TNL::Matrices::Tridiagonal< int, TNL::Devices::Cuda, int, ColumnMajorOrder >::getSerializationType() ), TNL::String( "Matrices::Tridiagonal< int, [any_device], int, false, [any_allocator] >" ) ); } template< typename Matrix > @@ -1166,10 +1168,11 @@ void test_AssignmentOperator() using RealType = typename Matrix::RealType; using DeviceType = typename Matrix::DeviceType; using IndexType = typename Matrix::IndexType; - constexpr bool rowMajorOrder = 
Matrix::getRowMajorOrder(); + constexpr TNL::Containers::Segments::ElementsOrganization organization = Matrix::getOrganization(); - using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType, rowMajorOrder >; - using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType, !rowMajorOrder >; + using TridiagonalHost = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Host, IndexType, organization >; + using TridiagonalCuda = TNL::Matrices::Tridiagonal< RealType, TNL::Devices::Cuda, IndexType, + organization == TNL::Containers::Segments::RowMajorOrder ? TNL::Containers::Segments::ColumnMajorOrder : TNL::Containers::Segments::RowMajorOrder >; const IndexType rows( 10 ), columns( 10 ); TridiagonalHost hostMatrix( rows, columns ); @@ -1485,7 +1488,9 @@ TYPED_TEST( MatrixTest, addMatrixTest_differentOrdering ) using DeviceType = typename MatrixType::DeviceType; using IndexType = typename MatrixType::IndexType; using RealAllocatorType = typename MatrixType::RealAllocatorType; - using MatrixType2 = TNL::Matrices::Tridiagonal< RealType, DeviceType, IndexType, ! MatrixType::getRowMajorOrder(), RealAllocatorType >; + using MatrixType2 = TNL::Matrices::Tridiagonal< RealType, DeviceType, IndexType, + MatrixType::getOrganization() == TNL::Containers::Segments::RowMajorOrder ? TNL::Containers::Segments::ColumnMajorOrder : TNL::Containers::Segments::RowMajorOrder, + RealAllocatorType >; test_AddMatrix< MatrixType, MatrixType2 >(); } -- GitLab From ddeb3e1f68a8e1c2ba0e70cac9c496991199d4a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 10 May 2020 15:43:54 +0200 Subject: [PATCH 10/11] Fixes after rebase. 
--- src/TNL/Matrices/DenseMatrixView.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/TNL/Matrices/DenseMatrixView.hpp b/src/TNL/Matrices/DenseMatrixView.hpp index db6bac91e..2ba34e549 100644 --- a/src/TNL/Matrices/DenseMatrixView.hpp +++ b/src/TNL/Matrices/DenseMatrixView.hpp @@ -250,6 +250,9 @@ setElement( const IndexType row, template< typename Real, typename Device, typename Index, + ElementsOrganization Organization > +__cuda_callable__ void +DenseMatrixView< Real, Device, Index, Organization >:: addElement( const IndexType row, const IndexType column, const RealType& value, @@ -268,7 +271,7 @@ template< typename Real, typename Device, typename Index, ElementsOrganization Organization > -__cuda_callable__ void +__cuda_callable__ Real DenseMatrixView< Real, Device, Index, Organization >:: getElement( const IndexType row, const IndexType column ) const @@ -687,6 +690,3 @@ Index DenseMatrixView< Real, Device, Index, Organization >::getElementIndex( con } // namespace Matrices } // namespace TNL - ElementsOrganization Organization > -__cuda_callable__ Real -DenseMatrixView< Real, Device, Index, Organization >:: -- GitLab From 4f690e131fef15c68508bd5ed4a01fe3d25fa30c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= Date: Sun, 10 May 2020 16:14:24 +0200 Subject: [PATCH 11/11] Added DenseMatrixView constructor example. 
---
 .../DenseMatrixViewExample_constructor.cpp    | 50 +++++++++++++++++++
 .../DenseMatrixViewExample_constructor.cu     |  1 +
 2 files changed, 51 insertions(+)
 create mode 100644 Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp
 create mode 120000 Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cu

diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp b/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp
new file mode 100644
index 000000000..e8c036fa4
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cpp
@@ -0,0 +1,50 @@
+#include
+#include
+#include
+#include
+#include
+
+/**
+ * Creates a 3x4 row-major dense matrix view on top of 12 values stored in
+ * a TNL vector parametrized by the given device.
+ */
+template< typename Device >
+void createMatrixView()
+{
+   TNL::Containers::Vector< double, Device > values {
+      1, 2, 3, 4,
+      5, 6, 7, 8,
+      9, 10, 11, 12 };
+
+   /***
+    * The view is built on top of the values vector, so rows * columns has to
+    * match the number of stored values: 3 * 4 = 12.
+    */
+   TNL::Matrices::DenseMatrixView< double, Device, int, TNL::Containers::Segments::RowMajorOrder > matrix( 3, 4, values.getView() );
+
+   /***
+    * We need a matrix view to pass the matrix to lambda function even on CUDA device.
+    */
+   /*auto matrixView = matrix.getView();
+   auto f = [=] __cuda_callable__ ( int rowIdx ) mutable {
+      auto row = matrixView.getRow( rowIdx );
+      row.setElement( rowIdx, 10* ( rowIdx + 1 ) );
+   };
+
+   TNL::Algorithms::ParallelFor< Device >::exec( 0, matrix.getRows(), f );
+   std::cout << matrix << std::endl;*/
+}
+
+/**
+ * Runs the example on the host and, if HAVE_CUDA is defined, on the CUDA device.
+ */
+int main( int argc, char* argv[] )
+{
+   std::cout << "Creating matrix view on host: " << std::endl;
+   createMatrixView< TNL::Devices::Host >();
+
+#ifdef HAVE_CUDA
+   std::cout << "Creating matrix view on CUDA device: " << std::endl;
+   createMatrixView< TNL::Devices::Cuda >();
+#endif
+}
diff --git a/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cu b/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cu
new file mode 120000
index 000000000..3964e8ec9
--- /dev/null
+++ b/Documentation/Examples/Matrices/DenseMatrixViewExample_constructor.cu
@@ -0,0 +1 @@
+DenseMatrixViewExample_constructor.cpp
\ No newline at end of file
-- 
GitLab