From 362ecdd4fa3bc17a4ad339b4c1024953084178ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Oberhuber?= <oberhuber.tomas@gmail.com> Date: Tue, 4 May 2021 17:23:12 +0200 Subject: [PATCH] Fixing matrix elements organization for ellpack wrapping. --- Documentation/Examples/CMakeLists.txt | 112 ++++++------------ .../SparseMatrixViewExample_wrapEllpack.cpp | 2 +- .../Tutorials/Matrices/tutorial_Matrices.md | 2 +- src/TNL/Matrices/MatrixWrapping.h | 8 +- src/TNL/Matrices/SparseMatrix.hpp | 2 +- src/TNL/Matrices/SparseMatrixRowView.h | 10 +- src/TNL/Matrices/SparseMatrixRowView.hpp | 18 ++- src/UnitTests/Matrices/MatrixWrappingTest.h | 4 +- 8 files changed, 66 insertions(+), 92 deletions(-) diff --git a/Documentation/Examples/CMakeLists.txt b/Documentation/Examples/CMakeLists.txt index 29ba5a5dfc..e984d2f1f2 100644 --- a/Documentation/Examples/CMakeLists.txt +++ b/Documentation/Examples/CMakeLists.txt @@ -3,80 +3,42 @@ ADD_SUBDIRECTORY( Containers ) ADD_SUBDIRECTORY( Pointers ) ADD_SUBDIRECTORY( Matrices ) -ADD_EXECUTABLE( FileExample FileExample.cpp ) -ADD_CUSTOM_COMMAND( COMMAND FileExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExample.out OUTPUT FileExample.out ) - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(FileExampleCuda FileExampleCuda.cu) - ADD_CUSTOM_COMMAND( COMMAND FileExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExampleCuda.out OUTPUT FileExampleCuda.out ) -ENDIF() - -ADD_EXECUTABLE( FileExampleSaveAndLoad FileExampleSaveAndLoad.cpp ) -ADD_CUSTOM_COMMAND( COMMAND FileExampleSaveAndLoad > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExampleSaveAndLoad.out OUTPUT FileExampleSaveAndLoad.out ) - -ADD_EXECUTABLE( FileNameExample FileNameExample.cpp ) -ADD_CUSTOM_COMMAND( COMMAND FileNameExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileNameExample.out OUTPUT FileNameExample.out ) - -ADD_EXECUTABLE( FileNameExampleDistributedSystemNodeCoordinates FileNameExampleDistributedSystemNodeCoordinates.cpp ) -ADD_CUSTOM_COMMAND( 
COMMAND FileNameExampleDistributedSystemNodeCoordinates > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileNameExampleDistributedSystemNodeCoordinates.out OUTPUT FileNameExampleDistributedSystemNodeCoordinates.out ) - - -ADD_EXECUTABLE( FileNameExampleDistributedSystemNodeId FileNameExampleDistributedSystemNodeId.cpp ) -ADD_CUSTOM_COMMAND( COMMAND FileNameExampleDistributedSystemNodeId > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileNameExampleDistributedSystemNodeId.out OUTPUT FileNameExampleDistributedSystemNodeId.out ) - -ADD_EXECUTABLE( ObjectExample_getType ObjectExample_getType.cpp ) -ADD_CUSTOM_COMMAND( COMMAND ObjectExample_getType > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ObjectExample_getType.out OUTPUT ObjectExample_getType.out ) - -ADD_EXECUTABLE( ParameterContainerExample ParameterContainerExample.cpp ) -ADD_EXECUTABLE( ConfigDescriptionExample ConfigDescriptionExample.cpp ) -ADD_EXECUTABLE( LoggerExample LoggerExample.cpp ) -ADD_EXECUTABLE( MathExample MathExample.cpp ) - -ADD_EXECUTABLE( ParseObjectTypeExample ParseObjectTypeExample.cpp ) -ADD_CUSTOM_COMMAND( COMMAND ParseObjectTypeExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParseObjectTypeExample.out OUTPUT ParseObjectTypeExample.out ) - -ADD_EXECUTABLE( StringExample StringExample.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExample.out OUTPUT StringExample.out ) - -ADD_EXECUTABLE( StringExampleGetAllocatedSize StringExampleGetAllocatedSize.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExampleGetAllocatedSize > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleGetAllocatedSize.out OUTPUT StringExampleGetAllocatedSize.out ) - -ADD_EXECUTABLE( StringExampleReplace StringExampleReplace.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExampleReplace > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleReplace.out OUTPUT StringExampleReplace.out ) - -ADD_EXECUTABLE( StringExampleSetSize StringExampleSetSize.cpp ) -ADD_CUSTOM_COMMAND( COMMAND 
StringExampleSetSize > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleSetSize.out OUTPUT StringExampleSetSize.out ) - -ADD_EXECUTABLE( StringExampleSplit StringExampleSplit.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExampleSplit > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleSplit.out OUTPUT StringExampleSplit.out ) - -ADD_EXECUTABLE( StringExampleStrip StringExampleStrip.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExampleStrip > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleStrip.out OUTPUT StringExampleStrip.out ) - -ADD_EXECUTABLE( TimerExample TimerExample.cpp ) -ADD_CUSTOM_COMMAND( COMMAND TimerExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TimerExample.out OUTPUT TimerExample.out ) - -ADD_EXECUTABLE( TimerExampleLogger TimerExampleLogger.cpp ) -ADD_CUSTOM_COMMAND( COMMAND TimerExampleLogger > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TimerExampleLogger.out OUTPUT TimerExampleLogger.out ) - - -ADD_CUSTOM_TARGET( RunExamples ALL DEPENDS - FileExample.out - FileExampleSaveAndLoad.out - FileNameExample.out - FileNameExampleDistributedSystemNodeCoordinates.out - FileNameExampleDistributedSystemNodeId.out - ObjectExample_getType.out - ParseObjectTypeExample.out - StringExample.out - StringExampleGetAllocatedSize.out - StringExampleReplace.out - StringExampleSplit.out - StringExampleStrip.out - TimerExample.out - TimerExampleLogger.out ) +set( COMMON_EXAMPLES + FileExampleCuda +) + +set( HOST_EXAMPLES + FileExample + FileExampleSaveAndLoad + FileNameExample + FileNameExampleDistributedSystemNodeCoordinates + FileNameExampleDistributedSystemNodeId + ObjectExample_getType + ParseObjectTypeExample + StringExample + StringExampleGetAllocatedSize + StringExampleReplace + StringExampleSplit + StringExampleStrip + TimerExample + TimerExampleLogger ) if( BUILD_CUDA ) - ADD_CUSTOM_TARGET( RunExamples-cuda ALL DEPENDS - FileExampleCuda.out ) -ENDIF() + foreach( target IN ITEMS ${COMMON_EXAMPLES} ) + cuda_add_executable( ${target}-cuda ${target}.cu 
OPTIONS ) + add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) + set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out ) + endforeach() +else() + foreach( target IN ITEMS ${COMMON_EXAMPLES} ${HOST_EXAMPLES}) + add_executable( ${target} ${target}.cpp ) + add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) + set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out ) + endforeach() +endif() + +IF( BUILD_CUDA ) + ADD_CUSTOM_TARGET( RunExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} ) +ELSE() + ADD_CUSTOM_TARGET( RunExamples ALL DEPENDS ${HOST_OUTPUTS} ) +ENDIF() \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_wrapEllpack.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_wrapEllpack.cpp index 9f36df57ec..67df098916 100644 --- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_wrapEllpack.cpp +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_wrapEllpack.cpp @@ -26,7 +26,7 @@ void wrapMatrixView() /*** * Wrap the arrays `values` and `columnIndexes` to sparse matrix view */ - auto matrix = TNL::Matrices::wrapEllpackMatrix< Device >( rows, columns, 2, values, columnIndexes ); + auto matrix = TNL::Matrices::wrapEllpackMatrix< Device, TNL::Algorithms::Segments::RowMajorOrder >( rows, columns, 2, values, columnIndexes ); std::cout << "Matrix reads as: " << std::endl << matrix << std::endl; } diff --git a/Documentation/Tutorials/Matrices/tutorial_Matrices.md b/Documentation/Tutorials/Matrices/tutorial_Matrices.md index 1014c91036..efcf7a4bfe 100644 --- a/Documentation/Tutorials/Matrices/tutorial_Matrices.md +++ b/Documentation/Tutorials/Matrices/tutorial_Matrices.md @@ -677,7 +677,7 @@ Wrapping data corresponding with the Ellpack format is very similar as we can se \includelineno SparseMatrixViewExample_wrapEllpack.cpp -We encode 
the same sparse matrix as in the previous example. The essence of the Ellpack format is that we allocate the same number of matrix elements for each row which is two in our example. For some matrix rows we use the padding zeros for which we set the column index to -1 (line 21). Therefore the size of `valuesVector` and `columnIndexesVector` equals number of matrix rows times number of matrix elements allocated in each row. As before, we turn the vectors into C style pointers (lines 23-24) and wrap them into sparse matrix view with Ellpack format (line 29). Note that we must state the device on which the arrays are allocated explicitly. The result looks as follows: +We encode the same sparse matrix as in the previous example. The essence of the Ellpack format is that we allocate the same number of matrix elements for each row, which is two in our example. For some matrix rows we use padding zeros, for which we set the column index to -1 (line 21). Therefore the size of `valuesVector` and `columnIndexesVector` equals the number of matrix rows times the number of matrix elements allocated in each row. As before, we turn the vectors into C-style pointers (lines 23-24) and wrap them into a sparse matrix view with the Ellpack format (line 29). Note that we must explicitly state the device on which the arrays are allocated and also the matrix elements organization, which is \ref TNL::Algorithms::Segments::RowMajorOrder in this case. For an Ellpack matrix stored on the GPU, \ref TNL::Algorithms::Segments::ColumnMajorOrder is preferred. The result looks as follows: \include SparseMatrixViewExample_wrapEllpack.out diff --git a/src/TNL/Matrices/MatrixWrapping.h b/src/TNL/Matrices/MatrixWrapping.h index fa91de8315..6c0c6bb8ca 100644 --- a/src/TNL/Matrices/MatrixWrapping.h +++ b/src/TNL/Matrices/MatrixWrapping.h @@ -94,9 +94,9 @@ wrapCSRMatrix( const Index& rows, const Index& columns, Index* rowPointers, Real /// This is to prevent from appearing in Doxygen documentation. 
/// \cond HIDDEN_CLASS template< typename Device, + ElementsOrganization Organization, typename Real, typename Index, - ElementsOrganization Organization = Algorithms::Segments::DefaultElementsOrganization< Device >::getOrganization(), int Alignment = 1 > struct EllpackMatrixWrapper { @@ -140,15 +140,15 @@ struct EllpackMatrixWrapper * \include SparseMatrixViewExample_wrapEllpack.out */ template< typename Device, + ElementsOrganization Organization, typename Real, typename Index, - ElementsOrganization Organization = Algorithms::Segments::DefaultElementsOrganization< Device >::getOrganization(), int Alignment = 1 > auto wrapEllpackMatrix( const Index rows, const Index columns, const Index nonzerosPerRow, Real* values, Index* columnIndexes ) --> decltype( EllpackMatrixWrapper< Device, Real, Index, Organization, Alignment >::wrap( rows, columns, nonzerosPerRow, values, columnIndexes ) ) +-> decltype( EllpackMatrixWrapper< Device, Organization, Real, Index, Alignment >::wrap( rows, columns, nonzerosPerRow, values, columnIndexes ) ) { - return EllpackMatrixWrapper< Device, Real, Index, Organization, Alignment >::wrap( rows, columns, nonzerosPerRow, values, columnIndexes ); + return EllpackMatrixWrapper< Device, Organization, Real, Index, Alignment >::wrap( rows, columns, nonzerosPerRow, values, columnIndexes ); } } //namespace Matrices diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index b58620c2d0..ac86884257 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -165,7 +165,7 @@ getConstView() const -> ConstViewType this->getColumns(), this->getValues().getConstView(), this->columnIndexes.getConstView(), - this->segments.getConstView() ); + const_cast< SparseMatrix* >( this )->segments.getView() ); } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h index 4976a420e2..03a90af7a1 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.h 
+++ b/src/TNL/Matrices/SparseMatrixRowView.h @@ -14,6 +14,7 @@ #include <TNL/Cuda/CudaCallable.h> #include <TNL/Matrices/MatrixRowViewIterator.h> +#include <TNL/Matrices/details/SparseMatrixRowViewValueGetter.h> namespace TNL { namespace Matrices { @@ -101,6 +102,8 @@ class SparseMatrixRowView */ using IteratorType = MatrixRowViewIterator< RowView >; + using ValueGetterType = details::SparseMatrixRowViewValueGetter< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >; + /** * \brief Tells whether the parent matrix is a binary matrix. * @return `true` if the matrix is binary. @@ -163,7 +166,7 @@ class SparseMatrixRowView * \return constant reference to the matrix element value. */ __cuda_callable__ - const RealType& getValue( const IndexType localIdx ) const; + auto getValue( const IndexType localIdx ) const -> typename ValueGetterType::ConstResultType; /** * \brief Returns non-constants reference to value of an element with given rank in the row. @@ -173,7 +176,7 @@ class SparseMatrixRowView * \return non-constant reference to the matrix element value. */ __cuda_callable__ - RealType& getValue( const IndexType localIdx ); + auto getValue( const IndexType localIdx ) -> typename ValueGetterType::ResultType; /** * \brief Sets a value of matrix element with given rank in the matrix row. 
@@ -254,6 +257,9 @@ class SparseMatrixRowView __cuda_callable__ const IteratorType cend() const; + __cuda_callable__ + IndexType getPaddingIndex() const { return -1; }; + protected: SegmentViewType segmentView; diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp index 82ae9b8706..75cba117ab 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.hpp +++ b/src/TNL/Matrices/SparseMatrixRowView.hpp @@ -82,11 +82,14 @@ template< typename SegmentView, bool isBinary_ > __cuda_callable__ auto SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: -getValue( const IndexType localIdx ) const -> const RealType& +getValue( const IndexType localIdx ) const -> typename ValueGetterType::ConstResultType { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); - TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." ); - return values[ segmentView.getGlobalIndex( localIdx ) ]; + //TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." ); + return ValueGetterType::getValue( segmentView.getGlobalIndex( localIdx ), + values, + columnIndexes, + this->getPaddingIndex() ); } template< typename SegmentView, @@ -95,11 +98,14 @@ template< typename SegmentView, bool isBinary_ > __cuda_callable__ auto SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: -getValue( const IndexType localIdx ) -> RealType& +getValue( const IndexType localIdx ) -> typename ValueGetterType::ResultType { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); - TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." ); - return values[ segmentView.getGlobalIndex( localIdx ) ]; + //TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." 
); + return ValueGetterType::getValue( segmentView.getGlobalIndex( localIdx ), + values, + columnIndexes, + this->getPaddingIndex() ); } template< typename SegmentView, diff --git a/src/UnitTests/Matrices/MatrixWrappingTest.h b/src/UnitTests/Matrices/MatrixWrappingTest.h index af3fb00b3f..9da8421d5d 100644 --- a/src/UnitTests/Matrices/MatrixWrappingTest.h +++ b/src/UnitTests/Matrices/MatrixWrappingTest.h @@ -77,7 +77,7 @@ TYPED_TEST( MatrixTest, WrapMatrix ) using DeviceType = typename DenseMatrix::DeviceType; using IndexType = typename DenseMatrix::IndexType; using CSRMatrix = TNL::Matrices::SparseMatrix< RealType, DeviceType, IndexType, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >; - using EllpackMatrix = TNL::Matrices::SparseMatrix< RealType, DeviceType, IndexType, TNL::Matrices::GeneralMatrix, RowMajorEllpack >; + using EllpackMatrix = TNL::Matrices::SparseMatrix< RealType, DeviceType, IndexType, TNL::Matrices::GeneralMatrix, RowMajorEllpack >; DenseMatrix denseMatrix{ { 1, 2, 0, 0 }, @@ -100,7 +100,7 @@ TYPED_TEST( MatrixTest, WrapMatrix ) auto wrappedDenseMatrix = TNL::Matrices::wrapDenseMatrix< DeviceType >( rows, columns, denseMatrixValues ); auto wrappedCSRMatrix = TNL::Matrices::wrapCSRMatrix< DeviceType >( rows, columns, csrMatrixRowPointers, csrMatrixValues, csrMatrixColumnIndexes ); - auto wrappedEllpackMatrix = TNL::Matrices::wrapEllpackMatrix< DeviceType >( rows, columns, ( IndexType ) 2, ellpackMatrixValues, ellpackMatrixColumnIndexes ); + auto wrappedEllpackMatrix = TNL::Matrices::wrapEllpackMatrix< DeviceType, TNL::Algorithms::Segments::RowMajorOrder >( rows, columns, ( IndexType ) 2, ellpackMatrixValues, ellpackMatrixColumnIndexes ); EXPECT_EQ( denseMatrix, wrappedDenseMatrix ); EXPECT_EQ( csrMatrix, wrappedCSRMatrix ); -- GitLab