diff --git a/Documentation/Examples/CMakeLists.txt b/Documentation/Examples/CMakeLists.txt index 29ba5a5dfc33b4fb6bbc88edd7f467e136f602ae..e984d2f1f2334ec7cd5cb9c8bf1ec680cab91e7c 100644 --- a/Documentation/Examples/CMakeLists.txt +++ b/Documentation/Examples/CMakeLists.txt @@ -3,80 +3,42 @@ ADD_SUBDIRECTORY( Containers ) ADD_SUBDIRECTORY( Pointers ) ADD_SUBDIRECTORY( Matrices ) -ADD_EXECUTABLE( FileExample FileExample.cpp ) -ADD_CUSTOM_COMMAND( COMMAND FileExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExample.out OUTPUT FileExample.out ) - -IF( BUILD_CUDA ) - CUDA_ADD_EXECUTABLE(FileExampleCuda FileExampleCuda.cu) - ADD_CUSTOM_COMMAND( COMMAND FileExampleCuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExampleCuda.out OUTPUT FileExampleCuda.out ) -ENDIF() - -ADD_EXECUTABLE( FileExampleSaveAndLoad FileExampleSaveAndLoad.cpp ) -ADD_CUSTOM_COMMAND( COMMAND FileExampleSaveAndLoad > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileExampleSaveAndLoad.out OUTPUT FileExampleSaveAndLoad.out ) - -ADD_EXECUTABLE( FileNameExample FileNameExample.cpp ) -ADD_CUSTOM_COMMAND( COMMAND FileNameExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileNameExample.out OUTPUT FileNameExample.out ) - -ADD_EXECUTABLE( FileNameExampleDistributedSystemNodeCoordinates FileNameExampleDistributedSystemNodeCoordinates.cpp ) -ADD_CUSTOM_COMMAND( COMMAND FileNameExampleDistributedSystemNodeCoordinates > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileNameExampleDistributedSystemNodeCoordinates.out OUTPUT FileNameExampleDistributedSystemNodeCoordinates.out ) - - -ADD_EXECUTABLE( FileNameExampleDistributedSystemNodeId FileNameExampleDistributedSystemNodeId.cpp ) -ADD_CUSTOM_COMMAND( COMMAND FileNameExampleDistributedSystemNodeId > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/FileNameExampleDistributedSystemNodeId.out OUTPUT FileNameExampleDistributedSystemNodeId.out ) - -ADD_EXECUTABLE( ObjectExample_getType ObjectExample_getType.cpp ) -ADD_CUSTOM_COMMAND( COMMAND ObjectExample_getType > 
${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ObjectExample_getType.out OUTPUT ObjectExample_getType.out ) - -ADD_EXECUTABLE( ParameterContainerExample ParameterContainerExample.cpp ) -ADD_EXECUTABLE( ConfigDescriptionExample ConfigDescriptionExample.cpp ) -ADD_EXECUTABLE( LoggerExample LoggerExample.cpp ) -ADD_EXECUTABLE( MathExample MathExample.cpp ) - -ADD_EXECUTABLE( ParseObjectTypeExample ParseObjectTypeExample.cpp ) -ADD_CUSTOM_COMMAND( COMMAND ParseObjectTypeExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/ParseObjectTypeExample.out OUTPUT ParseObjectTypeExample.out ) - -ADD_EXECUTABLE( StringExample StringExample.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExample > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExample.out OUTPUT StringExample.out ) - -ADD_EXECUTABLE( StringExampleGetAllocatedSize StringExampleGetAllocatedSize.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExampleGetAllocatedSize > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleGetAllocatedSize.out OUTPUT StringExampleGetAllocatedSize.out ) - -ADD_EXECUTABLE( StringExampleReplace StringExampleReplace.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExampleReplace > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleReplace.out OUTPUT StringExampleReplace.out ) - -ADD_EXECUTABLE( StringExampleSetSize StringExampleSetSize.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExampleSetSize > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleSetSize.out OUTPUT StringExampleSetSize.out ) - -ADD_EXECUTABLE( StringExampleSplit StringExampleSplit.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExampleSplit > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleSplit.out OUTPUT StringExampleSplit.out ) - -ADD_EXECUTABLE( StringExampleStrip StringExampleStrip.cpp ) -ADD_CUSTOM_COMMAND( COMMAND StringExampleStrip > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/StringExampleStrip.out OUTPUT StringExampleStrip.out ) - -ADD_EXECUTABLE( TimerExample TimerExample.cpp ) -ADD_CUSTOM_COMMAND( COMMAND TimerExample > 
${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TimerExample.out OUTPUT TimerExample.out ) - -ADD_EXECUTABLE( TimerExampleLogger TimerExampleLogger.cpp ) -ADD_CUSTOM_COMMAND( COMMAND TimerExampleLogger > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/TimerExampleLogger.out OUTPUT TimerExampleLogger.out ) - - -ADD_CUSTOM_TARGET( RunExamples ALL DEPENDS - FileExample.out - FileExampleSaveAndLoad.out - FileNameExample.out - FileNameExampleDistributedSystemNodeCoordinates.out - FileNameExampleDistributedSystemNodeId.out - ObjectExample_getType.out - ParseObjectTypeExample.out - StringExample.out - StringExampleGetAllocatedSize.out - StringExampleReplace.out - StringExampleSplit.out - StringExampleStrip.out - TimerExample.out - TimerExampleLogger.out ) +set( COMMON_EXAMPLES + FileExampleCuda +) + +set( HOST_EXAMPLES + FileExample + FileExampleSaveAndLoad + FileNameExample + FileNameExampleDistributedSystemNodeCoordinates + FileNameExampleDistributedSystemNodeId + ObjectExample_getType + ParseObjectTypeExample + StringExample + StringExampleGetAllocatedSize + StringExampleReplace + StringExampleSplit + StringExampleStrip + TimerExample + TimerExampleLogger ) if( BUILD_CUDA ) - ADD_CUSTOM_TARGET( RunExamples-cuda ALL DEPENDS - FileExampleCuda.out ) -ENDIF() + foreach( target IN ITEMS ${COMMON_EXAMPLES} ) + cuda_add_executable( ${target}-cuda ${target}.cu OPTIONS ) + add_custom_command( COMMAND ${target}-cuda > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) + set( CUDA_OUTPUTS ${CUDA_OUTPUTS} ${target}.out ) + endforeach() +else() + foreach( target IN ITEMS ${COMMON_EXAMPLES} ${HOST_EXAMPLES}) + add_executable( ${target} ${target}.cpp ) + add_custom_command( COMMAND ${target} > ${TNL_DOCUMENTATION_OUTPUT_SNIPPETS_PATH}/${target}.out OUTPUT ${target}.out ) + set( HOST_OUTPUTS ${HOST_OUTPUTS} ${target}.out ) + endforeach() +endif() + +IF( BUILD_CUDA ) + ADD_CUSTOM_TARGET( RunExamples-cuda ALL DEPENDS ${CUDA_OUTPUTS} ) +ELSE() + ADD_CUSTOM_TARGET( 
RunExamples ALL DEPENDS ${HOST_OUTPUTS} ) +ENDIF() \ No newline at end of file diff --git a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_wrapEllpack.cpp b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_wrapEllpack.cpp index 9f36df57ec34130c6aaa2adf53afaa39c39a9911..67df09891612c6d10f0743a4796cbbac31f5af83 100644 --- a/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_wrapEllpack.cpp +++ b/Documentation/Examples/Matrices/SparseMatrix/SparseMatrixViewExample_wrapEllpack.cpp @@ -26,7 +26,7 @@ void wrapMatrixView() /*** * Wrap the arrays `values` and `columnIndexes` to sparse matrix view */ - auto matrix = TNL::Matrices::wrapEllpackMatrix< Device >( rows, columns, 2, values, columnIndexes ); + auto matrix = TNL::Matrices::wrapEllpackMatrix< Device, TNL::Algorithms::Segments::RowMajorOrder >( rows, columns, 2, values, columnIndexes ); std::cout << "Matrix reads as: " << std::endl << matrix << std::endl; } diff --git a/Documentation/Tutorials/Matrices/tutorial_Matrices.md b/Documentation/Tutorials/Matrices/tutorial_Matrices.md index 1014c9103645245c97ad898241d9be8820a90762..efcf7a4bfe9dcbabc90983e7c064877ad1c1ef46 100644 --- a/Documentation/Tutorials/Matrices/tutorial_Matrices.md +++ b/Documentation/Tutorials/Matrices/tutorial_Matrices.md @@ -677,7 +677,7 @@ Wrapping data corresponding with the Ellpack format is very similar as we can se \includelineno SparseMatrixViewExample_wrapEllpack.cpp -We encode the same sparse matrix as in the previous example. The essence of the Ellpack format is that we allocate the same number of matrix elements for each row which is two in our example. For some matrix rows we use the padding zeros for which we set the column index to -1 (line 21). Therefore the size of `valuesVector` and `columnIndexesVector` equals number of matrix rows times number of matrix elements allocated in each row. 
As before, we turn the vectors into C style pointers (lines 23-24) and wrap them into sparse matrix view with Ellpack format (line 29). Note that we must state the device on which the arrays are allocated explicitly. The result looks as follows: +We encode the same sparse matrix as in the previous example. The essence of the Ellpack format is that we allocate the same number of matrix elements for each row, which is two in our example. Some matrix rows are filled with padding zeros, for which we set the column index to -1 (line 21). Therefore the size of `valuesVector` and `columnIndexesVector` equals the number of matrix rows times the number of matrix elements allocated in each row. As before, we turn the vectors into C-style pointers (lines 23-24) and wrap them into a sparse matrix view with the Ellpack format (line 29). Note that we must explicitly state both the device on which the arrays are allocated and the organization of the matrix elements, which is \ref TNL::Algorithms::Segments::RowMajorOrder in this case. For an Ellpack matrix stored on the GPU, \ref TNL::Algorithms::Segments::ColumnMajorOrder is preferred. The result looks as follows: \include SparseMatrixViewExample_wrapEllpack.out diff --git a/src/TNL/Matrices/MatrixWrapping.h b/src/TNL/Matrices/MatrixWrapping.h index fa91de8315de528ddf16730a8138f0f89673b6b5..6c0c6bb8ca0b57bf21f493201202b3eec8bbdfab 100644 --- a/src/TNL/Matrices/MatrixWrapping.h +++ b/src/TNL/Matrices/MatrixWrapping.h @@ -94,9 +94,9 @@ wrapCSRMatrix( const Index& rows, const Index& columns, Index* rowPointers, Real /// This is to prevent from appearing in Doxygen documentation. 
/// \cond HIDDEN_CLASS template< typename Device, + ElementsOrganization Organization, typename Real, typename Index, - ElementsOrganization Organization = Algorithms::Segments::DefaultElementsOrganization< Device >::getOrganization(), int Alignment = 1 > struct EllpackMatrixWrapper { @@ -140,15 +140,15 @@ struct EllpackMatrixWrapper * \include SparseMatrixViewExample_wrapEllpack.out */ template< typename Device, + ElementsOrganization Organization, typename Real, typename Index, - ElementsOrganization Organization = Algorithms::Segments::DefaultElementsOrganization< Device >::getOrganization(), int Alignment = 1 > auto wrapEllpackMatrix( const Index rows, const Index columns, const Index nonzerosPerRow, Real* values, Index* columnIndexes ) --> decltype( EllpackMatrixWrapper< Device, Real, Index, Organization, Alignment >::wrap( rows, columns, nonzerosPerRow, values, columnIndexes ) ) +-> decltype( EllpackMatrixWrapper< Device, Organization, Real, Index, Alignment >::wrap( rows, columns, nonzerosPerRow, values, columnIndexes ) ) { - return EllpackMatrixWrapper< Device, Real, Index, Organization, Alignment >::wrap( rows, columns, nonzerosPerRow, values, columnIndexes ); + return EllpackMatrixWrapper< Device, Organization, Real, Index, Alignment >::wrap( rows, columns, nonzerosPerRow, values, columnIndexes ); } } //namespace Matrices diff --git a/src/TNL/Matrices/SparseMatrix.hpp b/src/TNL/Matrices/SparseMatrix.hpp index b58620c2d0d3c603e92760bc5e094280ede8bef3..ac8688425729f02f7cb987846cfe127b81a76299 100644 --- a/src/TNL/Matrices/SparseMatrix.hpp +++ b/src/TNL/Matrices/SparseMatrix.hpp @@ -165,7 +165,7 @@ getConstView() const -> ConstViewType this->getColumns(), this->getValues().getConstView(), this->columnIndexes.getConstView(), - this->segments.getConstView() ); + const_cast< SparseMatrix* >( this )->segments.getView() ); } template< typename Real, diff --git a/src/TNL/Matrices/SparseMatrixRowView.h b/src/TNL/Matrices/SparseMatrixRowView.h index 
4976a420e22fb4544a4fbb454e7a17574105098d..03a90af7a11bdd2f6f17317517df916ed8c5d5ca 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.h +++ b/src/TNL/Matrices/SparseMatrixRowView.h @@ -14,6 +14,7 @@ #include <TNL/Cuda/CudaCallable.h> #include <TNL/Matrices/MatrixRowViewIterator.h> +#include <TNL/Matrices/details/SparseMatrixRowViewValueGetter.h> namespace TNL { namespace Matrices { @@ -101,6 +102,8 @@ class SparseMatrixRowView */ using IteratorType = MatrixRowViewIterator< RowView >; + using ValueGetterType = details::SparseMatrixRowViewValueGetter< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >; + /** * \brief Tells whether the parent matrix is a binary matrix. * @return `true` if the matrix is binary. @@ -163,7 +166,7 @@ class SparseMatrixRowView * \return constant reference to the matrix element value. */ __cuda_callable__ - const RealType& getValue( const IndexType localIdx ) const; + auto getValue( const IndexType localIdx ) const -> typename ValueGetterType::ConstResultType; /** * \brief Returns non-constants reference to value of an element with given rank in the row. @@ -173,7 +176,7 @@ class SparseMatrixRowView * \return non-constant reference to the matrix element value. */ __cuda_callable__ - RealType& getValue( const IndexType localIdx ); + auto getValue( const IndexType localIdx ) -> typename ValueGetterType::ResultType; /** * \brief Sets a value of matrix element with given rank in the matrix row. 
@@ -254,6 +257,9 @@ class SparseMatrixRowView __cuda_callable__ const IteratorType cend() const; + __cuda_callable__ + IndexType getPaddingIndex() const { return -1; }; + protected: SegmentViewType segmentView; diff --git a/src/TNL/Matrices/SparseMatrixRowView.hpp b/src/TNL/Matrices/SparseMatrixRowView.hpp index 82ae9b8706e4998fc88f2403afd109bb68f61f51..75cba117ab767cdb837b29df01894a8d027a0935 100644 --- a/src/TNL/Matrices/SparseMatrixRowView.hpp +++ b/src/TNL/Matrices/SparseMatrixRowView.hpp @@ -82,11 +82,14 @@ template< typename SegmentView, bool isBinary_ > __cuda_callable__ auto SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: -getValue( const IndexType localIdx ) const -> const RealType& +getValue( const IndexType localIdx ) const -> typename ValueGetterType::ConstResultType { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); - TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." ); - return values[ segmentView.getGlobalIndex( localIdx ) ]; + //TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." ); + return ValueGetterType::getValue( segmentView.getGlobalIndex( localIdx ), + values, + columnIndexes, + this->getPaddingIndex() ); } template< typename SegmentView, @@ -95,11 +98,14 @@ template< typename SegmentView, bool isBinary_ > __cuda_callable__ auto SparseMatrixRowView< SegmentView, ValuesView, ColumnsIndexesView, isBinary_ >:: -getValue( const IndexType localIdx ) -> RealType& +getValue( const IndexType localIdx ) -> typename ValueGetterType::ResultType { TNL_ASSERT_LT( localIdx, this->getSize(), "Local index exceeds matrix row capacity." ); - TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." ); - return values[ segmentView.getGlobalIndex( localIdx ) ]; + //TNL_ASSERT_FALSE( isBinary(), "Cannot call this method for binary matrix row." 
); + return ValueGetterType::getValue( segmentView.getGlobalIndex( localIdx ), + values, + columnIndexes, + this->getPaddingIndex() ); } template< typename SegmentView, diff --git a/src/UnitTests/Matrices/MatrixWrappingTest.h b/src/UnitTests/Matrices/MatrixWrappingTest.h index af3fb00b3f4181a925b15cb84bbac568d02b1121..9da8421d5df8e6f0184c6fa763cdc9ab8e867792 100644 --- a/src/UnitTests/Matrices/MatrixWrappingTest.h +++ b/src/UnitTests/Matrices/MatrixWrappingTest.h @@ -77,7 +77,7 @@ TYPED_TEST( MatrixTest, WrapMatrix ) using DeviceType = typename DenseMatrix::DeviceType; using IndexType = typename DenseMatrix::IndexType; using CSRMatrix = TNL::Matrices::SparseMatrix< RealType, DeviceType, IndexType, TNL::Matrices::GeneralMatrix, TNL::Algorithms::Segments::CSRScalar >; - using EllpackMatrix = TNL::Matrices::SparseMatrix< RealType, DeviceType, IndexType, TNL::Matrices::GeneralMatrix, RowMajorEllpack >; + using EllpackMatrix = TNL::Matrices::SparseMatrix< RealType, DeviceType, IndexType, TNL::Matrices::GeneralMatrix, RowMajorEllpack >; DenseMatrix denseMatrix{ { 1, 2, 0, 0 }, @@ -100,7 +100,7 @@ TYPED_TEST( MatrixTest, WrapMatrix ) auto wrappedDenseMatrix = TNL::Matrices::wrapDenseMatrix< DeviceType >( rows, columns, denseMatrixValues ); auto wrappedCSRMatrix = TNL::Matrices::wrapCSRMatrix< DeviceType >( rows, columns, csrMatrixRowPointers, csrMatrixValues, csrMatrixColumnIndexes ); - auto wrappedEllpackMatrix = TNL::Matrices::wrapEllpackMatrix< DeviceType >( rows, columns, ( IndexType ) 2, ellpackMatrixValues, ellpackMatrixColumnIndexes ); + auto wrappedEllpackMatrix = TNL::Matrices::wrapEllpackMatrix< DeviceType, TNL::Algorithms::Segments::RowMajorOrder >( rows, columns, ( IndexType ) 2, ellpackMatrixValues, ellpackMatrixColumnIndexes ); EXPECT_EQ( denseMatrix, wrappedDenseMatrix ); EXPECT_EQ( csrMatrix, wrappedCSRMatrix );